-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[NFC][PowerPC] Optimize vector compares for not equal to non zero vectors #171635
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-powerpc Author: None (Himadhith) Changes: Lockdown instructions for vector compares not equal to non-zero. The current implementation can be made better by removing the negation and using the identity 0XFFFF + 1 = 0 and 0 + 1 = 1. Full diff: https://github.com/llvm/llvm-project/pull/171635.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/PowerPC/optimize-vector-not-equal.ll b/llvm/test/CodeGen/PowerPC/optimize-vector-not-equal.ll
new file mode 100644
index 0000000000000..c3bb2d5ecc461
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/optimize-vector-not-equal.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64LE
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_32
+
+; The current implementation compares the input vector in register v2 with the zero vector in v3. The comparison result is then negated, which converts each element as follows:
+; 0XFFFF -> 0
+; 0 -> 1
+; An optimized version will follow this NFC patch.
+
+define i32 @cols_needed(<4 x i16> %wide.load) {
+; POWERPC_64LE-LABEL: cols_needed:
+; POWERPC_64LE: # %bb.0: # %entry
+; POWERPC_64LE-NEXT: xxlxor v3, v3, v3
+; POWERPC_64LE-NEXT: li r3, 0
+; POWERPC_64LE-NEXT: vcmpequh v2, v2, v3
+; POWERPC_64LE-NEXT: xxleqv v3, v3, v3
+; POWERPC_64LE-NEXT: vmrglh v2, v2, v2
+; POWERPC_64LE-NEXT: vsubuwm v2, v2, v3
+; POWERPC_64LE-NEXT: xxswapd v3, v2
+; POWERPC_64LE-NEXT: vadduwm v2, v2, v3
+; POWERPC_64LE-NEXT: xxspltw v3, v2, 2
+; POWERPC_64LE-NEXT: vadduwm v2, v2, v3
+; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2
+; POWERPC_64LE-NEXT: blr
+;
+; POWERPC_64-LABEL: cols_needed:
+; POWERPC_64: # %bb.0: # %entry
+; POWERPC_64-NEXT: xxlxor v3, v3, v3
+; POWERPC_64-NEXT: li r3, 0
+; POWERPC_64-NEXT: vcmpequh v2, v2, v3
+; POWERPC_64-NEXT: xxleqv v3, v3, v3
+; POWERPC_64-NEXT: vmrghh v2, v2, v2
+; POWERPC_64-NEXT: vsubuwm v2, v2, v3
+; POWERPC_64-NEXT: xxswapd v3, v2
+; POWERPC_64-NEXT: vadduwm v2, v2, v3
+; POWERPC_64-NEXT: xxspltw v3, v2, 1
+; POWERPC_64-NEXT: vadduwm v2, v2, v3
+; POWERPC_64-NEXT: vextuwlx r3, r3, v2
+; POWERPC_64-NEXT: blr
+;
+; POWERPC_32-LABEL: cols_needed:
+; POWERPC_32: # %bb.0: # %entry
+; POWERPC_32-NEXT: xxlxor v3, v3, v3
+; POWERPC_32-NEXT: vcmpequh v2, v2, v3
+; POWERPC_32-NEXT: xxleqv v3, v3, v3
+; POWERPC_32-NEXT: vmrghh v2, v2, v2
+; POWERPC_32-NEXT: vsubuwm v2, v2, v3
+; POWERPC_32-NEXT: xxswapd v3, v2
+; POWERPC_32-NEXT: vadduwm v2, v2, v3
+; POWERPC_32-NEXT: xxspltw v3, v2, 1
+; POWERPC_32-NEXT: vadduwm v2, v2, v3
+; POWERPC_32-NEXT: stxv v2, -16(r1)
+; POWERPC_32-NEXT: lwz r3, -16(r1)
+; POWERPC_32-NEXT: blr
+entry:
+ %0 = icmp ne <4 x i16> %wide.load, zeroinitializer
+ %1 = zext <4 x i1> %0 to <4 x i32>
+ %2 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %1)
+ ret i32 %2
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
|
This is a comment for testing the issue write workflow |
|
This is another comment for testing the issue write workflow that was placed in a separate file |
a37d89d to
d056c90
Compare
tonykuttai
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/39/builds/9073 Here is the relevant piece of the build log for the reference |
…tors (llvm#171635) Lockdown instructions for vector compares `not equal to non-zero (Ex: vec[i]!=7)`. Current implementation can be made better by removing the negation and using the identity ``` 0XFFFF + 1 = 0 and 0 + 1 = 1 ``` Co-authored-by: himadhith <himadhith.v@ibm.com>
Lockdown instructions for vector compares
not equal to non-zero (Ex: vec[i]!=7). Current implementation can be made better by removing the negation and using the identity 0XFFFF + 1 = 0 and 0 + 1 = 1