Skip to content

Commit 905c010

Browse files
author
git apple-llvm automerger
committed
Merge commit '1847a4efae6b' from llvm.org/main into next
2 parents 8c71fb7 + 1847a4e commit 905c010

File tree

3 files changed

+762
-896
lines changed

3 files changed

+762
-896
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2681,9 +2681,13 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo,
26812681
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(
26822682
MF, cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex());
26832683

2684+
EVT MaskVT = LoMask.getValueType();
2685+
assert(MaskVT.getScalarType() == MVT::i1 && "Expected vector of i1s");
2686+
26842687
// We store LoVec and then insert HiVec starting at offset=|1s| in LoMask.
2685-
SDValue WideMask =
2686-
DAG.getNode(ISD::ZERO_EXTEND, DL, LoMask.getValueType(), LoMask);
2688+
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
2689+
MaskVT.getVectorElementCount());
2690+
SDValue WideMask = DAG.getNode(ISD::ZERO_EXTEND, DL, WideMaskVT, LoMask);
26872691
SDValue Offset = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, WideMask);
26882692
Offset = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Offset);
26892693

llvm/test/CodeGen/AArch64/sve-vector-compress.ll

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -145,17 +145,17 @@ define <vscale x 8 x i32> @test_compress_large(<vscale x 8 x i32> %vec, <vscale
145145
; CHECK-NEXT: .cfi_offset w29, -16
146146
; CHECK-NEXT: punpklo p1.h, p0.b
147147
; CHECK-NEXT: cnth x9
148-
; CHECK-NEXT: ptrue p2.s
149-
; CHECK-NEXT: sub x9, x9, #1
150148
; CHECK-NEXT: punpkhi p0.h, p0.b
149+
; CHECK-NEXT: sub x9, x9, #1
150+
; CHECK-NEXT: cntp x8, p1, p1.s
151151
; CHECK-NEXT: compact z0.s, p1, z0.s
152-
; CHECK-NEXT: cntp x8, p2, p1.s
153152
; CHECK-NEXT: compact z1.s, p0, z1.s
154-
; CHECK-NEXT: str z0, [sp]
153+
; CHECK-NEXT: ptrue p0.s
155154
; CHECK-NEXT: cmp x8, x9
155+
; CHECK-NEXT: str z0, [sp]
156156
; CHECK-NEXT: csel x8, x8, x9, lo
157157
; CHECK-NEXT: mov x9, sp
158-
; CHECK-NEXT: st1w { z1.s }, p2, [x9, x8, lsl #2]
158+
; CHECK-NEXT: st1w { z1.s }, p0, [x9, x8, lsl #2]
159159
; CHECK-NEXT: ldr z0, [sp]
160160
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
161161
; CHECK-NEXT: addvl sp, sp, #2
@@ -231,23 +231,24 @@ define <4 x double> @test_compress_v4f64_with_sve(<4 x double> %vec, <4 x i1> %m
231231
; CHECK-NEXT: sub sp, sp, #32
232232
; CHECK-NEXT: .cfi_def_cfa_offset 32
233233
; CHECK-NEXT: ushll v2.4s, v2.4h, #0
234-
; CHECK-NEXT: ptrue p0.d
234+
; CHECK-NEXT: movi v5.2s, #1
235235
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
236236
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
237+
; CHECK-NEXT: mov x9, sp
238+
; CHECK-NEXT: ptrue p0.d
237239
; CHECK-NEXT: ushll v3.2d, v2.2s, #0
238240
; CHECK-NEXT: ushll2 v4.2d, v2.4s, #0
239-
; CHECK-NEXT: fmov x8, d2
241+
; CHECK-NEXT: and v2.8b, v2.8b, v5.8b
240242
; CHECK-NEXT: shl v3.2d, v3.2d, #63
241243
; CHECK-NEXT: shl v4.2d, v4.2d, #63
242-
; CHECK-NEXT: lsr x9, x8, #32
243-
; CHECK-NEXT: eor w8, w8, w9
244-
; CHECK-NEXT: mov x9, sp
244+
; CHECK-NEXT: addp v2.2s, v2.2s, v2.2s
245245
; CHECK-NEXT: cmlt v3.2d, v3.2d, #0
246246
; CHECK-NEXT: cmlt v4.2d, v4.2d, #0
247-
; CHECK-NEXT: and x8, x8, #0x3
248-
; CHECK-NEXT: lsl x8, x8, #3
247+
; CHECK-NEXT: fmov w8, s2
249248
; CHECK-NEXT: and z3.d, z3.d, #0x1
250249
; CHECK-NEXT: and z4.d, z4.d, #0x1
250+
; CHECK-NEXT: and x8, x8, #0x3
251+
; CHECK-NEXT: lsl x8, x8, #3
251252
; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
252253
; CHECK-NEXT: cmpne p0.d, p0/z, z4.d, #0
253254
; CHECK-NEXT: compact z0.d, p1, z0.d

0 commit comments

Comments
 (0)