Skip to content

Commit 4e6ab5e

Browse files
vmustyaigcbot
authored and committed
Improve i64 emulation optimization for add and sub for VC
When the i64 source values are created as a combination of low/high parts, the compiler can eliminate the wrregion and bitcast operations and propagate the low/high parts directly to the i64 emulation routines. Also, when the i64 source value of an add or sub instruction is a zero-extended i32 value, the compiler can optimize the high-part addition or subtraction.
1 parent d6d2fa4 commit 4e6ab5e

File tree

7 files changed

+242
-37
lines changed

7 files changed

+242
-37
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXConstants.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,7 +1437,7 @@ Instruction *ConstantLoader::load(Instruction *InsertBefore) {
14371437

14381438
if (!PackedFloat && !PackedIntScale &&
14391439
!isa<UndefValue>(C)) { // not packed int constant or undef
1440-
if (auto CC = getConsolidatedConstant(C)) {
1440+
if (auto *CC = getConsolidatedConstant(C)) {
14411441
// We're loading a vector of byte or short (but not i1). Use int so the
14421442
// instruction does not use so many channels. This may also save it being
14431443
// split by legalization.
@@ -1477,6 +1477,10 @@ Instruction *ConstantLoader::load(Instruction *InsertBefore) {
14771477

14781478
NewInst = cast<Instruction>(Builder.CreateBitCast(NewInst, Ty));
14791479

1480+
if (Ty->isIntOrIntVectorTy(64) && !allowI64Ops())
1481+
NewInst =
1482+
genx::emulateI64Operation(&Subtarget, NewInst, EmulationFlag::RAUWE);
1483+
14801484
if (AddedInstructions)
14811485
AddedInstructions->push_back(NewInst);
14821486

@@ -1548,9 +1552,11 @@ Instruction *ConstantLoader::loadPackedInt(Instruction *Inst) {
15481552
Val -= PackedIntAdjust;
15491553
Val /= PackedIntScale;
15501554
}
1551-
PackedVals.push_back(ConstantInt::get(PackTy, Val, /*isSigned=*/true));
1552-
IGC_ASSERT(cast<ConstantInt>(PackedVals.back())->getSExtValue() >= -8 &&
1553-
cast<ConstantInt>(PackedVals.back())->getSExtValue() <= 15);
1555+
auto *PackedConst =
1556+
cast<ConstantInt>(ConstantInt::get(PackTy, Val, /*isSigned=*/true));
1557+
PackedVals.push_back(PackedConst);
1558+
IGC_ASSERT(PackedConst->getSExtValue() >= -8 &&
1559+
PackedConst->getSExtValue() <= 15);
15541560
}
15551561

15561562
ConstantLoader Packed(ConstantVector::get(PackedVals), Subtarget, DL);

IGC/VectorCompiler/lib/GenXCodeGen/GenXEmulate.cpp

Lines changed: 103 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,10 @@ class GenXEmulate : public ModulePass {
175175
// handles genx_{XX}add_sat cases
176176
Value *visitGenxAddSat(CallInst &CI);
177177

178+
// handles genx_constanti
179+
Value *visitConstantI(CallInst &CI);
180+
178181
// [+] bitcast
179-
// [-] genx.constanti ?
180182
// [-] genx.scatter ?
181183
// [-] genx.gather ?
182184
Value *visitCallInst(CallInst &CI);
@@ -191,6 +193,7 @@ class GenXEmulate : public ModulePass {
191193
static bool isI64ToFP(const Instruction &I);
192194
static bool isI64Cmp(const Instruction &I);
193195
static bool isI64AddSat(const Instruction &I);
196+
static bool isI64Constant(const Instruction &I);
194197
static Value *detectBitwiseNot(BinaryOperator &);
195198
static Type *changeScalarType(Type *T, Type *NewTy);
196199

@@ -216,7 +219,7 @@ class GenXEmulate : public ModulePass {
216219
bool needsEmulation() const {
217220
return (SplitBuilder.IsI64Operation() || isI64Cmp(Inst) ||
218221
isConvertOfI64(Inst) || isI64PointerOp(Inst) ||
219-
isI64AddSat(Inst));
222+
isI64AddSat(Inst) || isI64Constant(Inst));
220223
}
221224

222225
IRBuilder getIRBuilder() {
@@ -454,6 +457,13 @@ bool GenXEmulate::Emu64Expander::isI64AddSat(const Instruction &I) {
454457
}
455458
return false;
456459
}
460+
bool GenXEmulate::Emu64Expander::isI64Constant(const Instruction &I) {
461+
auto IID = vc::getAnyIntrinsicID(&I);
462+
if (IID != GenXIntrinsic::genx_constanti)
463+
return false;
464+
Type *RetTy = I.getType();
465+
return RetTy->isIntOrIntVectorTy(64);
466+
}
457467

458468
Value *GenXEmulate::Emu64Expander::detectBitwiseNot(BinaryOperator &Op) {
459469
if (Instruction::Xor != Op.getOpcode())
@@ -564,8 +574,24 @@ Value *GenXEmulate::Emu64Expander::visitAdd(BinaryOperator &Op) {
564574
auto AddcRes = buildAddc(Inst.getModule(), Builder, *Src0.Lo, *Src1.Lo,
565575
"int_emu.add64.lo.");
566576
auto *AddLo = AddcRes.Val;
567-
auto *AddHi =
568-
buildTernaryAddition(Builder, *AddcRes.CB, *Src0.Hi, *Src1.Hi, "add_hi");
577+
Value *AddHi = nullptr;
578+
579+
auto *Src0HiConst = dyn_cast<Constant>(Src0.Hi);
580+
auto *Src1HiConst = dyn_cast<Constant>(Src1.Hi);
581+
if (Src0HiConst && Src1HiConst && Src0HiConst->isZeroValue() &&
582+
Src1HiConst->isZeroValue()) {
583+
// Optimization for the case when both high parts are zero
584+
AddHi = AddcRes.CB;
585+
} else if (Src0HiConst && Src0HiConst->isZeroValue()) {
586+
// Optimization for the case when src0 high part is zero
587+
AddHi = Builder.CreateAdd(AddcRes.CB, Src1.Hi, "add_hi");
588+
} else if (Src1HiConst && Src1HiConst->isZeroValue()) {
589+
// Optimization for the case when src1 high part is zero
590+
AddHi = Builder.CreateAdd(AddcRes.CB, Src0.Hi, "add_hi");
591+
} else {
592+
AddHi = buildTernaryAddition(Builder, *AddcRes.CB, *Src0.Hi, *Src1.Hi,
593+
"add_hi");
594+
}
569595
return SplitBuilder.combineLoHiSplit(
570596
{AddLo, AddHi}, Twine("int_emu.") + Op.getOpcodeName() + ".",
571597
Inst.getType()->isIntegerTy());
@@ -589,9 +615,26 @@ Value *GenXEmulate::Emu64Expander::visitSub(BinaryOperator &Op) {
589615
auto *Borrow =
590616
Builder.CreateExtractValue(SubbVal, {IdxSubb_Borrow}, "subb.borrow");
591617
auto *MinusBorrow = Builder.CreateNeg(Borrow, "borrow.negate");
592-
auto *MinusS1Hi = Builder.CreateNeg(Src1.Hi, "negative.src1_hi");
593-
auto *SubHi = buildTernaryAddition(Builder, *Src0.Hi, *MinusBorrow,
594-
*MinusS1Hi, "sub_hi");
618+
619+
Value *SubHi = nullptr;
620+
621+
auto *Src0HiConst = dyn_cast<Constant>(Src0.Hi);
622+
auto *Src1HiConst = dyn_cast<Constant>(Src1.Hi);
623+
if (Src0HiConst && Src1HiConst && Src0HiConst->isZeroValue() &&
624+
Src1HiConst->isZeroValue()) {
625+
// Optimization for the case when both high parts are zero
626+
SubHi = MinusBorrow;
627+
} else if (Src0HiConst && Src0HiConst->isZeroValue()) {
628+
// Optimization for the case when src0 high part is zero
629+
SubHi = Builder.CreateSub(MinusBorrow, Src1.Hi, "sub_hi");
630+
} else if (Src1HiConst && Src1HiConst->isZeroValue()) {
631+
// Optimization for the case when src1 high part is zero
632+
SubHi = Builder.CreateAdd(Src0.Hi, MinusBorrow, "sub_hi");
633+
} else {
634+
auto *MinusS1Hi = Builder.CreateNeg(Src1.Hi, "negative.src1_hi");
635+
SubHi = buildTernaryAddition(Builder, *Src0.Hi, *MinusBorrow, *MinusS1Hi,
636+
"sub_hi");
637+
}
595638
return SplitBuilder.combineLoHiSplit(
596639
{SubLo, SubHi}, Twine("int_emu.") + Op.getOpcodeName() + ".",
597640
Inst.getType()->isIntegerTy());
@@ -1206,6 +1249,57 @@ Value *GenXEmulate::Emu64Expander::visitGenxAddSat(CallInst &CI) {
12061249
return Result;
12071250
}
12081251

1252+
// Emulates a 64-bit genx.constanti call by re-expressing it through a
// genx.constanti of 32-bit elements.
//
// Two strategies are used:
//  * if the operand is a constant whose every element fits into 32 bits, a
//    32-bit genx.constanti is created and zero-extended to the original type;
//  * otherwise the operand is bitcast to twice as many i32 elements, loaded
//    with genx.constanti and bitcast back to the original type.
//
// isI64Constant() accepts both scalar and vector i64 results, so the operand
// is not assumed to be a vector: a scalar is handled as a single element.
//
// Returns the value that computes the same result as CI.
Value *GenXEmulate::Emu64Expander::visitConstantI(CallInst &CI) {
  auto Builder = getIRBuilder();
  auto *M = CI.getModule();
  auto &Ctx = M->getContext();

  auto *Src = CI.getOperand(0);
  // A scalar operand has no vector type; treat it as one element instead of
  // casting unconditionally to a fixed vector type.
  auto *SrcVTy = dyn_cast<IGCLLVM::FixedVectorType>(Src->getType());
  const unsigned NumElements = SrcVTy ? SrcVTy->getNumElements() : 1;

  // Low 32 bits of every element; only meaningful when CanBeZext stays true.
  SmallVector<uint32_t, 8> Values;
  bool CanBeZext = false;

  if (auto *SrcC = dyn_cast<ConstantDataVector>(Src)) {
    CanBeZext = true;
    for (unsigned I = 0; CanBeZext && I < NumElements; I++) {
      auto Val = SrcC->getElementAsInteger(I);
      CanBeZext &= Val <= std::numeric_limits<uint32_t>::max();
      Values.emplace_back(static_cast<uint32_t>(Val));
    }
  } else if (!SrcVTy) {
    if (auto *SrcInt = dyn_cast<ConstantInt>(Src)) {
      auto Val = SrcInt->getZExtValue();
      CanBeZext = Val <= std::numeric_limits<uint32_t>::max();
      Values.emplace_back(static_cast<uint32_t>(Val));
    }
  }

  Value *Result = nullptr;
  if (CanBeZext) {
    // Can be represented as zext from 32-bit values, so create 32-bit constants
    // and zext it to 64-bit.
    Constant *NewSrc = nullptr;
    if (SrcVTy)
      NewSrc = ConstantDataVector::get(Ctx, Values);
    else
      NewSrc = Builder.getInt32(Values.front());
    auto *NewTy = NewSrc->getType();
    IGC_ASSERT_EXIT(NewTy->getScalarSizeInBits() == 32);

    auto *NewIntr =
        vc::getAnyDeclaration(M, GenXIntrinsic::genx_constanti, {NewTy});
    auto *NewConst = Builder.CreateCall(NewIntr, {NewSrc});
    Result = Builder.CreateZExt(NewConst, CI.getType());
  } else {
    // Cannot be represented as zext from 32-bit values, so bitcast the
    // <N x i64> constant (or a scalar i64, N == 1) to <2N x i32>.
    auto *NewTy =
        IGCLLVM::FixedVectorType::get(Builder.getInt32Ty(), NumElements * 2);

    auto *Cast = Builder.CreateBitCast(Src, NewTy);
    // The operand of genx.constanti must remain a constant after the cast.
    IGC_ASSERT_EXIT(isa<Constant>(Cast));

    auto *NewIntr =
        vc::getAnyDeclaration(M, GenXIntrinsic::genx_constanti, {NewTy});
    auto *NewConst = Builder.CreateCall(NewIntr, {Cast});
    Result = Builder.CreateBitCast(NewConst, CI.getType());
  }

  return Result;
}
1302+
12091303
Value *GenXEmulate::Emu64Expander::visitCallInst(CallInst &CI) {
12101304
switch (vc::getAnyIntrinsicID(&Inst)) {
12111305
case Intrinsic::smax:
@@ -1225,6 +1319,8 @@ Value *GenXEmulate::Emu64Expander::visitCallInst(CallInst &CI) {
12251319
case GenXIntrinsic::genx_uuadd_sat:
12261320
case GenXIntrinsic::genx_ssadd_sat:
12271321
return visitGenxAddSat(CI);
1322+
case GenXIntrinsic::genx_constanti:
1323+
return visitConstantI(CI);
12281324
}
12291325
return nullptr;
12301326
}

IGC/VectorCompiler/lib/GenXCodeGen/GenXUtil.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ SPDX-License-Identifier: MIT
2323
#include "vc/Utils/GenX/InternalMetadata.h"
2424
#include "vc/Utils/GenX/PredefinedVariable.h"
2525
#include "vc/Utils/GenX/Printf.h"
26+
#include "vc/Utils/GenX/Region.h"
2627
#include "vc/Utils/General/Types.h"
2728

2829
#include "llvmWrapper/IR/Instructions.h"
@@ -1240,7 +1241,97 @@ IVSplitter::HalfSplit IVSplitter::splitOperandHalf(unsigned SourceIdx,
12401241
return splitValueHalf(*Inst.getOperand(SourceIdx), FoldConstants);
12411242
}
12421243

1244+
// Returns tuple of InsertTo, InsertV, Offset if Src is a legalized
1245+
// <2;1,0> write-region with full mask, otherwise nullptrs and 0.
1246+
static std::tuple<Value *, Value *, unsigned> getInsertedValue(Value *Src) {
1247+
auto *Ty = cast<IGCLLVM::FixedVectorType>(Src->getType());
1248+
auto NumElements = Ty->getNumElements();
1249+
IGC_ASSERT(Ty->getElementType()->isIntegerTy(32));
1250+
1251+
auto *I = dyn_cast<CallInst>(Src);
1252+
if (!I || !GenXIntrinsic::isWrRegion(I))
1253+
return {nullptr, nullptr, 0};
1254+
1255+
vc::CMRegion Wr(I);
1256+
// Only direct, full-mask regions are supported.
1257+
if (Wr.Indirect || Wr.Mask)
1258+
return {nullptr, nullptr, 0};
1259+
1260+
// Support only 1D regions with stride == 2
1261+
if (!Wr.is1D() || Wr.Stride != 2 || Wr.Width != NumElements / 2)
1262+
return {nullptr, nullptr, 0};
1263+
1264+
auto *InsertTo = I->getArgOperand(0);
1265+
auto *InsertV = I->getArgOperand(1);
1266+
unsigned Offset = Wr.Offset / 4; // in elements
1267+
1268+
auto *InsertTy = InsertV->getType();
1269+
if (InsertTy->isIntegerTy(32)) {
1270+
IRBuilder<> Builder(I);
1271+
auto *VTy = IGCLLVM::FixedVectorType::get(InsertTy, 1);
1272+
InsertV = Builder.CreateBitCast(InsertV, VTy);
1273+
}
1274+
1275+
return {InsertTo, InsertV, Offset};
1276+
}
1277+
1278+
// Tries to recognize V as a bitcast of an i32 vector that was assembled by
// two chained write-regions (see getInsertedValue), one at element offset 0
// and one at element offset 1. The value written at offset 0 is reported as
// Lo and the value written at offset 1 as Hi. Returns {nullptr, nullptr} as
// soon as any step does not match; a part is left null when both
// write-regions use the same offset.
static IVSplitter::LoHiSplit matchLoHiCombine(Value &V) {
  auto *Cast = dyn_cast<BitCastInst>(&V);
  if (!Cast || !Cast->getSrcTy()->isIntOrIntVectorTy(32))
    return {nullptr, nullptr};

  Value *Lo = nullptr;
  Value *Hi = nullptr;

  // Walk the chain of two write-regions, starting from the outer one.
  Value *Current = Cast->getOperand(0);
  for (unsigned Level = 0; Level != 2; ++Level) {
    auto [Base, Part, ElemOffset] = getInsertedValue(Current);
    if (!Base || !Part || ElemOffset > 1)
      return {nullptr, nullptr};

    if (ElemOffset == 0)
      Lo = Part;
    else
      Hi = Part;

    // Continue with the value the write-region was applied to.
    Current = Base;
  }

  return {Lo, Hi};
}
1312+
12431313
IVSplitter::LoHiSplit IVSplitter::splitValueLoHi(Value &V, bool FoldConstants) {
1314+
// When the source is a zext from i32 to i64, we can just take the source
1315+
// and a zero constant as the lo/hi parts.
1316+
if (auto *ZExt = dyn_cast<ZExtInst>(&V)) {
1317+
auto *Src = ZExt->getOperand(0);
1318+
auto *SrcTy = Src->getType();
1319+
if (SrcTy->isIntegerTy(32)) {
1320+
IRBuilder<> Builder(ZExt);
1321+
auto *VTy = IGCLLVM::FixedVectorType::get(SrcTy, 1);
1322+
auto *Cast = Builder.CreateBitCast(Src, VTy);
1323+
return {Cast, Constant::getNullValue(VTy)};
1324+
}
1325+
if (SrcTy->isIntOrIntVectorTy(32)) {
1326+
// special case: splitting a zext of i32 to i64
1327+
return {Src, Constant::getNullValue(SrcTy)};
1328+
}
1329+
}
1330+
1331+
auto MaybeSplit = matchLoHiCombine(V);
1332+
if (MaybeSplit.Lo && MaybeSplit.Hi)
1333+
return MaybeSplit;
1334+
12441335
auto Splitted = splitValue(V, RegionType::LoRegion, ".LoSplit",
12451336
RegionType::HiRegion, ".HiSplit", FoldConstants);
12461337
return {Splitted.first, Splitted.second};

IGC/VectorCompiler/test/Emulation/emu_i64_add64.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,38 @@ define dllexport spir_kernel void @test_kernel_constant(<8 x i64> %left) {
101101
ret void
102102
}
103103

104+
; COM: ===============================
105+
; COM: TEST #4
106+
; COM: ===============================
107+
; COM: add64 with zero-extended src1 operand:
108+
; COM: 1. operands are split into lo/hi parts
109+
; COM: 2. [add_lo, carry] = genx_addc(src0.lo, src1.lo)
110+
; COM: 3. add_hi = add carry, src0.hi
111+
; COM: 4. add64 = combine(add_lo,add_hi)
112+
113+
; CHECK-LABEL: @test_kernel_zext
114+
115+
; CHECK: [[IV1:%[^ ]+.iv32cast[0-9]*]] = bitcast <[[OT:8 x i64]]> %left to <[[CT:16 x i32]]>
116+
; CHECK-NEXT: [[Lo_l:%[^ ]+.LoSplit[0-9]*]] = call <[[ET:8 x i32]]> [[rgn:@llvm.genx.rdregioni.[^(]+]](<[[CT]]> [[IV1]], [[low_reg:i32 0, i32 8, i32 2, i16 0,]]
117+
; CHECK-NEXT: [[Hi_l:%[^ ]+.HiSplit[0-9]*]] = call <[[ET]]> [[rgn]](<[[CT]]> [[IV1]], [[high_reg:i32 0, i32 8, i32 2, i16 4,]]
118+
119+
; CHECK: [[ADDC:%[^ ]+]] = call { <[[ET]]>, <[[ET]]> } @llvm.genx.addc.{{[^(]+}}(<[[ET]]> [[Lo_l]], <[[ET]]> %right_i32)
120+
; CHECK-NEXT: [[ADDC_ADD:%[^ ]+]] = extractvalue { <[[ET]]>, <[[ET]]> } [[ADDC]], 1
121+
; CHECK-NEXT: [[ADDC_CARRY:%[^ ]+]] = extractvalue { <[[ET]]>, <[[ET]]> } [[ADDC]], 0
122+
; CHECK-NEXT: [[Add_Hi:%add_hi]] = add <[[ET]]> [[ADDC_CARRY]], [[Hi_l]]
123+
124+
define dllexport spir_kernel void @test_kernel_zext(i32 %0, i32 %1, i32 %2) {
125+
%left = tail call <8 x i64> @llvm.genx.oword.ld.v8i64(i32 0, i32 %0, i32 0)
126+
%right_i32 = tail call <8 x i32> @llvm.genx.oword.ld.v8i32(i32 0, i32 %1, i32 0)
127+
%right = zext <8 x i32> %right_i32 to <8 x i64>
128+
%add64 = add <8 x i64> %left, %right
129+
tail call void @llvm.genx.oword.st.v8i64(i32 %2, i32 0, <8 x i64> %add64)
130+
ret void
131+
}
132+
104133
declare i64 @llvm.genx.rdregioni.i64.v8i64.i16(<8 x i64>, i32, i32, i32, i16, i32)
105134
declare <8 x i32> @llvm.genx.add3.v8i32.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
106135
declare { <8 x i32>, <8 x i32> } @llvm.genx.addc.v8i32.v8i32(<8 x i32>, <8 x i32>)
107136
declare <8 x i64> @llvm.genx.oword.ld.v8i64(i32, i32, i32)
137+
declare <8 x i32> @llvm.genx.oword.ld.v8i32(i32, i32, i32)
108138
declare void @llvm.genx.oword.st.v8i64(i32, i32, <8 x i64>)

IGC/VectorCompiler/test/Emulation/emu_i64_genx_absi.ll

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,11 @@ declare <2 x i64> @llvm.genx.absi.v2i64(<2 x i64>)
2929
; CHECK-NEXT: [[SUBB_SUB:%[^ ]+]] = extractvalue { <[[ET]]>, <[[ET]]> } [[SUBB]], 1
3030
; CHECK-NEXT: [[SUBB_BORROW:%[^ ]+]] = extractvalue { <[[ET]]>, <[[ET]]> } [[SUBB]], 0
3131
; CHECK-NEXT: [[BORROW_NEGATE:%[^ ]+]] = sub <[[ET]]> zeroinitializer, [[SUBB_BORROW]]
32-
; CHECK-NEXT: [[Hi_r_negate:%[^ ]+]] = sub <[[ET]]> zeroinitializer, [[Sub_Hi_r]]
33-
; CHECK-NEXT: [[Sub_Hi_Part:%[^ ]+]] = add <[[ET]]> zeroinitializer, [[BORROW_NEGATE]]
34-
; CHECK-NEXT: [[Sub_Hi:%[^ ]+]] = add <[[ET]]> [[Sub_Hi_Part]], [[Hi_r_negate]]
32+
; CHECK-NEXT: [[Sub_Hi:%[^ ]+]] = sub <[[ET]]> [[BORROW_NEGATE]], [[Sub_Hi_r]]
3533

36-
; CHECK-NEXT: [[P_JOIN:%[^ ]+]] = call <[[CT]]> @llvm.genx.wrregioni.{{[^(]+}}(<[[CT]]> undef, <[[ET]]> [[SUBB_SUB]], [[low_reg]]
37-
; CHECK-NEXT: [[JOINED:%[^ ]+]] = call <[[CT]]> @llvm.genx.wrregioni.{{[^(]+}}(<[[CT]]> [[P_JOIN]], <[[ET]]> [[Sub_Hi]], [[high_reg]]
38-
; CHECK-NEXT: [[RECAST:%[^ ]+]] = bitcast <[[CT]]> [[JOINED]] to <[[RT]]>
39-
; CHECK-NEXT: [[RECAST2:%[^ ]+]] = bitcast <[[RT]]> [[RECAST]] to <[[CT]]>
40-
41-
; CHECK-NEXT: [[NegLo:%[^ ]+]] = call <[[ET]]> [[rgn]](<[[CT]]> [[RECAST2]], [[low_reg]]
42-
; CHECK-NEXT: [[NegHi:%[^ ]+]] = call <[[ET]]> [[rgn]](<[[CT]]> [[RECAST2]], [[high_reg]]
43-
; CHECK-NEXT: [[CMP:%[^ ]+]] = icmp slt <[[ET]]> [[Hi_l]], zeroinitializer
44-
; CHECK-NEXT: [[SEL_LO:%[^ ]+]] = select <2 x i1> [[CMP]], <[[ET]]> [[NegLo]], <[[ET]]> [[Lo_l]]
45-
; CHECK-NEXT: [[SEL_HI:%[^ ]+]] = select <2 x i1> [[CMP]], <[[ET]]> [[NegHi]], <[[ET]]> [[Hi_l]]
34+
; CHECK: [[CMP:%[^ ]+]] = icmp slt <[[ET]]> [[Hi_l]], zeroinitializer
35+
; CHECK-NEXT: [[SEL_LO:%[^ ]+]] = select <2 x i1> [[CMP]], <[[ET]]> [[SUBB_SUB]], <[[ET]]> [[Lo_l]]
36+
; CHECK-NEXT: [[SEL_HI:%[^ ]+]] = select <2 x i1> [[CMP]], <[[ET]]> [[Sub_Hi]], <[[ET]]> [[Hi_l]]
4637

4738
; CHECK-NEXT: [[P_JOIN:%[^ ]+]] = call <[[CT]]> @llvm.genx.wrregioni.{{[^(]+}}(<[[CT]]> undef, <[[ET]]> [[SEL_LO]], [[low_reg]]
4839
; CHECK-NEXT: [[JOINED:%[^ ]+]] = call <[[CT]]> @llvm.genx.wrregioni.{{[^(]+}}(<[[CT]]> [[P_JOIN]], <[[ET]]> [[SEL_HI]], [[high_reg]]

IGC/VectorCompiler/test/Emulation/emu_i64_genx_ssaddsat.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ define dllexport spir_kernel void @test_ssaddsat_si64(i64 %lsop, i64 %rsop) {
157157
; CHECK-NEXT: [[JOINED:%[^ ]+]] = call <[[CT]]> @llvm.genx.wrregioni.{{[^(]+}}(<[[CT]]> [[P_JOIN]], <[[ET]]> [[Hi]], [[high_reg]]
158158
; CHECK-NEXT: [[RECAST:%[^ ]+]] = bitcast <[[CT]]> [[JOINED]] to <[[RT]]>
159159
; COM: here is the emulation of i64->i16 sstrunc.sat; we don't have rigorous checks for these
160-
; CHECK-NEXT: [[CAST_PREPARE:%[^ ]+]] = bitcast <[[RT]]> [[RECAST]] to <[[CT]]>
161160
; COM: ...
162161
; CHECK: [[SATURATE_I32:%[^ ]+]] = call <2 x i16> @llvm.genx.sstrunc.sat.v2i16.v2i32(<2 x i32> {{[^)]+}})
163162
; CHECK-NEXT: [[USER:%[^ ]+]] = bitcast <2 x i16> [[SATURATE_I32]] to <2 x i16>

0 commit comments

Comments
 (0)