[SLP] Improve lookup of the buildvector top insertelement instruction.
When estimating the cost of the in-tree vectorized scalars in buildvector sequences, we need to take into account the vectorized insertelement instruction. The top of the buildvector sequence is the topmost vectorized insertelement instruction, because it will have more than 1 use after the vectorization. For the affected test case, this improves throughput from 21 to 16 (per llvm-mca). Differential Revision: https://reviews.llvm.org/D132740
This commit is contained in:
parent
9b1915cd0a
commit
e6345bf644
|
@ -6878,8 +6878,9 @@ InstructionCost BoUpSLP::getSpillCost() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if two insertelement instructions are from the same buildvector.
|
/// Check if two insertelement instructions are from the same buildvector.
|
||||||
static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
|
static bool areTwoInsertFromSameBuildVector(
|
||||||
InsertElementInst *V) {
|
InsertElementInst *VU, InsertElementInst *V,
|
||||||
|
function_ref<Value *(InsertElementInst *)> GetBaseOperand) {
|
||||||
// Instructions must be from the same basic blocks.
|
// Instructions must be from the same basic blocks.
|
||||||
if (VU->getParent() != V->getParent())
|
if (VU->getParent() != V->getParent())
|
||||||
return false;
|
return false;
|
||||||
|
@ -6906,14 +6907,14 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
|
||||||
getInsertIndex(IE1).value_or(Idx2) == Idx2)
|
getInsertIndex(IE1).value_or(Idx2) == Idx2)
|
||||||
IE1 = nullptr;
|
IE1 = nullptr;
|
||||||
else
|
else
|
||||||
IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
|
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
|
||||||
}
|
}
|
||||||
if (IE2) {
|
if (IE2) {
|
||||||
if ((IE2 != V && !IE2->hasOneUse()) ||
|
if ((IE2 != V && !IE2->hasOneUse()) ||
|
||||||
getInsertIndex(IE2).value_or(Idx1) == Idx1)
|
getInsertIndex(IE2).value_or(Idx1) == Idx1)
|
||||||
IE2 = nullptr;
|
IE2 = nullptr;
|
||||||
else
|
else
|
||||||
IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
|
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
|
||||||
}
|
}
|
||||||
} while (IE1 || IE2);
|
} while (IE1 || IE2);
|
||||||
return false;
|
return false;
|
||||||
|
@ -7117,12 +7118,18 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
|
||||||
Optional<unsigned> InsertIdx = getInsertIndex(VU);
|
Optional<unsigned> InsertIdx = getInsertIndex(VU);
|
||||||
if (InsertIdx) {
|
if (InsertIdx) {
|
||||||
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
|
const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
|
||||||
auto *It =
|
auto *It = find_if(
|
||||||
find_if(FirstUsers,
|
FirstUsers,
|
||||||
[VU](const std::pair<Value *, const TreeEntry *> &Pair) {
|
[this, VU](const std::pair<Value *, const TreeEntry *> &Pair) {
|
||||||
return areTwoInsertFromSameBuildVector(
|
return areTwoInsertFromSameBuildVector(
|
||||||
VU, cast<InsertElementInst>(Pair.first));
|
VU, cast<InsertElementInst>(Pair.first),
|
||||||
});
|
[this](InsertElementInst *II) -> Value * {
|
||||||
|
Value *Op0 = II->getOperand(0);
|
||||||
|
if (getTreeEntry(II) && !getTreeEntry(Op0))
|
||||||
|
return nullptr;
|
||||||
|
return Op0;
|
||||||
|
});
|
||||||
|
});
|
||||||
int VecId = -1;
|
int VecId = -1;
|
||||||
if (It == FirstUsers.end()) {
|
if (It == FirstUsers.end()) {
|
||||||
(void)ShuffleMasks.emplace_back();
|
(void)ShuffleMasks.emplace_back();
|
||||||
|
@ -8590,7 +8597,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
|
find_if(ShuffledInserts, [VU](const ShuffledInsertData &Data) {
|
||||||
// Checks if 2 insertelements are from the same buildvector.
|
// Checks if 2 insertelements are from the same buildvector.
|
||||||
InsertElementInst *VecInsert = Data.InsertElements.front();
|
InsertElementInst *VecInsert = Data.InsertElements.front();
|
||||||
return areTwoInsertFromSameBuildVector(VU, VecInsert);
|
return areTwoInsertFromSameBuildVector(
|
||||||
|
VU, VecInsert,
|
||||||
|
[](InsertElementInst *II) { return II->getOperand(0); });
|
||||||
});
|
});
|
||||||
unsigned Idx = *InsertIdx;
|
unsigned Idx = *InsertIdx;
|
||||||
if (It == ShuffledInserts.end()) {
|
if (It == ShuffledInserts.end()) {
|
||||||
|
|
|
@ -24,6 +24,7 @@ define void @test(i32* nocapture %t2) {
|
||||||
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
|
; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]]
|
||||||
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
|
; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]]
|
||||||
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
|
; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433
|
||||||
|
; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270
|
||||||
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
|
; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137
|
||||||
; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
|
; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]]
|
||||||
; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
|
; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]]
|
||||||
|
@ -33,22 +34,20 @@ define void @test(i32* nocapture %t2) {
|
||||||
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
|
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
|
||||||
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
|
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
|
||||||
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
|
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[T27]], i32 0
|
; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[T47]], i32 1
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T15]], i32 0
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T15]], i32 2
|
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T40]], i32 1
|
||||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T40]], i32 3
|
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0
|
||||||
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 6270, i32 poison, i32 poison, i32 poison>, i32 [[T40]], i32 1
|
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1
|
||||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T9]], i32 2
|
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]]
|
||||||
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[T48]], i32 3
|
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP7]]
|
; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2
|
||||||
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP7]]
|
; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
|
||||||
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
|
||||||
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
|
||||||
; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 6, i32 7>
|
|
||||||
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
|
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
|
||||||
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[T71]], <8 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 9>
|
; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
|
||||||
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[TMP13]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||||
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
|
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
|
||||||
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
|
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
|
|
Loading…
Reference in New Issue