[SLP] Improve/fix CSE analysis of the blocks/instructions.
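Added analysis for loop-invariant extractelement instructions, so that generated extracts can be hoisted like gathered insertelement sequences. Also improved detection of the blocks to CSE for generated extractelement instructions: the block that actually contains the generated extract is now recorded. Differential Revision: https://reviews.llvm.org/D135279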
This commit is contained in:
parent
7850df3de0
commit
323ed2308a
|
@ -2714,8 +2714,9 @@ private:
|
||||||
/// Values used only by @llvm.assume calls.
|
/// Values used only by @llvm.assume calls.
|
||||||
SmallPtrSet<const Value *, 32> EphValues;
|
SmallPtrSet<const Value *, 32> EphValues;
|
||||||
|
|
||||||
/// Holds all of the instructions that we gathered.
|
/// Holds all of the instructions that we gathered, shuffle instructions and
|
||||||
SetVector<Instruction *> GatherShuffleSeq;
|
/// extractelements.
|
||||||
|
SetVector<Instruction *> GatherShuffleExtractSeq;
|
||||||
|
|
||||||
/// A list of blocks that we are going to CSE.
|
/// A list of blocks that we are going to CSE.
|
||||||
SetVector<BasicBlock *> CSEBlocks;
|
SetVector<BasicBlock *> CSEBlocks;
|
||||||
|
@ -7786,7 +7787,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
|
||||||
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
|
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
|
||||||
if (!InsElt)
|
if (!InsElt)
|
||||||
return Vec;
|
return Vec;
|
||||||
GatherShuffleSeq.insert(InsElt);
|
GatherShuffleExtractSeq.insert(InsElt);
|
||||||
CSEBlocks.insert(InsElt->getParent());
|
CSEBlocks.insert(InsElt->getParent());
|
||||||
// Add to our 'need-to-extract' list.
|
// Add to our 'need-to-extract' list.
|
||||||
if (TreeEntry *Entry = getTreeEntry(V)) {
|
if (TreeEntry *Entry = getTreeEntry(V)) {
|
||||||
|
@ -7940,7 +7941,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
|
||||||
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
|
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
|
||||||
}
|
}
|
||||||
if (auto *I = dyn_cast<Instruction>(V)) {
|
if (auto *I = dyn_cast<Instruction>(V)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8005,7 +8006,7 @@ Value *BoUpSLP::createBuildVector(ArrayRef<Value *> VL) {
|
||||||
VL = UniqueValues;
|
VL = UniqueValues;
|
||||||
}
|
}
|
||||||
|
|
||||||
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
|
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq,
|
||||||
CSEBlocks);
|
CSEBlocks);
|
||||||
Value *Vec = gather(VL);
|
Value *Vec = gather(VL);
|
||||||
if (!ReuseShuffleIndicies.empty()) {
|
if (!ReuseShuffleIndicies.empty()) {
|
||||||
|
@ -8025,7 +8026,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
|
|
||||||
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
|
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
|
||||||
unsigned VF = E->getVectorFactor();
|
unsigned VF = E->getVectorFactor();
|
||||||
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
|
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleExtractSeq,
|
||||||
CSEBlocks);
|
CSEBlocks);
|
||||||
if (E->State == TreeEntry::NeedToGather) {
|
if (E->State == TreeEntry::NeedToGather) {
|
||||||
if (E->getMainOp())
|
if (E->getMainOp())
|
||||||
|
@ -8041,7 +8042,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
|
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
|
||||||
Entries.back()->VectorizedValue, Mask);
|
Entries.back()->VectorizedValue, Mask);
|
||||||
if (auto *I = dyn_cast<Instruction>(Vec)) {
|
if (auto *I = dyn_cast<Instruction>(Vec)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -8173,7 +8174,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
if (!IsIdentity || NumElts != NumScalars) {
|
if (!IsIdentity || NumElts != NumScalars) {
|
||||||
V = Builder.CreateShuffleVector(V, Mask);
|
V = Builder.CreateShuffleVector(V, Mask);
|
||||||
if (auto *I = dyn_cast<Instruction>(V)) {
|
if (auto *I = dyn_cast<Instruction>(V)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8191,7 +8192,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
V = Builder.CreateShuffleVector(
|
V = Builder.CreateShuffleVector(
|
||||||
V, InsertMask, cast<Instruction>(E->Scalars.back())->getName());
|
V, InsertMask, cast<Instruction>(E->Scalars.back())->getName());
|
||||||
if (auto *I = dyn_cast<Instruction>(V)) {
|
if (auto *I = dyn_cast<Instruction>(V)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
// Create freeze for undef values.
|
// Create freeze for undef values.
|
||||||
|
@ -8209,7 +8210,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
FirstInsert->getOperand(0), V, InsertMask,
|
FirstInsert->getOperand(0), V, InsertMask,
|
||||||
cast<Instruction>(E->Scalars.back())->getName());
|
cast<Instruction>(E->Scalars.back())->getName());
|
||||||
if (auto *I = dyn_cast<Instruction>(V)) {
|
if (auto *I = dyn_cast<Instruction>(V)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8587,7 +8588,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
// instruction, if any.
|
// instruction, if any.
|
||||||
for (Value *V : {V0, V1}) {
|
for (Value *V : {V0, V1}) {
|
||||||
if (auto *I = dyn_cast<Instruction>(V)) {
|
if (auto *I = dyn_cast<Instruction>(V)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8611,7 +8612,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||||
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
|
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
|
||||||
if (auto *I = dyn_cast<Instruction>(V)) {
|
if (auto *I = dyn_cast<Instruction>(V)) {
|
||||||
V = propagateMetadata(I, E->Scalars);
|
V = propagateMetadata(I, E->Scalars);
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
V = ShuffleBuilder.finalize(V);
|
V = ShuffleBuilder.finalize(V);
|
||||||
|
@ -8711,6 +8712,12 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
} else {
|
} else {
|
||||||
Ex = Builder.CreateExtractElement(Vec, Lane);
|
Ex = Builder.CreateExtractElement(Vec, Lane);
|
||||||
}
|
}
|
||||||
|
// The then branch of the previous if may produce constants, since 0
|
||||||
|
// operand might be a constant.
|
||||||
|
if (auto *ExI = dyn_cast<Instruction>(Ex)) {
|
||||||
|
GatherShuffleExtractSeq.insert(ExI);
|
||||||
|
CSEBlocks.insert(ExI->getParent());
|
||||||
|
}
|
||||||
// If necessary, sign-extend or zero-extend ScalarRoot
|
// If necessary, sign-extend or zero-extend ScalarRoot
|
||||||
// to the larger type.
|
// to the larger type.
|
||||||
if (!MinBWs.count(ScalarRoot))
|
if (!MinBWs.count(ScalarRoot))
|
||||||
|
@ -8740,7 +8747,6 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
Builder.SetInsertPoint(&F->getEntryBlock().front());
|
Builder.SetInsertPoint(&F->getEntryBlock().front());
|
||||||
}
|
}
|
||||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
||||||
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
|
|
||||||
auto &NewInstLocs = ExternallyUsedValues[NewInst];
|
auto &NewInstLocs = ExternallyUsedValues[NewInst];
|
||||||
auto It = ExternallyUsedValues.find(Scalar);
|
auto It = ExternallyUsedValues.find(Scalar);
|
||||||
assert(It != ExternallyUsedValues.end() &&
|
assert(It != ExternallyUsedValues.end() &&
|
||||||
|
@ -8832,20 +8838,17 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
|
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
|
||||||
}
|
}
|
||||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
||||||
CSEBlocks.insert(PH->getIncomingBlock(i));
|
|
||||||
PH->setOperand(i, NewInst);
|
PH->setOperand(i, NewInst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Builder.SetInsertPoint(cast<Instruction>(User));
|
Builder.SetInsertPoint(cast<Instruction>(User));
|
||||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
||||||
CSEBlocks.insert(cast<Instruction>(User)->getParent());
|
|
||||||
User->replaceUsesOfWith(Scalar, NewInst);
|
User->replaceUsesOfWith(Scalar, NewInst);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Builder.SetInsertPoint(&F->getEntryBlock().front());
|
Builder.SetInsertPoint(&F->getEntryBlock().front());
|
||||||
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
|
||||||
CSEBlocks.insert(&F->getEntryBlock());
|
|
||||||
User->replaceUsesOfWith(Scalar, NewInst);
|
User->replaceUsesOfWith(Scalar, NewInst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8959,7 +8962,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
|
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
|
||||||
CombinedMask1);
|
CombinedMask1);
|
||||||
if (auto *I = dyn_cast<Instruction>(Vec)) {
|
if (auto *I = dyn_cast<Instruction>(Vec)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
return Vec;
|
return Vec;
|
||||||
|
@ -8974,7 +8977,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
!IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) {
|
!IsIdentityMask(CombinedMask, cast<FixedVectorType>(Op->getType()))) {
|
||||||
Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask);
|
Value *Vec = Builder.CreateShuffleVector(Op, CombinedMask);
|
||||||
if (auto *I = dyn_cast<Instruction>(Vec)) {
|
if (auto *I = dyn_cast<Instruction>(Vec)) {
|
||||||
GatherShuffleSeq.insert(I);
|
GatherShuffleExtractSeq.insert(I);
|
||||||
CSEBlocks.insert(I->getParent());
|
CSEBlocks.insert(I->getParent());
|
||||||
}
|
}
|
||||||
return Vec;
|
return Vec;
|
||||||
|
@ -9114,10 +9117,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void BoUpSLP::optimizeGatherSequence() {
|
void BoUpSLP::optimizeGatherSequence() {
|
||||||
LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size()
|
LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleExtractSeq.size()
|
||||||
<< " gather sequences instructions.\n");
|
<< " gather sequences instructions.\n");
|
||||||
// LICM InsertElementInst sequences.
|
// LICM InsertElementInst sequences.
|
||||||
for (Instruction *I : GatherShuffleSeq) {
|
for (Instruction *I : GatherShuffleExtractSeq) {
|
||||||
if (isDeleted(I))
|
if (isDeleted(I))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -9219,7 +9222,7 @@ void BoUpSLP::optimizeGatherSequence() {
|
||||||
if (isDeleted(&In))
|
if (isDeleted(&In))
|
||||||
continue;
|
continue;
|
||||||
if (!isa<InsertElementInst, ExtractElementInst, ShuffleVectorInst>(&In) &&
|
if (!isa<InsertElementInst, ExtractElementInst, ShuffleVectorInst>(&In) &&
|
||||||
!GatherShuffleSeq.contains(&In))
|
!GatherShuffleExtractSeq.contains(&In))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Check if we can replace this instruction with any of the
|
// Check if we can replace this instruction with any of the
|
||||||
|
@ -9238,7 +9241,7 @@ void BoUpSLP::optimizeGatherSequence() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
|
if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
|
||||||
GatherShuffleSeq.contains(V) &&
|
GatherShuffleExtractSeq.contains(V) &&
|
||||||
IsIdenticalOrLessDefined(V, &In, NewMask) &&
|
IsIdenticalOrLessDefined(V, &In, NewMask) &&
|
||||||
DT->dominates(In.getParent(), V->getParent())) {
|
DT->dominates(In.getParent(), V->getParent())) {
|
||||||
In.moveAfter(V);
|
In.moveAfter(V);
|
||||||
|
@ -9259,7 +9262,7 @@ void BoUpSLP::optimizeGatherSequence() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CSEBlocks.clear();
|
CSEBlocks.clear();
|
||||||
GatherShuffleSeq.clear();
|
GatherShuffleExtractSeq.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
BoUpSLP::ScheduleData *
|
BoUpSLP::ScheduleData *
|
||||||
|
|
|
@ -11,9 +11,9 @@ define void @test(i32* %ptr, i32* noalias %s) {
|
||||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
|
||||||
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[S:%.*]] to <4 x i32>*
|
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[S:%.*]] to <4 x i32>*
|
||||||
; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
|
; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
|
||||||
; CHECK-NEXT: br label [[LOOP1:%.*]]
|
; CHECK-NEXT: br label [[LOOP1:%.*]]
|
||||||
; CHECK: loop1:
|
; CHECK: loop1:
|
||||||
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
|
|
||||||
; CHECK-NEXT: store i32 [[TMP3]], i32* [[S]], align 4
|
; CHECK-NEXT: store i32 [[TMP3]], i32* [[S]], align 4
|
||||||
; CHECK-NEXT: br i1 true, label [[LOOP1]], label [[CONT:%.*]]
|
; CHECK-NEXT: br i1 true, label [[LOOP1]], label [[CONT:%.*]]
|
||||||
; CHECK: cont:
|
; CHECK: cont:
|
||||||
|
|
Loading…
Reference in New Issue