[LV] Drop wrap flags for reductions using the VPlan def-use chain.
Update clearReductionWrapFlags to use the VPlan def-use chain from the reduction phi recipe to drop reduction wrap flags. This addresses an existing FIXME and fixes a crash when instructions in the reduction chain are not used and have been removed before VPlan code generation. Fixes #55540.
This commit is contained in:
parent
e0b98902a2
commit
c90235f0ef
|
@ -581,7 +581,7 @@ protected:
|
|||
void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
|
||||
|
||||
/// Clear NSW/NUW flags from reduction instructions if necessary.
|
||||
void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
|
||||
void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
|
||||
VPTransformState &State);
|
||||
|
||||
/// Fixup the LCSSA phi nodes in the unique exit block. This simply
|
||||
|
@ -3884,7 +3884,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
|
|||
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
|
||||
|
||||
// Wrap flags are in general invalid after vectorization, clear them.
|
||||
clearReductionWrapFlags(RdxDesc, State);
|
||||
clearReductionWrapFlags(PhiR, State);
|
||||
|
||||
// Before each round, move the insertion point right between
|
||||
// the PHIs and the values we are going to write.
|
||||
|
@ -4060,34 +4060,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
|
|||
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
|
||||
}
|
||||
|
||||
void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
|
||||
void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
|
||||
VPTransformState &State) {
|
||||
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
|
||||
RecurKind RK = RdxDesc.getRecurrenceKind();
|
||||
if (RK != RecurKind::Add && RK != RecurKind::Mul)
|
||||
return;
|
||||
|
||||
Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr();
|
||||
assert(LoopExitInstr && "null loop exit instruction");
|
||||
SmallVector<Instruction *, 8> Worklist;
|
||||
SmallPtrSet<Instruction *, 8> Visited;
|
||||
Worklist.push_back(LoopExitInstr);
|
||||
Visited.insert(LoopExitInstr);
|
||||
SmallVector<VPValue *, 8> Worklist;
|
||||
SmallPtrSet<VPValue *, 8> Visited;
|
||||
Worklist.push_back(PhiR);
|
||||
Visited.insert(PhiR);
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *Cur = Worklist.pop_back_val();
|
||||
if (isa<OverflowingBinaryOperator>(Cur))
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
// FIXME: Should not rely on getVPValue at this point.
|
||||
Value *V = State.get(State.Plan->getVPValue(Cur, true), Part);
|
||||
cast<Instruction>(V)->dropPoisonGeneratingFlags();
|
||||
VPValue *Cur = Worklist.pop_back_val();
|
||||
for (unsigned Part = 0; Part < UF; ++Part) {
|
||||
Value *V = State.get(Cur, Part);
|
||||
if (!isa<OverflowingBinaryOperator>(V))
|
||||
break;
|
||||
cast<Instruction>(V)->dropPoisonGeneratingFlags();
|
||||
}
|
||||
|
||||
for (User *U : Cur->users()) {
|
||||
Instruction *UI = cast<Instruction>(U);
|
||||
if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) &&
|
||||
Visited.insert(UI).second)
|
||||
Worklist.push_back(UI);
|
||||
}
|
||||
for (VPUser *U : Cur->users()) {
|
||||
auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
|
||||
if (!UserRecipe)
|
||||
continue;
|
||||
for (VPValue *V : UserRecipe->definedValues())
|
||||
if (Visited.insert(V).second)
|
||||
Worklist.push_back(V);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -478,3 +478,38 @@ exit:
|
|||
store i32 %sum.lcssa, i32* %gep.dst.1, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
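; Test for PR55540: dropping wrap flags from a reduction must not crash when a user of the reduction phi has been removed from the vectorized loop.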
|
||||
define void @test_drop_poison_generating_dead_recipe(i64* %dst) {
|
||||
; CHECK-LABEL: @test_drop_poison_generating_dead_recipe(
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
|
||||
; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], <i64 -31364, i64 -31364, i64 -31364, i64 -31364>
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360
|
||||
; CHECK-NEXT: br i1 [[TMP1]], label %middle.block, label %vector.body
|
||||
; CHECK: middle.block:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]])
|
||||
; CHECK-NEXT: store i64 [[TMP2]], i64* [[DST:%.*]], align 8
|
||||
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 363, 360
|
||||
; CHECK-NEXT: br i1 [[CMP_N]], label %exit, label %scalar.ph
|
||||
; CHECK: scalar.ph:
|
||||
|
||||
;
|
||||
entry:
|
||||
br label %body
|
||||

|
||||
body:
|
||||
; %red is the reduction phi; %red.next is the value fed back into it.
%red = phi i64 [ 0, %entry ], [ %red.next, %body ]
|
||||
%iv = phi i32 [ 2, %entry ], [ %iv.next, %body ]
|
||||
; %add.1 is a second user of the reduction phi. The expected vector.body
; above contains no vector counterpart of this add.
%add.1 = add nuw i64 %red, -23523
|
||||
; This store goes to the same address (%dst) as the store of %red.next below;
; the expected output has a single store, in middle.block.
store i64 %add.1, i64* %dst, align 8
|
||||
; Reduction update. It carries nuw here, but the expected vector add
; ([[TMP0]]) has no nuw flag.
%red.next = add nuw i64 %red, -31364
|
||||
store i64 %red.next, i64* %dst, align 8
|
||||
%iv.next = add nuw nsw i32 %iv, 1
|
||||
%ec = icmp ugt i32 %iv, 363
|
||||
br i1 %ec, label %exit, label %body
|
||||

|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue