[LV] Drop wrap flags for reductions using the VPlan def-use chain.

Update clearReductionWrapFlags to use the VPlan def-use chain from the
reduction phi recipe to drop reduction wrap flags.

This addresses an existing FIXME and fixes a crash when instructions in
the reduction chain are not used and have been removed before VPlan
code generation.

Fixes #55540.
This commit is contained in:
Florian Hahn 2022-05-19 20:36:46 +01:00
parent e0b98902a2
commit c90235f0ef
No known key found for this signature in database
GPG Key ID: CF59919C6547A668
2 changed files with 57 additions and 21 deletions

View File

@ -581,7 +581,7 @@ protected:
void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
/// Clear NSW/NUW flags from reduction instructions if necessary.
void clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
VPTransformState &State);
/// Fixup the LCSSA phi nodes in the unique exit block. This simply
@ -3884,7 +3884,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
// Wrap flags are in general invalid after vectorization, clear them.
clearReductionWrapFlags(RdxDesc, State);
clearReductionWrapFlags(PhiR, State);
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
@ -4060,34 +4060,35 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}
void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &RdxDesc,
void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
VPTransformState &State) {
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RK != RecurKind::Add && RK != RecurKind::Mul)
return;
Instruction *LoopExitInstr = RdxDesc.getLoopExitInstr();
assert(LoopExitInstr && "null loop exit instruction");
SmallVector<Instruction *, 8> Worklist;
SmallPtrSet<Instruction *, 8> Visited;
Worklist.push_back(LoopExitInstr);
Visited.insert(LoopExitInstr);
SmallVector<VPValue *, 8> Worklist;
SmallPtrSet<VPValue *, 8> Visited;
Worklist.push_back(PhiR);
Visited.insert(PhiR);
while (!Worklist.empty()) {
Instruction *Cur = Worklist.pop_back_val();
if (isa<OverflowingBinaryOperator>(Cur))
for (unsigned Part = 0; Part < UF; ++Part) {
// FIXME: Should not rely on getVPValue at this point.
Value *V = State.get(State.Plan->getVPValue(Cur, true), Part);
cast<Instruction>(V)->dropPoisonGeneratingFlags();
VPValue *Cur = Worklist.pop_back_val();
for (unsigned Part = 0; Part < UF; ++Part) {
Value *V = State.get(Cur, Part);
if (!isa<OverflowingBinaryOperator>(V))
break;
cast<Instruction>(V)->dropPoisonGeneratingFlags();
}
for (User *U : Cur->users()) {
Instruction *UI = cast<Instruction>(U);
if ((Cur != LoopExitInstr || OrigLoop->contains(UI->getParent())) &&
Visited.insert(UI).second)
Worklist.push_back(UI);
}
for (VPUser *U : Cur->users()) {
auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
if (!UserRecipe)
continue;
for (VPValue *V : UserRecipe->definedValues())
if (Visited.insert(V).second)
Worklist.push_back(V);
}
}
}

View File

@ -478,3 +478,38 @@ exit:
store i32 %sum.lcssa, i32* %gep.dst.1, align 4
ret void
}
; Test for PR55540.
; The loop performs an add reduction on %red. %add.1 also uses %red, but it
; does not feed back into the reduction phi; its only use is a store to %dst.
; The CHECK lines expect the vector body to contain only the reduction add
; (plus the induction update and exit branch), and expect that add to be
; emitted without the nuw flag carried by the scalar %red.next.
define void @test_drop_poison_generating_dead_recipe(i64* %dst) {
; CHECK-LABEL: @test_drop_poison_generating_dead_recipe(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP0]] = add <4 x i64> [[VEC_PHI]], <i64 -31364, i64 -31364, i64 -31364, i64 -31364>
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 360
; CHECK-NEXT: br i1 [[TMP1]], label %vector.body
; CHECK: middle.block:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]])
; CHECK-NEXT: store i64 [[TMP2]], i64* [[DST:%.*]], align 8
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 363, 360
; CHECK-NEXT: br i1 [[CMP_N]], label %exit, label %scalar.ph
; CHECK: scalar.ph:
;
entry:
br label %body
body:
%red = phi i64 [ 0, %entry ], [ %red.next, %body ]
%iv = phi i32 [ 2, %entry ], [ %iv.next, %body ]
; User of the reduction phi whose result is only stored, never fed back into
; %red.
%add.1 = add nuw i64 %red, -23523
store i64 %add.1, i64* %dst, align 8
; The actual reduction update: this value is the backedge input of %red.
%red.next = add nuw i64 %red, -31364
store i64 %red.next, i64* %dst, align 8
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp ugt i32 %iv, 363
br i1 %ec, label %exit, label %body
exit:
ret void
}