[LoopInterchange] Support loop interchange with floating point reductions

Enable loop interchange support for floating point reductions
when reordering of the floating point operations is allowed.

Previously, when we encountered a floating point PHI node in the
outer loop exit block, we bailed out, since we could not detect
floating point reductions in the early days. Now we remove this
limitation since we are able to detect floating point reductions.

Reviewed By: #loopoptwg, Meinersbur

Differential Revision: https://reviews.llvm.org/D117450
This commit is contained in:
Congzhe Cao 2022-02-06 16:55:20 -05:00 committed by CongzheUalberta
parent 0d8850ae2c
commit 1ef04326ec
3 changed files with 107 additions and 26 deletions

View File

@ -733,8 +733,12 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
if (PHI->getNumIncomingValues() == 1)
continue;
RecurrenceDescriptor RD;
if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
// Detect floating point reduction only when it can be reordered.
if (RD.getExactFPMathInst() != nullptr)
return nullptr;
return PHI;
}
return nullptr;
}
}
@ -893,28 +897,23 @@ areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
for (PHINode &PHI : LoopNestExit->phis()) {
// FIXME: We currently are not able to detect floating point reductions
// and have to use floating point PHIs as a proxy to prevent
// interchanging in the presence of floating point reductions.
if (PHI.getType()->isFloatingPointTy())
return false;
for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
continue;
Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
continue;
// The incoming value is defined in the outer loop latch. Currently we
// only support that in case the outer loop latch has a single predecessor.
// This guarantees that the outer loop latch is executed if and only if
// the inner loop is executed (because tightlyNested() guarantees that the
// outer loop header only branches to the inner loop or the outer loop
// latch).
// FIXME: We could weaken this logic and allow multiple predecessors,
// if the values are produced outside the loop latch. We would need
// additional logic to update the PHI nodes in the exit block as
// well.
if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
return false;
// The incoming value is defined in the outer loop latch. Currently we
// only support that in case the outer loop latch has a single predecessor.
// This guarantees that the outer loop latch is executed if and only if
// the inner loop is executed (because tightlyNested() guarantees that the
// outer loop header only branches to the inner loop or the outer loop
// latch).
// FIXME: We could weaken this logic and allow multiple predecessors,
// if the values are produced outside the loop latch. We would need
// additional logic to update the PHI nodes in the exit block as
// well.
if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
return false;
}
}
return true;

View File

@ -135,9 +135,8 @@ for.end16: ; preds = %for.exit
ret void
}
; FIXME: We currently do not support LCSSA phi nodes involving floating point
; types, as we fail to detect floating point reductions for now.
; REMARK: UnsupportedPHIOuter
; Loops with floating point reductions are interchanged with fastmath.
; REMARK: Interchanged
; REMARK-NEXT: lcssa_04
define void @lcssa_04() {
@ -146,28 +145,31 @@ entry:
outer.header: ; preds = %outer.inc, %entry
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
%float.outer = phi float [ 1.000000e+00, %entry ], [ 2.000000e+00, %outer.inc ]
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.outer.next, %outer.inc ]
br label %for.body3
for.body3: ; preds = %for.body3, %outer.header
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
%float.inner = phi float [ %float.inner.next, %for.body3 ], [ %float.outer, %outer.header ]
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer
%vA = load i32, i32* %arrayidx5
%arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer
%vC = load i32, i32* %arrayidx9
%add = add nsw i32 %vA, %vC
%float.inner.next = fadd fast float %float.inner, 1.000000e+00
store i32 %add, i32* %arrayidx5
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
%exitcond = icmp eq i64 %iv.inner.next, 100
br i1 %exitcond, label %outer.inc, label %for.body3
outer.inc: ; preds = %for.body3
%float.outer.next = phi float [ %float.inner.next, %for.body3 ]
%iv.outer.next = add nsw i64 %iv.outer, 1
%cmp = icmp eq i64 %iv.outer.next, 100
br i1 %cmp, label %outer.header, label %for.exit
for.exit: ; preds = %outer.inc
%float.outer.lcssa = phi float [ %float.outer, %outer.inc ]
%float.outer.lcssa = phi float [ %float.outer.next, %outer.inc ]
store float %float.outer.lcssa, float* @F
br label %for.end16

View File

@ -227,3 +227,83 @@ for1.loopexit: ; preds = %for1.inc
%il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
ret i64 %il.res.lcssa2
}
; Floating point reductions are interchanged if all the fp instructions
; involved allow reassociation.
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: test5
; test5 walks a two-level loop nest over %Arr and %Arr2 and accumulates the
; loaded floats into a single running value that is carried across both the
; inner and the outer loop, then returned. Both fadd instructions in the
; accumulation chain carry the 'fast' flag.
define float @test5([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
entry:
br label %outer.header
; Outer loop header: %iv.outer starts at 1; %float.outer starts at 1.0 and is
; fed back from the inner loop's final value on every outer iteration.
outer.header: ; preds = %outer.inc, %entry
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
br label %for.body3
; Inner loop body: %float.inner continues from %float.outer on entry and from
; the second fadd on the back edge.
for.body3: ; preds = %for.body3, %outer.header
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
; Both arrays are indexed as [%iv.inner][%iv.outer].
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
%vA = load float, float* %arrayidx5
; First accumulation step, marked 'fast'.
%float.inner.inc = fadd fast float %float.inner, %vA
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
%vB = load float, float* %arrayidx6
; Second accumulation step, also marked 'fast'.
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
; The inner loop exits once the incremented inner induction variable equals 100.
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
%exitcond = icmp eq i64 %iv.inner.next, 100
br i1 %exitcond, label %outer.inc, label %for.body3
; Outer loop latch: a single-incoming PHI captures the inner loop's final
; accumulated value; the outer loop exits once %iv.outer.next equals 100.
outer.inc: ; preds = %for.body3
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
%iv.outer.next = add nsw i64 %iv.outer, 1
%cmp = icmp eq i64 %iv.outer.next, 100
br i1 %cmp, label %outer.header, label %for.exit
; Loop nest exit: a floating point PHI forwards the value defined in the outer
; loop latch, which is then returned.
for.exit: ; preds = %outer.inc
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
ret float %float.outer.lcssa
}
; Floating point reductions are not interchanged if not all the fp instructions
; involved allow reassociation.
; REMARKS: --- !Missed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: UnsupportedPHIOuter
; REMARKS-NEXT: Function: test6
; test6 has the same loop nest shape and accumulation chain as a fully 'fast'
; floating point reduction, except that the first fadd in the chain has no
; fast-math flags while the second one is still marked 'fast'.
define float @test6([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
entry:
br label %outer.header
; Outer loop header: %iv.outer starts at 1; %float.outer starts at 1.0 and is
; fed back from the inner loop's final value on every outer iteration.
outer.header: ; preds = %outer.inc, %entry
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
br label %for.body3
; Inner loop body: %float.inner continues from %float.outer on entry and from
; the second fadd on the back edge.
for.body3: ; preds = %for.body3, %outer.header
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
; Both arrays are indexed as [%iv.inner][%iv.outer].
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
%vA = load float, float* %arrayidx5
; This is the one instruction in the chain without the 'fast' flag.
%float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
%vB = load float, float* %arrayidx6
; Second accumulation step, marked 'fast'.
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
; The inner loop exits once the incremented inner induction variable equals 100.
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
%exitcond = icmp eq i64 %iv.inner.next, 100
br i1 %exitcond, label %outer.inc, label %for.body3
; Outer loop latch: a single-incoming PHI captures the inner loop's final
; accumulated value; the outer loop exits once %iv.outer.next equals 100.
outer.inc: ; preds = %for.body3
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
%iv.outer.next = add nsw i64 %iv.outer, 1
%cmp = icmp eq i64 %iv.outer.next, 100
br i1 %cmp, label %outer.header, label %for.exit
; Loop nest exit: a floating point PHI forwards the value defined in the outer
; loop latch, which is then returned.
for.exit: ; preds = %outer.inc
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
ret float %float.outer.lcssa
}