[LoopInterchange] Support loop interchange with floating point reductions
Enable loop interchange support for floating point reductions when reordering of the floating point operations is allowed. Previously, when we encountered a floating point PHI node in the outer loop exit block, we bailed out, because at that time we could not detect floating point reductions. Now we remove this limitation since we are able to detect floating point reductions. Reviewed By: #loopoptwg, Meinersbur Differential Revision: https://reviews.llvm.org/D117450
This commit is contained in:
parent
0d8850ae2c
commit
1ef04326ec
|
@ -733,8 +733,12 @@ static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
|
||||||
if (PHI->getNumIncomingValues() == 1)
|
if (PHI->getNumIncomingValues() == 1)
|
||||||
continue;
|
continue;
|
||||||
RecurrenceDescriptor RD;
|
RecurrenceDescriptor RD;
|
||||||
if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
|
if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
|
||||||
|
// Detect floating point reduction only when it can be reordered.
|
||||||
|
if (RD.getExactFPMathInst() != nullptr)
|
||||||
|
return nullptr;
|
||||||
return PHI;
|
return PHI;
|
||||||
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -893,11 +897,6 @@ areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
|
||||||
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
|
static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
|
||||||
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
|
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
|
||||||
for (PHINode &PHI : LoopNestExit->phis()) {
|
for (PHINode &PHI : LoopNestExit->phis()) {
|
||||||
// FIXME: We currently are not able to detect floating point reductions
|
|
||||||
// and have to use floating point PHIs as a proxy to prevent
|
|
||||||
// interchanging in the presence of floating point reductions.
|
|
||||||
if (PHI.getType()->isFloatingPointTy())
|
|
||||||
return false;
|
|
||||||
for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
|
for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
|
||||||
Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
|
Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
|
||||||
if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
|
if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
|
||||||
|
|
|
@ -135,9 +135,8 @@ for.end16: ; preds = %for.exit
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: We currently do not support LCSSA phi nodes involving floating point
|
; Loops with floating point reductions are interchanged with fastmath.
|
||||||
; types, as we fail to detect floating point reductions for now.
|
; REMARK: Interchanged
|
||||||
; REMARK: UnsupportedPHIOuter
|
|
||||||
; REMARK-NEXT: lcssa_04
|
; REMARK-NEXT: lcssa_04
|
||||||
|
|
||||||
define void @lcssa_04() {
|
define void @lcssa_04() {
|
||||||
|
@ -146,28 +145,31 @@ entry:
|
||||||
|
|
||||||
outer.header: ; preds = %outer.inc, %entry
|
outer.header: ; preds = %outer.inc, %entry
|
||||||
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
|
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
|
||||||
%float.outer = phi float [ 1.000000e+00, %entry ], [ 2.000000e+00, %outer.inc ]
|
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.outer.next, %outer.inc ]
|
||||||
br label %for.body3
|
br label %for.body3
|
||||||
|
|
||||||
for.body3: ; preds = %for.body3, %outer.header
|
for.body3: ; preds = %for.body3, %outer.header
|
||||||
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
|
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
|
||||||
|
%float.inner = phi float [ %float.inner.next, %for.body3 ], [ %float.outer, %outer.header ]
|
||||||
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer
|
%arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer
|
||||||
%vA = load i32, i32* %arrayidx5
|
%vA = load i32, i32* %arrayidx5
|
||||||
%arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer
|
%arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer
|
||||||
%vC = load i32, i32* %arrayidx9
|
%vC = load i32, i32* %arrayidx9
|
||||||
%add = add nsw i32 %vA, %vC
|
%add = add nsw i32 %vA, %vC
|
||||||
|
%float.inner.next = fadd fast float %float.inner, 1.000000e+00
|
||||||
store i32 %add, i32* %arrayidx5
|
store i32 %add, i32* %arrayidx5
|
||||||
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
|
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
|
||||||
%exitcond = icmp eq i64 %iv.inner.next, 100
|
%exitcond = icmp eq i64 %iv.inner.next, 100
|
||||||
br i1 %exitcond, label %outer.inc, label %for.body3
|
br i1 %exitcond, label %outer.inc, label %for.body3
|
||||||
|
|
||||||
outer.inc: ; preds = %for.body3
|
outer.inc: ; preds = %for.body3
|
||||||
|
%float.outer.next = phi float [ %float.inner.next, %for.body3 ]
|
||||||
%iv.outer.next = add nsw i64 %iv.outer, 1
|
%iv.outer.next = add nsw i64 %iv.outer, 1
|
||||||
%cmp = icmp eq i64 %iv.outer.next, 100
|
%cmp = icmp eq i64 %iv.outer.next, 100
|
||||||
br i1 %cmp, label %outer.header, label %for.exit
|
br i1 %cmp, label %outer.header, label %for.exit
|
||||||
|
|
||||||
for.exit: ; preds = %outer.inc
|
for.exit: ; preds = %outer.inc
|
||||||
%float.outer.lcssa = phi float [ %float.outer, %outer.inc ]
|
%float.outer.lcssa = phi float [ %float.outer.next, %outer.inc ]
|
||||||
store float %float.outer.lcssa, float* @F
|
store float %float.outer.lcssa, float* @F
|
||||||
br label %for.end16
|
br label %for.end16
|
||||||
|
|
||||||
|
|
|
@ -227,3 +227,83 @@ for1.loopexit: ; preds = %for1.inc
|
||||||
%il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
|
%il.res.lcssa2 = phi i64 [ %sum.inc.amend, %for1.inc ]
|
||||||
ret i64 %il.res.lcssa2
|
ret i64 %il.res.lcssa2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Floating point reductions are interchanged if all the fp instructions
|
||||||
|
; involved allow reassociation.
|
||||||
|
; REMARKS: --- !Passed
|
||||||
|
; REMARKS-NEXT: Pass: loop-interchange
|
||||||
|
; REMARKS-NEXT: Name: Interchanged
|
||||||
|
; REMARKS-NEXT: Function: test5
|
||||||
|
|
||||||
|
define float @test5([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
|
||||||
|
entry:
|
||||||
|
br label %outer.header
|
||||||
|
|
||||||
|
outer.header: ; preds = %outer.inc, %entry
|
||||||
|
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
|
||||||
|
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
|
||||||
|
br label %for.body3
|
||||||
|
|
||||||
|
for.body3: ; preds = %for.body3, %outer.header
|
||||||
|
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
|
||||||
|
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
|
||||||
|
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
|
||||||
|
%vA = load float, float* %arrayidx5
|
||||||
|
%float.inner.inc = fadd fast float %float.inner, %vA
|
||||||
|
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
|
||||||
|
%vB = load float, float* %arrayidx6
|
||||||
|
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
|
||||||
|
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
|
||||||
|
%exitcond = icmp eq i64 %iv.inner.next, 100
|
||||||
|
br i1 %exitcond, label %outer.inc, label %for.body3
|
||||||
|
|
||||||
|
outer.inc: ; preds = %for.body3
|
||||||
|
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
|
||||||
|
%iv.outer.next = add nsw i64 %iv.outer, 1
|
||||||
|
%cmp = icmp eq i64 %iv.outer.next, 100
|
||||||
|
br i1 %cmp, label %outer.header, label %for.exit
|
||||||
|
|
||||||
|
for.exit: ; preds = %outer.inc
|
||||||
|
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
|
||||||
|
ret float %float.outer.lcssa
|
||||||
|
}
|
||||||
|
|
||||||
|
; Floating point reductions are not interchanged if not all the fp instructions
|
||||||
|
; involved allow reassociation.
|
||||||
|
; REMARKS: --- !Missed
|
||||||
|
; REMARKS-NEXT: Pass: loop-interchange
|
||||||
|
; REMARKS-NEXT: Name: UnsupportedPHIOuter
|
||||||
|
; REMARKS-NEXT: Function: test6
|
||||||
|
|
||||||
|
define float @test6([100 x [100 x float]]* %Arr, [100 x [100 x float]]* %Arr2) {
|
||||||
|
entry:
|
||||||
|
br label %outer.header
|
||||||
|
|
||||||
|
outer.header: ; preds = %outer.inc, %entry
|
||||||
|
%iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]
|
||||||
|
%float.outer = phi float [ 1.000000e+00, %entry ], [ %float.inner.lcssa, %outer.inc ]
|
||||||
|
br label %for.body3
|
||||||
|
|
||||||
|
for.body3: ; preds = %for.body3, %outer.header
|
||||||
|
%float.inner = phi float [ %float.outer , %outer.header ], [ %float.inner.inc.inc, %for.body3 ]
|
||||||
|
%iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]
|
||||||
|
%arrayidx5 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr, i64 0, i64 %iv.inner, i64 %iv.outer
|
||||||
|
%vA = load float, float* %arrayidx5
|
||||||
|
%float.inner.inc = fadd float %float.inner, %vA ; do not allow reassociation
|
||||||
|
%arrayidx6 = getelementptr inbounds [100 x [100 x float]], [100 x [100 x float]]* %Arr2, i64 0, i64 %iv.inner, i64 %iv.outer
|
||||||
|
%vB = load float, float* %arrayidx6
|
||||||
|
%float.inner.inc.inc = fadd fast float %float.inner.inc, %vB
|
||||||
|
%iv.inner.next = add nuw nsw i64 %iv.inner, 1
|
||||||
|
%exitcond = icmp eq i64 %iv.inner.next, 100
|
||||||
|
br i1 %exitcond, label %outer.inc, label %for.body3
|
||||||
|
|
||||||
|
outer.inc: ; preds = %for.body3
|
||||||
|
%float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ]
|
||||||
|
%iv.outer.next = add nsw i64 %iv.outer, 1
|
||||||
|
%cmp = icmp eq i64 %iv.outer.next, 100
|
||||||
|
br i1 %cmp, label %outer.header, label %for.exit
|
||||||
|
|
||||||
|
for.exit: ; preds = %outer.inc
|
||||||
|
%float.outer.lcssa = phi float [ %float.inner.lcssa, %outer.inc ]
|
||||||
|
ret float %float.outer.lcssa
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue