[LoopInterhcange] Handle movement of reduction phis appropriately

This patch fixes pr43326 and pr48212.

Currently when we move reduction phis to the right place,
loop interchange assumes the first phi in loop headers is
an induction phi, skips the first phi and assumes the rest
of phis are candidate reduction phis to move. However, it
may not always be the case.

This patch loops over all phis in loop headers and considers
a phi node as a candidate reduction phi to move only when it
is indeed a reduction phi across outer and inner loop.

Reviewed By: Whitney

Differential Revision: https://reviews.llvm.org/D102743
This commit is contained in:
Congzhe Cao 2021-05-31 16:11:04 -04:00 committed by CongzheUalberta
parent 5c9fe816e3
commit bfefde22b6
3 changed files with 149 additions and 5 deletions

View File

@ -1661,15 +1661,19 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
// replaced by Inners'.
OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
auto &OuterInnerReductions = LIL.getOuterInnerReductions();
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
for (PHINode &PHI : drop_begin(InnerLoopHeader->phis()))
for (PHINode &PHI : InnerLoopHeader->phis()) {
if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
continue;
InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
for (PHINode &PHI : drop_begin(OuterLoopHeader->phis()))
}
for (PHINode &PHI : OuterLoopHeader->phis()) {
if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
continue;
OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
auto &OuterInnerReductions = LIL.getOuterInnerReductions();
(void)OuterInnerReductions;
}
// Now move the remaining reduction PHIs from outer to inner loop header and
// vice versa. The PHI nodes must be part of a reduction across the inner and

View File

@ -0,0 +1,86 @@
; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
@a = global i32 0
@b = global i8 0
@c = global i32 0
@d = global i32 0
@e = global [1 x [1 x i32]] zeroinitializer
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: pr43326
define void @pr43326() {
entry:
%0 = load i32, i32* @a
%tobool.not2 = icmp eq i32 %0, 0
br i1 %tobool.not2, label %for.end14, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
%d.promoted = load i32, i32* @d
%a.promoted = load i32, i32* @a
br label %for.body
for.body: ; preds = %for.body.lr.ph, %for.inc12
%inc1312 = phi i32 [ %a.promoted, %for.body.lr.ph ], [ %inc13, %for.inc12 ]
%xor.lcssa.lcssa11 = phi i32 [ %d.promoted, %for.body.lr.ph ], [ %xor.lcssa.lcssa, %for.inc12 ]
br label %for.body3
for.body3: ; preds = %for.body, %for.inc10
%xor.lcssa9 = phi i32 [ %xor.lcssa.lcssa11, %for.body ], [ %xor.lcssa, %for.inc10 ]
%dec7 = phi i8 [ 0, %for.body ], [ %dec, %for.inc10 ]
%idxprom8 = sext i8 %dec7 to i64
br label %for.body7
for.body7: ; preds = %for.body3, %for.inc
%xor5 = phi i32 [ %xor.lcssa9, %for.body3 ], [ %xor, %for.inc ]
%inc4 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%idxprom = sext i32 %inc4 to i64
%arrayidx9 = getelementptr inbounds [1 x [1 x i32]], [1 x [1 x i32]]* @e, i64 0, i64 %idxprom, i64 %idxprom8
%1 = load i32, i32* %arrayidx9
%xor = xor i32 %xor5, %1
br label %for.inc
for.inc: ; preds = %for.body7
%inc = add nsw i32 %inc4, 1
%cmp5 = icmp slt i32 %inc, 1
br i1 %cmp5, label %for.body7, label %for.end
for.end: ; preds = %for.inc
%xor.lcssa = phi i32 [ %xor, %for.inc ]
%inc.lcssa = phi i32 [ %inc, %for.inc ]
br label %for.inc10
for.inc10: ; preds = %for.end
%dec = add i8 %dec7, -1
%cmp = icmp sgt i8 %dec, -1
br i1 %cmp, label %for.body3, label %for.end11
for.end11: ; preds = %for.inc10
%xor.lcssa.lcssa = phi i32 [ %xor.lcssa, %for.inc10 ]
%dec.lcssa = phi i8 [ %dec, %for.inc10 ]
%inc.lcssa.lcssa = phi i32 [ %inc.lcssa, %for.inc10 ]
br label %for.inc12
for.inc12: ; preds = %for.end11
%inc13 = add nsw i32 %inc1312, 1
%tobool.not = icmp eq i32 %inc13, 0
br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %for.body
for.cond.for.end14_crit_edge: ; preds = %for.inc12
%inc13.lcssa = phi i32 [ %inc13, %for.inc12 ]
%inc.lcssa.lcssa.lcssa = phi i32 [ %inc.lcssa.lcssa, %for.inc12 ]
%xor.lcssa.lcssa.lcssa = phi i32 [ %xor.lcssa.lcssa, %for.inc12 ]
%dec.lcssa.lcssa = phi i8 [ %dec.lcssa, %for.inc12 ]
store i8 %dec.lcssa.lcssa, i8* @b
store i32 %xor.lcssa.lcssa.lcssa, i32* @d
store i32 %inc.lcssa.lcssa.lcssa, i32* @c
store i32 %inc13.lcssa, i32* @a
br label %for.end14
for.end14: ; preds = %for.cond.for.end14_crit_edge, %entry
ret void
}

View File

@ -0,0 +1,54 @@
; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1
; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
; REMARKS: --- !Passed
; REMARKS-NEXT: Pass: loop-interchange
; REMARKS-NEXT: Name: Interchanged
; REMARKS-NEXT: Function: pr48212
define void @pr48212([5 x i32]* %filter) {
entry:
br label %L1
L1: ; preds = %entry
br label %for.body
for.body: ; preds = %L1, %for.inc6
%temp.04 = phi i32 [ undef, %L1 ], [ %temp.1.lcssa, %for.inc6 ]
%k1.03 = phi i32 [ 0, %L1 ], [ %inc7, %for.inc6 ]
br label %L2
L2: ; preds = %for.body
br label %for.body3
for.body3: ; preds = %L2, %for.inc
%temp.12 = phi i32 [ %temp.04, %L2 ], [ %add, %for.inc ]
%k2.01 = phi i32 [ 0, %L2 ], [ %inc, %for.inc ]
%idxprom = sext i32 %k2.01 to i64
%arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* %filter, i64 %idxprom
%idxprom4 = sext i32 %k1.03 to i64
%arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %arrayidx, i64 0, i64 %idxprom4
%0 = load i32, i32* %arrayidx5
%add = add nsw i32 %temp.12, %0
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %k2.01, 1
%cmp2 = icmp slt i32 %inc, 3
br i1 %cmp2, label %for.body3, label %for.end
for.end: ; preds = %for.inc
%temp.1.lcssa = phi i32 [ %add, %for.inc ]
br label %for.inc6
for.inc6: ; preds = %for.end
%inc7 = add nsw i32 %k1.03, 1
%cmp = icmp slt i32 %inc7, 5
br i1 %cmp, label %for.body, label %for.end8
for.end8: ; preds = %for.inc6
ret void
}