Revert "[StructurizeCFG] Improve basic block ordering"
This reverts commit f1b05a0a2b
.
Need to revert to due to issues identified with testing. The
transformation is incorrect for blocks that contain convergent
instructions.
This commit is contained in:
parent
ffa7384f10
commit
c945d88d2b
|
@ -68,11 +68,6 @@ static cl::opt<bool>
|
|||
cl::desc("Allow relaxed uniform region checks"),
|
||||
cl::init(true));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
ReorderNodeSize("structurizecfg-node-reorder-size",
|
||||
cl::desc("Limit region size for reordering nodes"),
|
||||
cl::init(100), cl::Hidden);
|
||||
|
||||
// Definition of the complex types used in this pass.
|
||||
|
||||
using BBValuePair = std::pair<BasicBlock *, Value *>;
|
||||
|
@ -267,8 +262,6 @@ class StructurizeCFG {
|
|||
|
||||
void orderNodes();
|
||||
|
||||
void reorderNodes();
|
||||
|
||||
void analyzeLoops(RegionNode *N);
|
||||
|
||||
Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
|
||||
|
@ -427,57 +420,6 @@ void StructurizeCFG::orderNodes() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Change the node ordering to decrease the range of live values, especially
|
||||
/// the values that capture the control flow path for branches. We do this
|
||||
/// by moving blocks with a single predecessor and successor to appear after
|
||||
/// predecessor. The motivation is to move some loop exit blocks into a loop.
|
||||
/// In cases where a loop has a large number of exit blocks, this reduces the
|
||||
/// amount of values needed across the loop boundary.
|
||||
void StructurizeCFG::reorderNodes() {
|
||||
SmallVector<RegionNode *, 8> NewOrder;
|
||||
DenseMap<BasicBlock *, unsigned> MoveTo;
|
||||
BitVector Moved(Order.size());
|
||||
|
||||
// The benefits of reordering nodes occurs for large regions.
|
||||
if (Order.size() <= ReorderNodeSize)
|
||||
return;
|
||||
|
||||
// The algorithm works with two passes over Order. The first pass identifies
|
||||
// the blocks to move and the position to move them to. The second pass
|
||||
// creates the new order based upon this information. We move blocks with
|
||||
// a single predecessor and successor. If there are multiple candidates then
|
||||
// maintain the original order.
|
||||
BBSet Seen;
|
||||
for (int I = Order.size() - 1; I >= 0; --I) {
|
||||
auto *BB = Order[I]->getEntry();
|
||||
Seen.insert(BB);
|
||||
auto *Pred = BB->getSinglePredecessor();
|
||||
auto *Succ = BB->getSingleSuccessor();
|
||||
// Consider only those basic blocks that have a predecessor in Order and a
|
||||
// successor that exits the region. The region may contain subregions that
|
||||
// have been structurized and are not included in Order.
|
||||
if (Pred && Succ && Seen.count(Pred) && Succ == ParentRegion->getExit() &&
|
||||
!MoveTo.count(Pred)) {
|
||||
MoveTo[Pred] = I;
|
||||
Moved.set(I);
|
||||
}
|
||||
}
|
||||
|
||||
// If no blocks have been moved then the original order is good.
|
||||
if (!Moved.count())
|
||||
return;
|
||||
|
||||
for (size_t I = 0, E = Order.size(); I < E; ++I) {
|
||||
auto *BB = Order[I]->getEntry();
|
||||
if (MoveTo.count(BB))
|
||||
NewOrder.push_back(Order[MoveTo[BB]]);
|
||||
if (!Moved[I])
|
||||
NewOrder.push_back(Order[I]);
|
||||
}
|
||||
|
||||
Order.assign(NewOrder);
|
||||
}
|
||||
|
||||
/// Determine the end of the loops
|
||||
void StructurizeCFG::analyzeLoops(RegionNode *N) {
|
||||
if (N->isSubRegion()) {
|
||||
|
@ -1139,7 +1081,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
|
|||
ParentRegion = R;
|
||||
|
||||
orderNodes();
|
||||
reorderNodes();
|
||||
collectInfos();
|
||||
createFlow();
|
||||
insertConditions(false);
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=IR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
|
@ -47,9 +48,9 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
|
|||
; GCN-NEXT: s_endpgm
|
||||
; IR-LABEL: @reduced_nested_loop_conditions(
|
||||
; IR-NEXT: bb:
|
||||
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4:[0-9]+]]
|
||||
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4
|
||||
; IR-NEXT: [[MY_TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* [[ARG:%.*]], i32 [[MY_TMP]]
|
||||
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]], align 4
|
||||
; IR-NEXT: [[MY_TMP2:%.*]] = load volatile i64, i64 addrspace(3)* [[MY_TMP1]]
|
||||
; IR-NEXT: br label [[BB5:%.*]]
|
||||
; IR: bb3:
|
||||
; IR-NEXT: br i1 true, label [[BB4:%.*]], label [[BB13:%.*]]
|
||||
|
@ -83,7 +84,7 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
|
|||
; IR: bb16:
|
||||
; IR-NEXT: [[MY_TMP17:%.*]] = extractelement <2 x i32> [[MY_TMP15]], i64 1
|
||||
; IR-NEXT: [[MY_TMP18:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* undef, i32 [[MY_TMP17]]
|
||||
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]], align 4
|
||||
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[MY_TMP18]]
|
||||
; IR-NEXT: br label [[BB20]]
|
||||
; IR: bb20:
|
||||
; IR-NEXT: [[MY_TMP21]] = phi i32 [ [[MY_TMP19]], [[BB16]] ], [ 0, [[BB13]] ]
|
||||
|
@ -92,7 +93,6 @@ define amdgpu_kernel void @reduced_nested_loop_conditions(i64 addrspace(3)* noca
|
|||
; IR: bb23:
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP6]])
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
bb:
|
||||
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%my.tmp1 = getelementptr inbounds i64, i64 addrspace(3)* %arg, i32 %my.tmp
|
||||
|
@ -190,19 +190,19 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
|
|||
; GCN-NEXT: s_endpgm
|
||||
; IR-LABEL: @nested_loop_conditions(
|
||||
; IR-NEXT: bb:
|
||||
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #[[ATTR4]]
|
||||
; IR-NEXT: [[MY_TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() #4
|
||||
; IR-NEXT: [[MY_TMP1:%.*]] = zext i32 [[MY_TMP]] to i64
|
||||
; IR-NEXT: [[MY_TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG:%.*]], i64 [[MY_TMP1]]
|
||||
; IR-NEXT: [[MY_TMP3:%.*]] = load i64, i64 addrspace(1)* [[MY_TMP2]], align 16
|
||||
; IR-NEXT: [[MY_TMP932:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
|
||||
; IR-NEXT: [[MY_TMP1033:%.*]] = extractelement <4 x i32> [[MY_TMP932]], i64 0
|
||||
; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
|
||||
; IR-NEXT: [[MY_TMP1134:%.*]] = load volatile i32, i32 addrspace(1)* undef
|
||||
; IR-NEXT: [[MY_TMP1235:%.*]] = icmp slt i32 [[MY_TMP1134]], 9
|
||||
; IR-NEXT: br i1 [[MY_TMP1235]], label [[BB14_LR_PH:%.*]], label [[FLOW:%.*]]
|
||||
; IR: bb14.lr.ph:
|
||||
; IR-NEXT: br label [[BB14:%.*]]
|
||||
; IR: Flow3:
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP20:%.*]])
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP21:%.*]])
|
||||
; IR-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP14:%.*]])
|
||||
; IR-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0
|
||||
; IR-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1
|
||||
|
@ -244,7 +244,7 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
|
|||
; IR-NEXT: [[TMP17:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP16]])
|
||||
; IR-NEXT: br i1 [[TMP17]], label [[FLOW2:%.*]], label [[BB14]]
|
||||
; IR: bb18:
|
||||
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
|
||||
; IR-NEXT: [[MY_TMP19:%.*]] = load volatile i32, i32 addrspace(1)* undef
|
||||
; IR-NEXT: [[MY_TMP20:%.*]] = icmp slt i32 [[MY_TMP19]], 9
|
||||
; IR-NEXT: br i1 [[MY_TMP20]], label [[BB21]], label [[BB18]]
|
||||
; IR: bb21:
|
||||
|
@ -261,22 +261,21 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
|
|||
; IR-NEXT: [[MY_TMP8:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 [[MY_TMP7]]
|
||||
; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16
|
||||
; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0
|
||||
; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
|
||||
; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef
|
||||
; IR-NEXT: [[MY_TMP12]] = icmp sge i32 [[MY_TMP11]], 9
|
||||
; IR-NEXT: br label [[FLOW1]]
|
||||
; IR: Flow2:
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])
|
||||
; IR-NEXT: [[TMP18:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
|
||||
; IR-NEXT: [[TMP19:%.*]] = extractvalue { i1, i64 } [[TMP18]], 0
|
||||
; IR-NEXT: [[TMP20]] = extractvalue { i1, i64 } [[TMP18]], 1
|
||||
; IR-NEXT: br i1 [[TMP19]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
|
||||
; IR-NEXT: [[TMP19:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP15]])
|
||||
; IR-NEXT: [[TMP20:%.*]] = extractvalue { i1, i64 } [[TMP19]], 0
|
||||
; IR-NEXT: [[TMP21]] = extractvalue { i1, i64 } [[TMP19]], 1
|
||||
; IR-NEXT: br i1 [[TMP20]], label [[BB31_LOOPEXIT:%.*]], label [[FLOW3]]
|
||||
; IR: bb31.loopexit:
|
||||
; IR-NEXT: br label [[FLOW3]]
|
||||
; IR: bb31:
|
||||
; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
|
||||
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef, align 4
|
||||
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
|
||||
; IR-NEXT: ret void
|
||||
;
|
||||
bb:
|
||||
%my.tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
%my.tmp1 = zext i32 %my.tmp to i64
|
||||
|
|
|
@ -1,511 +0,0 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -structurizecfg -structurizecfg-node-reorder-size=3 %s -o - | FileCheck %s
|
||||
; RUN: opt -S -passes=structurizecfg -structurizecfg-node-reorder-size=3 %s -o - | FileCheck %s
|
||||
|
||||
; Test that exit blocks for a loop are reordered so that they
|
||||
; appear after their predecessors rather than after the loop.
|
||||
; This reduces the number of values needed after the loop
|
||||
; to record if the exit blocks are taken/not taken.
|
||||
|
||||
define void @reorder_loop(i1 %PredEntry, i1 %PredA, i1 %PredC) {
|
||||
; CHECK-LABEL: @reorder_loop(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]]
|
||||
; CHECK: A:
|
||||
; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FLOW1:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[PREDA:%.*]], label [[B:%.*]], label [[FLOW:%.*]]
|
||||
; CHECK: B:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0:[0-9]+]]
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[B]] ], [ true, [[A]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[C:%.*]], label [[FLOW1]]
|
||||
; CHECK: C:
|
||||
; CHECK-NEXT: br i1 [[PREDC:%.*]], label [[D:%.*]], label [[FLOW2:%.*]]
|
||||
; CHECK: Flow1:
|
||||
; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP5:%.*]], [[FLOW3:%.*]] ], [ undef, [[FLOW]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP6:%.*]], [[FLOW3]] ], [ false, [[FLOW]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW3]] ], [ true, [[FLOW]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]]
|
||||
; CHECK: D:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
|
||||
; CHECK-NEXT: br label [[FLOW2]]
|
||||
; CHECK: Flow2:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[D]] ], [ true, [[C]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[E:%.*]], label [[FLOW3]]
|
||||
; CHECK: E:
|
||||
; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC2]], 10
|
||||
; CHECK-NEXT: br label [[FLOW3]]
|
||||
; CHECK: F:
|
||||
; CHECK-NEXT: unreachable
|
||||
; CHECK: G:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: Flow3:
|
||||
; CHECK-NEXT: [[TMP5]] = phi i32 [ [[INC2]], [[E]] ], [ undef, [[FLOW2]] ]
|
||||
; CHECK-NEXT: [[TMP6]] = phi i1 [ true, [[E]] ], [ false, [[FLOW2]] ]
|
||||
; CHECK-NEXT: [[TMP7]] = phi i1 [ [[CMP]], [[E]] ], [ true, [[FLOW2]] ]
|
||||
; CHECK-NEXT: br label [[FLOW1]]
|
||||
; CHECK: loop.exit.guard:
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[G]], label [[F:%.*]]
|
||||
;
|
||||
entry:
|
||||
br i1 %PredEntry, label %A, label %G
|
||||
|
||||
; A is the loop header.
|
||||
A:
|
||||
%inc1 = phi i32 [ 0, %entry ], [ %inc2, %E ]
|
||||
br i1 %PredA, label %B, label %C
|
||||
|
||||
; B is a loop exit block. After reorderNodes in StructurizeCFG, this block
|
||||
; remains after A. Without reorderNodes, B is dependent on the Flow block for
|
||||
; the loop back edge. B is executes only when the loop exits from A. It does not
|
||||
; execute during the loop iterations.
|
||||
B:
|
||||
tail call fastcc void @check(i32 1) #0
|
||||
br label %loop.exit.guard
|
||||
|
||||
C:
|
||||
br i1 %PredC, label %D, label %E
|
||||
|
||||
; D is a loop exit block. After reorderNodes, this block remains in the loop
|
||||
; and is not dependent on the Flow block that contains the back edge to the
|
||||
; loop header.
|
||||
D:
|
||||
tail call fastcc void @check(i32 2) #0
|
||||
br label %loop.exit.guard
|
||||
|
||||
; E is a latch and exiting block.
|
||||
E:
|
||||
%inc2 = add i32 %inc1, 1
|
||||
%cmp = icmp ult i32 %inc2, 10
|
||||
br i1 %cmp, label %A, label %loop.exit.guard
|
||||
|
||||
; Paths fom the exit blocks come here through loop.exit.gaurd.
|
||||
F:
|
||||
unreachable
|
||||
|
||||
G:
|
||||
ret void
|
||||
|
||||
; loop.exit.guard is added by UnifyLoopExits as a common block for all loop
|
||||
; exits. After StructurizeCFG, the predecessor B will remain in the loop.
|
||||
loop.exit.guard:
|
||||
%Guard.G = phi i1 [ true, %E ], [ false, %B ], [ false, %D ]
|
||||
br i1 %Guard.G, label %G, label %F
|
||||
}
|
||||
|
||||
; Test that the exit blocks for a nested loop are reordered so that they are
|
||||
; not dependent on the Flow block that contains the loop back edge. That
|
||||
; dependence forces the block to appear after the loop, which increases the
|
||||
; number of live values exiting the loop.
|
||||
|
||||
define void @reorder_inner_loop(i1 %PredEntry, i1 %PredB, i1 %PredD) {
|
||||
; CHECK-LABEL: @reorder_inner_loop(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true
|
||||
; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[I:%.*]]
|
||||
; CHECK: A:
|
||||
; CHECK-NEXT: [[OUTER1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[FLOW4:%.*]] ]
|
||||
; CHECK-NEXT: br label [[B:%.*]]
|
||||
; CHECK: B:
|
||||
; CHECK-NEXT: [[INNER1:%.*]] = phi i32 [ [[TMP1:%.*]], [[FLOW1:%.*]] ], [ 0, [[A]] ]
|
||||
; CHECK-NEXT: br i1 [[PREDB_INV]], label [[C:%.*]], label [[FLOW:%.*]]
|
||||
; CHECK: C:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[C]] ], [ true, [[B]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[D:%.*]], label [[FLOW1]]
|
||||
; CHECK: D:
|
||||
; CHECK-NEXT: br i1 [[PREDD:%.*]], label [[E:%.*]], label [[FLOW2:%.*]]
|
||||
; CHECK: Flow1:
|
||||
; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP8:%.*]], [[FLOW3:%.*]] ], [ undef, [[FLOW]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW3]] ], [ false, [[FLOW]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP10:%.*]], [[FLOW3]] ], [ true, [[FLOW]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT_GUARD1:%.*]], label [[B]]
|
||||
; CHECK: E:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
|
||||
; CHECK-NEXT: br label [[FLOW2]]
|
||||
; CHECK: Flow2:
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[E]] ], [ true, [[D]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[F:%.*]], label [[FLOW3]]
|
||||
; CHECK: F:
|
||||
; CHECK-NEXT: [[INNER2:%.*]] = add i32 [[INNER1]], 1
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp uge i32 [[INNER2]], 20
|
||||
; CHECK-NEXT: br label [[FLOW3]]
|
||||
; CHECK: G:
|
||||
; CHECK-NEXT: [[OUTER2:%.*]] = add i32 [[OUTER1]], 1
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp uge i32 [[OUTER2]], 10
|
||||
; CHECK-NEXT: br label [[FLOW4]]
|
||||
; CHECK: H:
|
||||
; CHECK-NEXT: unreachable
|
||||
; CHECK: I:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: Flow4:
|
||||
; CHECK-NEXT: [[TMP5]] = phi i32 [ [[OUTER2]], [[G:%.*]] ], [ undef, [[LOOP_EXIT_GUARD1]] ]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = phi i1 [ true, [[G]] ], [ false, [[LOOP_EXIT_GUARD1]] ]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[CMP2]], [[G]] ], [ true, [[LOOP_EXIT_GUARD1]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]]
|
||||
; CHECK: loop.exit.guard:
|
||||
; CHECK-NEXT: br i1 [[TMP6]], label [[I]], label [[H:%.*]]
|
||||
; CHECK: Flow3:
|
||||
; CHECK-NEXT: [[TMP8]] = phi i32 [ [[INNER2]], [[F]] ], [ undef, [[FLOW2]] ]
|
||||
; CHECK-NEXT: [[TMP9]] = phi i1 [ true, [[F]] ], [ false, [[FLOW2]] ]
|
||||
; CHECK-NEXT: [[TMP10]] = phi i1 [ [[CMP1]], [[F]] ], [ true, [[FLOW2]] ]
|
||||
; CHECK-NEXT: br label [[FLOW1]]
|
||||
; CHECK: loop.exit.guard1:
|
||||
; CHECK-NEXT: br i1 [[TMP2]], label [[G]], label [[FLOW4]]
|
||||
;
|
||||
entry:
|
||||
br i1 %PredEntry, label %A, label %I
|
||||
|
||||
; A is the outer loop header block.
|
||||
A:
|
||||
%outer1 = phi i32 [ 0, %entry ], [ %outer2, %G ]
|
||||
br label %B
|
||||
|
||||
; B is the inner loop header block.
|
||||
B:
|
||||
%inner1 = phi i32 [ 0, %A ], [ %inner2, %F ]
|
||||
br i1 %PredB, label %D, label %C
|
||||
|
||||
; C is a loop exit block. After reorderNodes in StructureCFG, this block
|
||||
; remains after B rather and is not dependent on the Flow block that contains
|
||||
; the back edge to the loop header.
|
||||
C:
|
||||
tail call fastcc void @check(i32 1) #0
|
||||
br label %loop.exit.guard1
|
||||
|
||||
D:
|
||||
br i1 %PredD, label %E, label %F
|
||||
|
||||
; E is a loop exit block. After reorderNodes in StructurizeCFG, this block
|
||||
; remains in the loop and is not dependent on the Flow block that contains
|
||||
; the back edge to the loop header.
|
||||
E:
|
||||
tail call fastcc void @check(i32 2) #0
|
||||
br label %loop.exit.guard1
|
||||
|
||||
; F is a latch block for the inner loop.
|
||||
F:
|
||||
%inner2 = add i32 %inner1, 1
|
||||
%cmp1 = icmp ult i32 %inner2, 20
|
||||
br i1 %cmp1, label %B, label %loop.exit.guard1
|
||||
|
||||
; G is a latch block for the outer loop.
|
||||
G:
|
||||
%outer2 = add i32 %outer1, 1
|
||||
%cmp2 = icmp ult i32 %outer2, 10
|
||||
br i1 %cmp2, label %A, label %loop.exit.guard
|
||||
|
||||
; Paths from the exit blocks come here through loop.exit.gaurd.
|
||||
H:
|
||||
unreachable
|
||||
|
||||
I:
|
||||
ret void
|
||||
|
||||
; loop.exit.guard is added by UnifyLoopExits as a common block for all loop
|
||||
; exits. This instance is for the outer loop.
|
||||
loop.exit.guard:
|
||||
%Guard.I = phi i1 [ true, %G ], [ %Guard.I.moved, %loop.exit.guard1 ]
|
||||
br i1 %Guard.I, label %I, label %H
|
||||
|
||||
; loop.exit.guard is added for the inner loop.
|
||||
loop.exit.guard1:
|
||||
%Guard.G = phi i1 [ true, %F ], [ false, %C ], [ false, %E ]
|
||||
%Guard.I.moved = phi i1 [ undef, %F ], [ false, %C ], [ false, %E ]
|
||||
br i1 %Guard.G, label %G, label %loop.exit.guard
|
||||
}
|
||||
|
||||
; Test when the common successor is not unreachable. In particular, the common
|
||||
; successor is inside the region. In this case, it is still helpful to keep
|
||||
; the exit blocks inside the loop to reduce live values that exit the loop.
|
||||
|
||||
define void @common_exit(i1 %PredEntry, i1 %PredA, i1 %PredB) {
|
||||
; CHECK-LABEL: @common_exit(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[PREDB_INV:%.*]] = xor i1 [[PREDB:%.*]], true
|
||||
; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true
|
||||
; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[G:%.*]]
|
||||
; CHECK: A:
|
||||
; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP4:%.*]], [[FLOW:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[PREDA_INV]], label [[C:%.*]], label [[FLOW]]
|
||||
; CHECK: Flow3:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ true, [[D:%.*]] ], [ false, [[FLOW2:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP6:%.*]], label [[B:%.*]], label [[FLOW4:%.*]]
|
||||
; CHECK: B:
|
||||
; CHECK-NEXT: tail call fastcc void @check1(i32 1) #[[ATTR1:[0-9]+]]
|
||||
; CHECK-NEXT: br label [[FLOW4]]
|
||||
; CHECK: C:
|
||||
; CHECK-NEXT: br i1 [[PREDB_INV]], label [[E:%.*]], label [[FLOW1:%.*]]
|
||||
; CHECK: Flow1:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC2:%.*]], [[E]] ], [ undef, [[C]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[CMP:%.*]], [[E]] ], [ true, [[C]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ false, [[E]] ], [ true, [[C]] ]
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: Flow2:
|
||||
; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[D]], label [[FLOW3:%.*]]
|
||||
; CHECK: D:
|
||||
; CHECK-NEXT: tail call fastcc void @check1(i32 2) #[[ATTR1]]
|
||||
; CHECK-NEXT: br label [[FLOW3]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[A]] ]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ true, [[A]] ]
|
||||
; CHECK-NEXT: [[TMP6]] = phi i1 [ false, [[FLOW1]] ], [ true, [[A]] ]
|
||||
; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[A]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW2]], label [[A]]
|
||||
; CHECK: E:
|
||||
; CHECK-NEXT: [[INC2]] = add i32 [[INC1]], 1
|
||||
; CHECK-NEXT: [[CMP]] = icmp uge i32 [[INC2]], 10
|
||||
; CHECK-NEXT: br label [[FLOW1]]
|
||||
; CHECK: Flow4:
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = phi i1 [ true, [[B]] ], [ [[TMP0]], [[FLOW3]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[F:%.*]], label [[FLOW5:%.*]]
|
||||
; CHECK: F:
|
||||
; CHECK-NEXT: br label [[FLOW5]]
|
||||
; CHECK: Flow5:
|
||||
; CHECK-NEXT: br label [[G]]
|
||||
; CHECK: G:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
entry:
|
||||
br i1 %PredEntry, label %A, label %G
|
||||
|
||||
; A is the loop header.
|
||||
A:
|
||||
%inc1 = phi i32 [ 0, %entry ], [ %inc2, %E ]
|
||||
br i1 %PredA, label %B, label %C
|
||||
|
||||
; B is a loop exit block. After reorderNodes in StructurizeCFG, this block
|
||||
; remains after A rather than moving to after the loop due to a dependence on
|
||||
; the Flow block that contains the loop back edge.
|
||||
B:
|
||||
tail call fastcc void @check1(i32 1) #1
|
||||
br label %F
|
||||
|
||||
C:
|
||||
br i1 %PredB, label %D, label %E
|
||||
|
||||
; D is a loop exit block. After reorderNodes, this block remains in the loop
|
||||
; rather than moving to after the loop due to a dependence on the Flow block
|
||||
; that contains the loop back edge.
|
||||
D:
|
||||
tail call fastcc void @check1(i32 2) #1
|
||||
br label %F
|
||||
|
||||
; E is a latch and exiting block.
|
||||
E:
|
||||
%inc2 = add i32 %inc1, 1
|
||||
%cmp = icmp ult i32 %inc2, 10
|
||||
br i1 %cmp, label %A, label %G
|
||||
|
||||
; F is reached by the loop exit blocks, B and D.
|
||||
F:
|
||||
br label %G
|
||||
|
||||
G:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test when there are many loop exiting blocks. Previous tests have two exiting
|
||||
; blocks. This test contains five. Each of the exiting blocks remain in the loop
|
||||
; rather than being moved to after the loop.
|
||||
|
||||
define void @many_exiting_blocks(i1 %PredEntry, i1 %PredA, i1 %PredC, i1 %PredE, i1 %PredG) {
|
||||
; CHECK-LABEL: @many_exiting_blocks(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[PREDENTRY:%.*]], label [[A:%.*]], label [[FLOW11:%.*]]
|
||||
; CHECK: A:
|
||||
; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[TMP1:%.*]], [[FLOW4:%.*]] ], [ 0, [[ENTRY:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[PREDA:%.*]], label [[B:%.*]], label [[FLOW3:%.*]]
|
||||
; CHECK: B:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 1) #[[ATTR0]]
|
||||
; CHECK-NEXT: br label [[FLOW3]]
|
||||
; CHECK: Flow3:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[B]] ], [ true, [[A]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[C:%.*]], label [[FLOW4]]
|
||||
; CHECK: C:
|
||||
; CHECK-NEXT: br i1 [[PREDC:%.*]], label [[D:%.*]], label [[FLOW5:%.*]]
|
||||
; CHECK: Flow4:
|
||||
; CHECK-NEXT: [[TMP1]] = phi i32 [ [[TMP6:%.*]], [[FLOW6:%.*]] ], [ undef, [[FLOW3]] ]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP7:%.*]], [[FLOW6]] ], [ true, [[FLOW3]] ]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ [[TMP8:%.*]], [[FLOW6]] ], [ false, [[FLOW3]] ]
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW6]] ], [ true, [[FLOW3]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_EXIT_GUARD:%.*]], label [[A]]
|
||||
; CHECK: D:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 2) #[[ATTR0]]
|
||||
; CHECK-NEXT: br label [[FLOW5]]
|
||||
; CHECK: Flow5:
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ false, [[D]] ], [ true, [[C]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP5]], label [[E:%.*]], label [[FLOW6]]
|
||||
; CHECK: E:
|
||||
; CHECK-NEXT: br i1 [[PREDE:%.*]], label [[F:%.*]], label [[FLOW7:%.*]]
|
||||
; CHECK: Flow6:
|
||||
; CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP12:%.*]], [[FLOW8:%.*]] ], [ undef, [[FLOW5]] ]
|
||||
; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP13:%.*]], [[FLOW8]] ], [ true, [[FLOW5]] ]
|
||||
; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP14:%.*]], [[FLOW8]] ], [ false, [[FLOW5]] ]
|
||||
; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP15:%.*]], [[FLOW8]] ], [ true, [[FLOW5]] ]
|
||||
; CHECK-NEXT: br label [[FLOW4]]
|
||||
; CHECK: F:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 3) #[[ATTR0]]
|
||||
; CHECK-NEXT: br label [[FLOW7]]
|
||||
; CHECK: Flow7:
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ false, [[F]] ], [ true, [[E]] ]
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[F]] ], [ true, [[E]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP11]], label [[G:%.*]], label [[FLOW8]]
|
||||
; CHECK: G:
|
||||
; CHECK-NEXT: br i1 [[PREDG:%.*]], label [[H:%.*]], label [[FLOW9:%.*]]
|
||||
; CHECK: Flow8:
|
||||
; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP19:%.*]], [[FLOW10:%.*]] ], [ undef, [[FLOW7]] ]
|
||||
; CHECK-NEXT: [[TMP13]] = phi i1 [ [[TMP20:%.*]], [[FLOW10]] ], [ [[TMP10]], [[FLOW7]] ]
|
||||
; CHECK-NEXT: [[TMP14]] = phi i1 [ [[TMP21:%.*]], [[FLOW10]] ], [ false, [[FLOW7]] ]
|
||||
; CHECK-NEXT: [[TMP15]] = phi i1 [ [[TMP22:%.*]], [[FLOW10]] ], [ true, [[FLOW7]] ]
|
||||
; CHECK-NEXT: br label [[FLOW6]]
|
||||
; CHECK: H:
|
||||
; CHECK-NEXT: tail call fastcc void @check(i32 4) #[[ATTR0]]
|
||||
; CHECK-NEXT: br label [[FLOW9]]
|
||||
; CHECK: Flow9:
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = phi i1 [ false, [[H]] ], [ [[TMP10]], [[G]] ]
|
||||
; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ false, [[H]] ], [ true, [[G]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP17]], label [[I:%.*]], label [[FLOW10]]
|
||||
; CHECK: I:
|
||||
; CHECK-NEXT: [[INC2:%.*]] = add i32 [[INC1]], 1
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[INC2]], 10
|
||||
; CHECK-NEXT: br label [[FLOW10]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[K:%.*]] ], [ true, [[LOOP_EXIT_GUARD1:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP18]], label [[J:%.*]], label [[FLOW1:%.*]]
|
||||
; CHECK: J:
|
||||
; CHECK-NEXT: br label [[FLOW1]]
|
||||
; CHECK: K:
|
||||
; CHECK-NEXT: br label [[FLOW:%.*]]
|
||||
; CHECK: Flow1:
|
||||
; CHECK-NEXT: br label [[FLOW2:%.*]]
|
||||
; CHECK: Flow2:
|
||||
; CHECK-NEXT: br label [[FLOW11]]
|
||||
; CHECK: L:
|
||||
; CHECK-NEXT: ret void
|
||||
; CHECK: Flow10:
|
||||
; CHECK-NEXT: [[TMP19]] = phi i32 [ [[INC2]], [[I]] ], [ undef, [[FLOW9]] ]
|
||||
; CHECK-NEXT: [[TMP20]] = phi i1 [ false, [[I]] ], [ [[TMP16]], [[FLOW9]] ]
|
||||
; CHECK-NEXT: [[TMP21]] = phi i1 [ true, [[I]] ], [ false, [[FLOW9]] ]
|
||||
; CHECK-NEXT: [[TMP22]] = phi i1 [ [[CMP]], [[I]] ], [ true, [[FLOW9]] ]
|
||||
; CHECK-NEXT: br label [[FLOW8]]
|
||||
; CHECK: Flow11:
|
||||
; CHECK-NEXT: br label [[L:%.*]]
|
||||
; CHECK: loop.exit.guard:
|
||||
; CHECK-NEXT: [[GUARD_L_INV:%.*]] = xor i1 [[TMP3]], true
|
||||
; CHECK-NEXT: [[GUARD_J_INV:%.*]] = xor i1 [[TMP2]], true
|
||||
; CHECK-NEXT: br i1 [[GUARD_L_INV]], label [[LOOP_EXIT_GUARD1]], label [[FLOW2]]
|
||||
; CHECK: loop.exit.guard1:
|
||||
; CHECK-NEXT: br i1 [[GUARD_J_INV]], label [[K]], label [[FLOW]]
|
||||
;
|
||||
entry:
|
||||
br i1 %PredEntry, label %A, label %L
|
||||
|
||||
; A is the loop header block.
|
||||
A:
|
||||
%inc1 = phi i32 [ 0, %entry ], [ %inc2, %I ]
|
||||
br i1 %PredA, label %B, label %C
|
||||
|
||||
; B is a loop exit block. After reorderNodes in StructurizeCFG, this block
|
||||
; remains after A rather than moving to after the loop due to a dependence
|
||||
; on the Flow block that contains the loop back edge.
|
||||
B:
|
||||
tail call fastcc void @check(i32 1) #0
|
||||
br label %loop.exit.guard
|
||||
|
||||
C:
|
||||
br i1 %PredC, label %D, label %E
|
||||
|
||||
; D is a loop exit block. After reorderNodes, this block remains in the loop
|
||||
; rather than moving to after the loop.
|
||||
D:
|
||||
tail call fastcc void @check(i32 2) #0
|
||||
br label %loop.exit.guard
|
||||
|
||||
E:
|
||||
br i1 %PredE, label %F, label %G
|
||||
|
||||
; F is a loop exit block. After reorderNodes, this block remains in the loop
|
||||
; rather than moving to after the loop.
|
||||
F:
|
||||
tail call fastcc void @check(i32 3) #0
|
||||
br label %loop.exit.guard
|
||||
|
||||
G:
|
||||
br i1 %PredG, label %H, label %I
|
||||
|
||||
; H is a loop exit block. After reorderNodes, this block remains in the loop
|
||||
; rather than moving to after the loop.
|
||||
H:
|
||||
tail call fastcc void @check(i32 4) #0
|
||||
br label %loop.exit.guard
|
||||
|
||||
; I is a latch block.
|
||||
I:
|
||||
%inc2 = add i32 %inc1, 1
|
||||
%cmp = icmp ult i32 %inc2, 10
|
||||
br i1 %cmp, label %A, label %loop.exit.guard
|
||||
|
||||
J:
|
||||
br label %L
|
||||
|
||||
K:
|
||||
br label %L
|
||||
|
||||
L:
|
||||
ret void
|
||||
|
||||
loop.exit.guard:
|
||||
%Guard.L = phi i1 [ true, %I ], [ false, %B ], [ false, %D ], [ false, %F ], [ false, %H ]
|
||||
%Guard.J = phi i1 [ false, %I ], [ true, %B ], [ true, %D ], [ false, %F ], [ false, %H ]
|
||||
br i1 %Guard.L, label %L, label %loop.exit.guard1
|
||||
|
||||
loop.exit.guard1:
|
||||
br i1 %Guard.J, label %J, label %K
|
||||
}
|
||||
|
||||
declare void @check(i32 noundef) #0
|
||||
declare void @check1(i32 noundef) #1
|
||||
|
||||
attributes #0 = { noreturn nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
|
||||
; When there are two blocks that can be moved closer to the predecessor,
|
||||
; maintain the original, relative order since swapping the order should not
|
||||
; provide any benefit. In this case both B and C have a single predecessor
|
||||
; and a single successor.
|
||||
|
||||
define void @same_predecessor(i1 %PredA) {
|
||||
; CHECK-LABEL: @same_predecessor(
|
||||
; CHECK-NEXT: A:
|
||||
; CHECK-NEXT: [[PREDA_INV:%.*]] = xor i1 [[PREDA:%.*]], true
|
||||
; CHECK-NEXT: br i1 [[PREDA_INV]], label [[B:%.*]], label [[FLOW:%.*]]
|
||||
; CHECK: B:
|
||||
; CHECK-NEXT: br label [[FLOW]]
|
||||
; CHECK: Flow:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ false, [[B]] ], [ true, [[A:%.*]] ]
|
||||
; CHECK-NEXT: br i1 [[TMP0]], label [[C:%.*]], label [[D:%.*]]
|
||||
; CHECK: C:
|
||||
; CHECK-NEXT: br label [[D]]
|
||||
; CHECK: D:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
A:
|
||||
br i1 %PredA, label %C, label %B
|
||||
|
||||
B:
|
||||
br label %D
|
||||
|
||||
C:
|
||||
br label %D
|
||||
|
||||
D:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue