Revert "[LICM] Make LICM able to hoist phis"

This reverts commit r347190.

llvm-svn: 347225
This commit is contained in:
Benjamin Kramer 2018-11-19 16:51:57 +00:00
parent 47066bd5f7
commit 2cad359c91
3 changed files with 24 additions and 1477 deletions

View File

@ -31,7 +31,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
@ -42,7 +41,6 @@
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSA.h"
@ -77,8 +75,6 @@ using namespace llvm;
#define DEBUG_TYPE "licm"
STATISTIC(NumCreatedBlocks, "Number of blocks created");
STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
@ -107,7 +103,7 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
TargetTransformInfo *TTI, bool &FreeInLoop);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
@ -441,225 +437,6 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
return Changed;
}
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
// start hoisting to the loop preheader, and when we see a loop invariant branch
// we make note of this. When we then come to hoist an instruction that's
// conditional on such a branch we duplicate the branch and the relevant control
// flow, then hoist the instruction into the block corresponding to its original
// block in the duplicated control flow.
class ControlFlowHoister {
private:
// Information about the loop we are hoisting from
LoopInfo *LI;
DominatorTree *DT;
Loop *CurLoop;
// A map of blocks in the loop to the block their instructions will be hoisted
// to.
DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
// The branches that we can hoist, mapped to the block that marks a
// convergence point of their control flow.
DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
public:
ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop)
: LI(LI), DT(DT), CurLoop(CurLoop) {}
void registerPossiblyHoistableBranch(BranchInst *BI) {
// We can only hoist conditional branches with loop invariant operands.
if (!BI->isConditional() || !CurLoop->hasLoopInvariantOperands(BI))
return;
// The branch destinations need to be in the loop, and we don't gain
// anything by duplicating conditional branches with duplicate successors,
// as it's essentially the same as an unconditional branch.
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = BI->getSuccessor(1);
if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
TrueDest == FalseDest)
return;
// We can hoist BI if one branch destination is the successor of the other,
// or both have common successor which we check by seeing if the
// intersection of their successors is non-empty.
// TODO: This could be expanded to allowing branches where both ends
// eventually converge to a single block.
SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
BasicBlock *CommonSucc = nullptr;
if (TrueDestSucc.count(FalseDest)) {
CommonSucc = FalseDest;
} else if (FalseDestSucc.count(TrueDest)) {
CommonSucc = TrueDest;
} else {
set_intersect(TrueDestSucc, FalseDestSucc);
// If there's one common successor use that.
if (TrueDestSucc.size() == 1)
CommonSucc = *TrueDestSucc.begin();
// If there's more than one pick whichever appears first in the block list
// (we can't use the value returned by TrueDestSucc.begin() as it's
// unpredicatable which element gets returned).
else if (!TrueDestSucc.empty()) {
Function *F = TrueDest->getParent();
auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
auto It = std::find_if(F->begin(), F->end(), IsSucc);
assert(It != F->end() && "Could not find successor in function");
CommonSucc = &*It;
}
}
// The common successor has to be dominated by the branch, as otherwise
// there will be some other path to the successor that will not be
// controlled by this branch so any phi we hoist would be controlled by the
// wrong condition. This also takes care of avoiding hoisting of loop back
// edges.
// TODO: In some cases this could be relaxed if the successor is dominated
// by another block that's been hoisted and we can guarantee that the
// control flow has been replicated exactly.
if (CommonSucc && DT->dominates(BI, CommonSucc))
HoistableBranches[BI] = CommonSucc;
}
bool canHoistPHI(PHINode *PN) {
// The phi must have loop invariant operands.
if (!CurLoop->hasLoopInvariantOperands(PN))
return false;
// We can hoist phis if the block they are in is the target of hoistable
// branches which cover all of the predecessors of the block.
SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
BasicBlock *BB = PN->getParent();
for (BasicBlock *PredBB : predecessors(BB))
PredecessorBlocks.insert(PredBB);
// If we have less predecessor blocks than predecessors then the phi will
// have more than one incoming value for the same block which we can't
// handle.
// TODO: This could be handled be erasing some of the duplicate incoming
// values.
if (PredecessorBlocks.size() != pred_size(BB))
return false;
for (auto &Pair : HoistableBranches) {
if (Pair.second == BB) {
// Which blocks are predecessors via this branch depends on if the
// branch is triangle-like or diamond-like.
if (Pair.first->getSuccessor(0) == BB) {
PredecessorBlocks.erase(Pair.first->getParent());
PredecessorBlocks.erase(Pair.first->getSuccessor(1));
} else if (Pair.first->getSuccessor(1) == BB) {
PredecessorBlocks.erase(Pair.first->getParent());
PredecessorBlocks.erase(Pair.first->getSuccessor(0));
} else {
PredecessorBlocks.erase(Pair.first->getSuccessor(0));
PredecessorBlocks.erase(Pair.first->getSuccessor(1));
}
}
}
// PredecessorBlocks will now be empty if for every predecessor of BB we
// found a hoistable branch source.
return PredecessorBlocks.empty();
}
BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
// If BB has already been hoisted, return that
if (HoistDestinationMap.count(BB))
return HoistDestinationMap[BB];
// Check if this block is conditional based on a pending branch
auto HasBBAsSuccessor =
[&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
Pair.first->getSuccessor(1) == BB);
};
auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
HasBBAsSuccessor);
// If not involved in a pending branch, hoist to preheader
BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
if (It == HoistableBranches.end()) {
LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
<< " as hoist destination for " << BB->getName()
<< "\n");
HoistDestinationMap[BB] = InitialPreheader;
return InitialPreheader;
}
BranchInst *BI = It->first;
assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
HoistableBranches.end() &&
"BB is expected to be the target of at most one branch");
LLVMContext &C = BB->getContext();
BasicBlock *TrueDest = BI->getSuccessor(0);
BasicBlock *FalseDest = BI->getSuccessor(1);
BasicBlock *CommonSucc = HoistableBranches[BI];
BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
// Create hoisted versions of blocks that currently don't have them
auto CreateHoistedBlock = [&](BasicBlock *Orig) {
if (HoistDestinationMap.count(Orig))
return HoistDestinationMap[Orig];
BasicBlock *New =
BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
HoistDestinationMap[Orig] = New;
DT->addNewBlock(New, HoistTarget);
if (CurLoop->getParentLoop())
CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
++NumCreatedBlocks;
LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
<< " as hoist destination for " << Orig->getName()
<< "\n");
return New;
};
BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
// Link up these blocks with branches.
if (!HoistCommonSucc->getTerminator()) {
// The new common successor we've generated will branch to whatever that
// hoist target branched to.
BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
assert(TargetSucc && "Expected hoist target to have a single successor");
HoistCommonSucc->moveBefore(TargetSucc);
BranchInst::Create(TargetSucc, HoistCommonSucc);
}
if (!HoistTrueDest->getTerminator()) {
HoistTrueDest->moveBefore(HoistCommonSucc);
BranchInst::Create(HoistCommonSucc, HoistTrueDest);
}
if (!HoistFalseDest->getTerminator()) {
HoistFalseDest->moveBefore(HoistCommonSucc);
BranchInst::Create(HoistCommonSucc, HoistFalseDest);
}
// If BI is being cloned to what was originally the preheader then
// HoistCommonSucc will now be the new preheader.
if (HoistTarget == InitialPreheader) {
// Phis in the loop header now need to use the new preheader.
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
// The new preheader dominates the loop header.
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
DT->changeImmediateDominator(HeaderNode, PreheaderNode);
// The preheader hoist destination is now the new preheader, with the
// exception of the hoist destination of this branch.
for (auto &Pair : HoistDestinationMap)
if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
Pair.second = HoistCommonSucc;
}
// Now finally clone BI.
ReplaceInstWithInst(
HoistTarget->getTerminator(),
BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
++NumClonedBranches;
assert(CurLoop->getLoopPreheader() &&
"Hoisting blocks should not have destroyed preheader");
return HoistDestinationMap[BB];
}
};
/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
@ -674,23 +451,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion");
ControlFlowHoister CFH(LI, DT, CurLoop);
// We want to visit parents before children. We will enque all the parents
// before their children in the worklist and process the worklist in order.
SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
// Keep track of instructions that have been hoisted, as they may need to be
// re-hoisted if they end up not dominating all of their uses.
SmallVector<Instruction *, 16> HoistedInstructions;
// Record what the original preheader is, as we'll need it later if we need to
// re-hoist instructions.
BasicBlock *OriginalPreheader = CurLoop->getLoopPreheader();
// For PHI hoisting to work we need to hoist blocks before their successors.
// We can do this by iterating through the blocks in the loop in reverse
// post-order.
LoopBlocksRPO Worklist(CurLoop);
Worklist.perform(LI);
bool Changed = false;
for (BasicBlock *BB : Worklist) {
for (DomTreeNode *DTN : Worklist) {
BasicBlock *BB = DTN->getBlock();
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB, CurLoop, LI))
@ -716,16 +483,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
// if it is safe to hoist the instruction.
// TODO: It may be safe to hoist if we are hoisting to a conditional block
// and we have accurately duplicated the control flow from the loop header
// to that block.
//
if (CurLoop->hasLoopInvariantOperands(&I) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE);
HoistedInstructions.push_back(&I);
hoist(I, DT, CurLoop, SafetyInfo, ORE);
Changed = true;
continue;
}
@ -750,9 +514,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
I.replaceAllUsesWith(Product);
eraseInstruction(I, *SafetyInfo, CurAST);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
SafetyInfo, ORE);
HoistedInstructions.push_back(ReciprocalDivisor);
hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
Changed = true;
continue;
}
@ -764,58 +526,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
CurLoop->hasLoopInvariantOperands(&I) &&
SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE);
HoistedInstructions.push_back(&I);
hoist(I, DT, CurLoop, SafetyInfo, ORE);
Changed = true;
continue;
}
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
if (CFH.canHoistPHI(PN)) {
// Redirect incoming blocks first to ensure that we create hoisted
// versions of those blocks before we hoist the phi.
for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
PN->setIncomingBlock(
i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
ORE);
assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
Changed = true;
continue;
}
}
// Remember possibly hoistable branches so we can actually hoist them
// later if needed.
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
CFH.registerPossiblyHoistableBranch(BI);
}
}
// If we hoisted instructions to a conditional block they may not dominate
// their uses that weren't hoisted (such as phis where some operands are not
// loop invariant). If so make them unconditional by moving them to the end of
// the original preheader, which is guaranteed to dominate everything in the
// loop. We iterate through the instructions in reverse order which ensures
// that when we rehoist an instruction we rehoist its operands.
Instruction *HoistPoint = OriginalPreheader->getTerminator();
for (Instruction *I : reverse(HoistedInstructions)) {
if (!llvm::all_of(I->uses(), [&](Use &U) { return DT->dominates(I, U); })) {
LLVM_DEBUG(dbgs() << "LICM rehoisting to " << OriginalPreheader->getName()
<< ": " << *I << "\n");
moveInstructionBefore(*I, *HoistPoint, *SafetyInfo);
HoistPoint = I;
Changed = true;
}
}
// Now that we've finished hoisting make sure that LI and DT are still valid.
#ifndef NDEBUG
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
"Dominator tree verification failed");
LI->verify(*DT);
#endif
return Changed;
}
@ -1383,9 +1100,9 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
/// is safe to hoist, this instruction is called to do the dirty work.
///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
auto *Preheader = CurLoop->getLoopPreheader();
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
<< "\n");
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
@ -1403,12 +1120,8 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
!SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
I.dropUnknownNonDebugMetadata();
if (isa<PHINode>(I))
// Move the new node to the end of the phi list in the destination block.
moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo);
else
// Move the new node to the destination block, before its terminator.
moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
// Move the new node to the Preheader, before its terminator.
moveInstructionBefore(I, *Preheader->getTerminator(), *SafetyInfo);
// Do not retain debug locations when we are moving instructions to different
// basic blocks, because we want to avoid jumpy line tables. Calls, however,

File diff suppressed because it is too large Load Diff

View File

@ -266,26 +266,19 @@ for.end: ; preds = %for.body
; variant/invariant values being stored to invariant address.
; test checks that the last element of the phi is extracted and scalar stored
; into the uniform address within the loop.
; Since the condition and the phi is loop invariant, they are LICM'ed before
; Since the condition and the phi is loop invariant, they are LICM'ed after
; vectorization.
; CHECK-LABEL: inv_val_store_to_inv_address_conditional_inv
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NTRUNC]], [[K:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_STORE_LICM:.*]], label %[[COND_STORE_K_LICM:.*]]
; CHECK: [[COND_STORE_LICM]]:
; CHECK-NEXT: br label %[[LATCH_LICM:.*]]
; CHECK: [[COND_STORE_K_LICM]]:
; CHECK-NEXT: br label %[[LATCH_LICM]]
; CHECK: [[LATCH_LICM]]:
; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], %[[COND_STORE_LICM]] ], [ [[K]], %[[COND_STORE_K_LICM]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1
; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]]
@ -298,13 +291,17 @@ for.end: ; preds = %for.body
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[NTRUNC]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> undef, i1 [[CMP]], i32 3
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[K]], i32 3
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32>* [[TMP7]], align 4
; CHECK-NEXT: store i32 [[STOREVAL]], i32* [[A]], align 4
; CHECK-NEXT: store i32 [[TMP5]], i32* [[A]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
@ -324,6 +321,7 @@ for.end: ; preds = %for.body
; CHECK: cond_store_k:
; CHECK-NEXT: br label [[LATCH]]
; CHECK: latch:
; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], [[COND_STORE]] ], [ [[K]], [[COND_STORE_K]] ]
; CHECK-NEXT: store i32 [[STOREVAL]], i32* [[A]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]