Revert "[LICM] Make LICM able to hoist phis"
This reverts commit r347190. llvm-svn: 347225
This commit is contained in:
parent
47066bd5f7
commit
2cad359c91
|
@ -31,7 +31,6 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Transforms/Scalar/LICM.h"
|
||||
#include "llvm/ADT/SetOperations.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/AliasSetTracker.h"
|
||||
|
@ -42,7 +41,6 @@
|
|||
#include "llvm/Analysis/GuardUtils.h"
|
||||
#include "llvm/Analysis/Loads.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/LoopIterator.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/MemorySSA.h"
|
||||
|
@ -77,8 +75,6 @@ using namespace llvm;
|
|||
|
||||
#define DEBUG_TYPE "licm"
|
||||
|
||||
STATISTIC(NumCreatedBlocks, "Number of blocks created");
|
||||
STATISTIC(NumClonedBranches, "Number of branches cloned");
|
||||
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
|
||||
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
|
||||
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
|
||||
|
@ -107,7 +103,7 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
|
|||
const LoopSafetyInfo *SafetyInfo,
|
||||
TargetTransformInfo *TTI, bool &FreeInLoop);
|
||||
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
|
||||
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
|
||||
ICFLoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE);
|
||||
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
|
||||
const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
|
||||
|
@ -441,225 +437,6 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
|
|||
return Changed;
|
||||
}
|
||||
|
||||
// This is a helper class for hoistRegion to make it able to hoist control flow
|
||||
// in order to be able to hoist phis. The way this works is that we initially
|
||||
// start hoisting to the loop preheader, and when we see a loop invariant branch
|
||||
// we make note of this. When we then come to hoist an instruction that's
|
||||
// conditional on such a branch we duplicate the branch and the relevant control
|
||||
// flow, then hoist the instruction into the block corresponding to its original
|
||||
// block in the duplicated control flow.
|
||||
class ControlFlowHoister {
|
||||
private:
|
||||
// Information about the loop we are hoisting from
|
||||
LoopInfo *LI;
|
||||
DominatorTree *DT;
|
||||
Loop *CurLoop;
|
||||
|
||||
// A map of blocks in the loop to the block their instructions will be hoisted
|
||||
// to.
|
||||
DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
|
||||
|
||||
// The branches that we can hoist, mapped to the block that marks a
|
||||
// convergence point of their control flow.
|
||||
DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
|
||||
|
||||
public:
|
||||
ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop)
|
||||
: LI(LI), DT(DT), CurLoop(CurLoop) {}
|
||||
|
||||
void registerPossiblyHoistableBranch(BranchInst *BI) {
|
||||
// We can only hoist conditional branches with loop invariant operands.
|
||||
if (!BI->isConditional() || !CurLoop->hasLoopInvariantOperands(BI))
|
||||
return;
|
||||
|
||||
// The branch destinations need to be in the loop, and we don't gain
|
||||
// anything by duplicating conditional branches with duplicate successors,
|
||||
// as it's essentially the same as an unconditional branch.
|
||||
BasicBlock *TrueDest = BI->getSuccessor(0);
|
||||
BasicBlock *FalseDest = BI->getSuccessor(1);
|
||||
if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
|
||||
TrueDest == FalseDest)
|
||||
return;
|
||||
|
||||
// We can hoist BI if one branch destination is the successor of the other,
|
||||
// or both have common successor which we check by seeing if the
|
||||
// intersection of their successors is non-empty.
|
||||
// TODO: This could be expanded to allowing branches where both ends
|
||||
// eventually converge to a single block.
|
||||
SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
|
||||
TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
|
||||
FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
|
||||
BasicBlock *CommonSucc = nullptr;
|
||||
if (TrueDestSucc.count(FalseDest)) {
|
||||
CommonSucc = FalseDest;
|
||||
} else if (FalseDestSucc.count(TrueDest)) {
|
||||
CommonSucc = TrueDest;
|
||||
} else {
|
||||
set_intersect(TrueDestSucc, FalseDestSucc);
|
||||
// If there's one common successor use that.
|
||||
if (TrueDestSucc.size() == 1)
|
||||
CommonSucc = *TrueDestSucc.begin();
|
||||
// If there's more than one pick whichever appears first in the block list
|
||||
// (we can't use the value returned by TrueDestSucc.begin() as it's
|
||||
// unpredictable which element gets returned).
|
||||
else if (!TrueDestSucc.empty()) {
|
||||
Function *F = TrueDest->getParent();
|
||||
auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
|
||||
auto It = std::find_if(F->begin(), F->end(), IsSucc);
|
||||
assert(It != F->end() && "Could not find successor in function");
|
||||
CommonSucc = &*It;
|
||||
}
|
||||
}
|
||||
// The common successor has to be dominated by the branch, as otherwise
|
||||
// there will be some other path to the successor that will not be
|
||||
// controlled by this branch so any phi we hoist would be controlled by the
|
||||
// wrong condition. This also takes care of avoiding hoisting of loop back
|
||||
// edges.
|
||||
// TODO: In some cases this could be relaxed if the successor is dominated
|
||||
// by another block that's been hoisted and we can guarantee that the
|
||||
// control flow has been replicated exactly.
|
||||
if (CommonSucc && DT->dominates(BI, CommonSucc))
|
||||
HoistableBranches[BI] = CommonSucc;
|
||||
}
|
||||
|
||||
bool canHoistPHI(PHINode *PN) {
|
||||
// The phi must have loop invariant operands.
|
||||
if (!CurLoop->hasLoopInvariantOperands(PN))
|
||||
return false;
|
||||
// We can hoist phis if the block they are in is the target of hoistable
|
||||
// branches which cover all of the predecessors of the block.
|
||||
SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
|
||||
BasicBlock *BB = PN->getParent();
|
||||
for (BasicBlock *PredBB : predecessors(BB))
|
||||
PredecessorBlocks.insert(PredBB);
|
||||
// If we have fewer predecessor blocks than predecessors then the phi will
|
||||
// have more than one incoming value for the same block which we can't
|
||||
// handle.
|
||||
// TODO: This could be handled by erasing some of the duplicate incoming
|
||||
// values.
|
||||
if (PredecessorBlocks.size() != pred_size(BB))
|
||||
return false;
|
||||
for (auto &Pair : HoistableBranches) {
|
||||
if (Pair.second == BB) {
|
||||
// Which blocks are predecessors via this branch depends on if the
|
||||
// branch is triangle-like or diamond-like.
|
||||
if (Pair.first->getSuccessor(0) == BB) {
|
||||
PredecessorBlocks.erase(Pair.first->getParent());
|
||||
PredecessorBlocks.erase(Pair.first->getSuccessor(1));
|
||||
} else if (Pair.first->getSuccessor(1) == BB) {
|
||||
PredecessorBlocks.erase(Pair.first->getParent());
|
||||
PredecessorBlocks.erase(Pair.first->getSuccessor(0));
|
||||
} else {
|
||||
PredecessorBlocks.erase(Pair.first->getSuccessor(0));
|
||||
PredecessorBlocks.erase(Pair.first->getSuccessor(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
// PredecessorBlocks will now be empty if for every predecessor of BB we
|
||||
// found a hoistable branch source.
|
||||
return PredecessorBlocks.empty();
|
||||
}
|
||||
|
||||
BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
|
||||
// If BB has already been hoisted, return that
|
||||
if (HoistDestinationMap.count(BB))
|
||||
return HoistDestinationMap[BB];
|
||||
|
||||
// Check if this block is conditional based on a pending branch
|
||||
auto HasBBAsSuccessor =
|
||||
[&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
|
||||
return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
|
||||
Pair.first->getSuccessor(1) == BB);
|
||||
};
|
||||
auto It = std::find_if(HoistableBranches.begin(), HoistableBranches.end(),
|
||||
HasBBAsSuccessor);
|
||||
|
||||
// If not involved in a pending branch, hoist to preheader
|
||||
BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
|
||||
if (It == HoistableBranches.end()) {
|
||||
LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
|
||||
<< " as hoist destination for " << BB->getName()
|
||||
<< "\n");
|
||||
HoistDestinationMap[BB] = InitialPreheader;
|
||||
return InitialPreheader;
|
||||
}
|
||||
BranchInst *BI = It->first;
|
||||
assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
|
||||
HoistableBranches.end() &&
|
||||
"BB is expected to be the target of at most one branch");
|
||||
|
||||
LLVMContext &C = BB->getContext();
|
||||
BasicBlock *TrueDest = BI->getSuccessor(0);
|
||||
BasicBlock *FalseDest = BI->getSuccessor(1);
|
||||
BasicBlock *CommonSucc = HoistableBranches[BI];
|
||||
BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
|
||||
|
||||
// Create hoisted versions of blocks that currently don't have them
|
||||
auto CreateHoistedBlock = [&](BasicBlock *Orig) {
|
||||
if (HoistDestinationMap.count(Orig))
|
||||
return HoistDestinationMap[Orig];
|
||||
BasicBlock *New =
|
||||
BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
|
||||
HoistDestinationMap[Orig] = New;
|
||||
DT->addNewBlock(New, HoistTarget);
|
||||
if (CurLoop->getParentLoop())
|
||||
CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
|
||||
++NumCreatedBlocks;
|
||||
LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
|
||||
<< " as hoist destination for " << Orig->getName()
|
||||
<< "\n");
|
||||
return New;
|
||||
};
|
||||
BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
|
||||
BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
|
||||
BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
|
||||
|
||||
// Link up these blocks with branches.
|
||||
if (!HoistCommonSucc->getTerminator()) {
|
||||
// The new common successor we've generated will branch to whatever that
|
||||
// hoist target branched to.
|
||||
BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
|
||||
assert(TargetSucc && "Expected hoist target to have a single successor");
|
||||
HoistCommonSucc->moveBefore(TargetSucc);
|
||||
BranchInst::Create(TargetSucc, HoistCommonSucc);
|
||||
}
|
||||
if (!HoistTrueDest->getTerminator()) {
|
||||
HoistTrueDest->moveBefore(HoistCommonSucc);
|
||||
BranchInst::Create(HoistCommonSucc, HoistTrueDest);
|
||||
}
|
||||
if (!HoistFalseDest->getTerminator()) {
|
||||
HoistFalseDest->moveBefore(HoistCommonSucc);
|
||||
BranchInst::Create(HoistCommonSucc, HoistFalseDest);
|
||||
}
|
||||
|
||||
// If BI is being cloned to what was originally the preheader then
|
||||
// HoistCommonSucc will now be the new preheader.
|
||||
if (HoistTarget == InitialPreheader) {
|
||||
// Phis in the loop header now need to use the new preheader.
|
||||
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
|
||||
// The new preheader dominates the loop header.
|
||||
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
|
||||
DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
|
||||
DT->changeImmediateDominator(HeaderNode, PreheaderNode);
|
||||
// The preheader hoist destination is now the new preheader, with the
|
||||
// exception of the hoist destination of this branch.
|
||||
for (auto &Pair : HoistDestinationMap)
|
||||
if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
|
||||
Pair.second = HoistCommonSucc;
|
||||
}
|
||||
|
||||
// Now finally clone BI.
|
||||
ReplaceInstWithInst(
|
||||
HoistTarget->getTerminator(),
|
||||
BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
|
||||
++NumClonedBranches;
|
||||
|
||||
assert(CurLoop->getLoopPreheader() &&
|
||||
"Hoisting blocks should not have destroyed preheader");
|
||||
return HoistDestinationMap[BB];
|
||||
}
|
||||
};
|
||||
|
||||
/// Walk the specified region of the CFG (defined by all blocks dominated by
|
||||
/// the specified block, and that are in the current loop) in depth first
|
||||
/// order w.r.t the DominatorTree. This allows us to visit definitions before
|
||||
|
@ -674,23 +451,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
|
|||
CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
|
||||
"Unexpected input to hoistRegion");
|
||||
|
||||
ControlFlowHoister CFH(LI, DT, CurLoop);
|
||||
// We want to visit parents before children. We will enqueue all the parents
|
||||
// before their children in the worklist and process the worklist in order.
|
||||
SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
|
||||
|
||||
// Keep track of instructions that have been hoisted, as they may need to be
|
||||
// re-hoisted if they end up not dominating all of their uses.
|
||||
SmallVector<Instruction *, 16> HoistedInstructions;
|
||||
|
||||
// Record what the original preheader is, as we'll need it later if we need to
|
||||
// re-hoist instructions.
|
||||
BasicBlock *OriginalPreheader = CurLoop->getLoopPreheader();
|
||||
|
||||
// For PHI hoisting to work we need to hoist blocks before their successors.
|
||||
// We can do this by iterating through the blocks in the loop in reverse
|
||||
// post-order.
|
||||
LoopBlocksRPO Worklist(CurLoop);
|
||||
Worklist.perform(LI);
|
||||
bool Changed = false;
|
||||
for (BasicBlock *BB : Worklist) {
|
||||
for (DomTreeNode *DTN : Worklist) {
|
||||
BasicBlock *BB = DTN->getBlock();
|
||||
// Only need to process the contents of this block if it is not part of a
|
||||
// subloop (which would already have been processed).
|
||||
if (inSubLoop(BB, CurLoop, LI))
|
||||
|
@ -716,16 +483,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
|
|||
// Try hoisting the instruction out to the preheader. We can only do
|
||||
// this if all of the operands of the instruction are loop invariant and
|
||||
// if it is safe to hoist the instruction.
|
||||
// TODO: It may be safe to hoist if we are hoisting to a conditional block
|
||||
// and we have accurately duplicated the control flow from the loop header
|
||||
// to that block.
|
||||
//
|
||||
if (CurLoop->hasLoopInvariantOperands(&I) &&
|
||||
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
|
||||
isSafeToExecuteUnconditionally(
|
||||
I, DT, CurLoop, SafetyInfo, ORE,
|
||||
CurLoop->getLoopPreheader()->getTerminator())) {
|
||||
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE);
|
||||
HoistedInstructions.push_back(&I);
|
||||
hoist(I, DT, CurLoop, SafetyInfo, ORE);
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -750,9 +514,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
|
|||
I.replaceAllUsesWith(Product);
|
||||
eraseInstruction(I, *SafetyInfo, CurAST);
|
||||
|
||||
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
|
||||
SafetyInfo, ORE);
|
||||
HoistedInstructions.push_back(ReciprocalDivisor);
|
||||
hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -764,58 +526,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
|
|||
CurLoop->hasLoopInvariantOperands(&I) &&
|
||||
SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
|
||||
SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop)) {
|
||||
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, ORE);
|
||||
HoistedInstructions.push_back(&I);
|
||||
hoist(I, DT, CurLoop, SafetyInfo, ORE);
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
|
||||
if (CFH.canHoistPHI(PN)) {
|
||||
// Redirect incoming blocks first to ensure that we create hoisted
|
||||
// versions of those blocks before we hoist the phi.
|
||||
for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
|
||||
PN->setIncomingBlock(
|
||||
i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
|
||||
hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
|
||||
ORE);
|
||||
assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
|
||||
Changed = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Remember possibly hoistable branches so we can actually hoist them
|
||||
// later if needed.
|
||||
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
|
||||
CFH.registerPossiblyHoistableBranch(BI);
|
||||
}
|
||||
}
|
||||
|
||||
// If we hoisted instructions to a conditional block they may not dominate
|
||||
// their uses that weren't hoisted (such as phis where some operands are not
|
||||
// loop invariant). If so make them unconditional by moving them to the end of
|
||||
// the original preheader, which is guaranteed to dominate everything in the
|
||||
// loop. We iterate through the instructions in reverse order which ensures
|
||||
// that when we rehoist an instruction we rehoist its operands.
|
||||
Instruction *HoistPoint = OriginalPreheader->getTerminator();
|
||||
for (Instruction *I : reverse(HoistedInstructions)) {
|
||||
if (!llvm::all_of(I->uses(), [&](Use &U) { return DT->dominates(I, U); })) {
|
||||
LLVM_DEBUG(dbgs() << "LICM rehoisting to " << OriginalPreheader->getName()
|
||||
<< ": " << *I << "\n");
|
||||
moveInstructionBefore(*I, *HoistPoint, *SafetyInfo);
|
||||
HoistPoint = I;
|
||||
Changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Now that we've finished hoisting make sure that LI and DT are still valid.
|
||||
#ifndef NDEBUG
|
||||
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
|
||||
"Dominator tree verification failed");
|
||||
LI->verify(*DT);
|
||||
#endif
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
|
@ -1383,9 +1100,9 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
|
|||
/// is safe to hoist, this function is called to do the dirty work.
|
||||
///
|
||||
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
|
||||
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE) {
|
||||
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getName() << ": " << I
|
||||
ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
|
||||
auto *Preheader = CurLoop->getLoopPreheader();
|
||||
LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
|
||||
<< "\n");
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
|
||||
|
@ -1403,12 +1120,8 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
|
|||
!SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
|
||||
I.dropUnknownNonDebugMetadata();
|
||||
|
||||
if (isa<PHINode>(I))
|
||||
// Move the new node to the end of the phi list in the destination block.
|
||||
moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo);
|
||||
else
|
||||
// Move the new node to the destination block, before its terminator.
|
||||
moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo);
|
||||
// Move the new node to the Preheader, before its terminator.
|
||||
moveInstructionBefore(I, *Preheader->getTerminator(), *SafetyInfo);
|
||||
|
||||
// Do not retain debug locations when we are moving instructions to different
|
||||
// basic blocks, because we want to avoid jumpy line tables. Calls, however,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -266,26 +266,19 @@ for.end: ; preds = %for.body
|
|||
; variant/invariant values being stored to invariant address.
|
||||
; test checks that the last element of the phi is extracted and scalar stored
|
||||
; into the uniform address within the loop.
|
||||
; Since the condition and the phi are loop invariant, they are LICM'ed before
|
||||
; Since the condition and the phi are loop invariant, they are LICM'ed after
|
||||
; vectorization.
|
||||
; CHECK-LABEL: inv_val_store_to_inv_address_conditional_inv
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
|
||||
; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
|
||||
; CHECK-NEXT: [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NTRUNC]], [[K:%.*]]
|
||||
; CHECK-NEXT: br i1 [[CMP]], label %[[COND_STORE_LICM:.*]], label %[[COND_STORE_K_LICM:.*]]
|
||||
; CHECK: [[COND_STORE_LICM]]:
|
||||
; CHECK-NEXT: br label %[[LATCH_LICM:.*]]
|
||||
; CHECK: [[COND_STORE_K_LICM]]:
|
||||
; CHECK-NEXT: br label %[[LATCH_LICM]]
|
||||
; CHECK: [[LATCH_LICM]]:
|
||||
; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], %[[COND_STORE_LICM]] ], [ [[K]], %[[COND_STORE_K_LICM]] ]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[N]], 1
|
||||
; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i64 [[N]], i64 1
|
||||
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
|
||||
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
||||
; CHECK: vector.memcheck:
|
||||
; CHECK-NEXT: [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
|
||||
; CHECK-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[N]], 1
|
||||
; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP1]], i64 [[N]], i64 1
|
||||
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[SMAX2]]
|
||||
|
@ -298,13 +291,17 @@ for.end: ; preds = %for.body
|
|||
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
|
||||
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[NTRUNC]], i32 0
|
||||
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> undef, i1 [[CMP]], i32 3
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[K]], i32 3
|
||||
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32> [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[PREDPHI]], i32 3
|
||||
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
||||
; CHECK: vector.body:
|
||||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
|
||||
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT6]], <4 x i32>* [[TMP7]], align 4
|
||||
; CHECK-NEXT: store i32 [[STOREVAL]], i32* [[A]], align 4
|
||||
; CHECK-NEXT: store i32 [[TMP5]], i32* [[A]], align 4
|
||||
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]]
|
||||
|
@ -324,6 +321,7 @@ for.end: ; preds = %for.body
|
|||
; CHECK: cond_store_k:
|
||||
; CHECK-NEXT: br label [[LATCH]]
|
||||
; CHECK: latch:
|
||||
; CHECK-NEXT: [[STOREVAL:%.*]] = phi i32 [ [[NTRUNC]], [[COND_STORE]] ], [ [[K]], [[COND_STORE_K]] ]
|
||||
; CHECK-NEXT: store i32 [[STOREVAL]], i32* [[A]], align 4
|
||||
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
|
||||
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
|
||||
|
|
Loading…
Reference in New Issue