[MachineSink] replace MachineLoop with MachineCycle
reapply 62a9b36fcf
and fix module build failure: 1: remove MachineCycleInfoWrapperPass from MachinePassRegistry.def; MachineCycleInfoWrapperPass is an analysis pass and should not be there. 2: move the definition of MachineCycleInfoPrinterPass to the cpp file. Otherwise, there are module conflicts for MachineCycleInfoWrapperPass between MachinePassRegistry.def and MachineCycleAnalysis.h after 62a9b36fcf
. MachineCycle can handle irreducible loops. Natural loop analysis (MachineLoop) cannot return the correct loop depth if the loop is irreducible, and MachineSink is sensitive to the loop depth; see MachineSinking::isProfitableToSinkTo(). This patch tries to use MachineCycle so that we can handle irreducible loops better. Reviewed By: sameerds, MatzeB Differential Revision: https://reviews.llvm.org/D123995
This commit is contained in:
parent
ad1d60c3be
commit
d79275238f
|
@ -66,6 +66,44 @@ void GenericCycle<ContextT>::getExitBlocks(
|
|||
}
|
||||
}
|
||||
|
||||
template <typename ContextT>
|
||||
auto GenericCycle<ContextT>::getCyclePreheader() const -> BlockT * {
|
||||
BlockT *Predecessor = getCyclePredecessor();
|
||||
if (!Predecessor)
|
||||
return nullptr;
|
||||
|
||||
assert(isReducible() && "Cycle Predecessor must be in a reducible cycle!");
|
||||
|
||||
if (succ_size(Predecessor) != 1)
|
||||
return nullptr;
|
||||
|
||||
// Make sure we are allowed to hoist instructions into the predecessor.
|
||||
if (!Predecessor->isLegalToHoistInto())
|
||||
return nullptr;
|
||||
|
||||
return Predecessor;
|
||||
}
|
||||
|
||||
template <typename ContextT>
|
||||
auto GenericCycle<ContextT>::getCyclePredecessor() const -> BlockT * {
|
||||
if (!isReducible())
|
||||
return nullptr;
|
||||
|
||||
BlockT *Out = nullptr;
|
||||
|
||||
// Loop over the predecessors of the header node...
|
||||
BlockT *Header = getHeader();
|
||||
for (const auto Pred : predecessors(Header)) {
|
||||
if (!contains(Pred)) {
|
||||
if (Out && Out != Pred)
|
||||
return nullptr;
|
||||
Out = Pred;
|
||||
}
|
||||
}
|
||||
|
||||
return Out;
|
||||
}
|
||||
|
||||
/// \brief Helper class for computing cycle information.
|
||||
template <typename ContextT> class GenericCycleInfoCompute {
|
||||
using BlockT = typename ContextT::BlockT;
|
||||
|
@ -326,6 +364,18 @@ auto GenericCycleInfo<ContextT>::getCycle(const BlockT *Block) const
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
/// \brief get the depth for the cycle which containing a given block.
|
||||
///
|
||||
/// \returns the depth for the innermost cycle containing \p Block or 0 if it is
|
||||
/// not contained in any cycle.
|
||||
template <typename ContextT>
|
||||
unsigned GenericCycleInfo<ContextT>::getCycleDepth(const BlockT *Block) const {
|
||||
CycleT *Cycle = getCycle(Block);
|
||||
if (!Cycle)
|
||||
return 0;
|
||||
return Cycle->getDepth();
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// \brief Validate the internal consistency of the cycle tree.
|
||||
///
|
||||
|
|
|
@ -100,6 +100,10 @@ public:
|
|||
|
||||
/// Return the header of the cycle, which is the first recorded entry block.
BlockT *getHeader() const {
  return Entries[0];
}
|
||||
|
||||
/// Return a read-only view of all entry blocks of the cycle.
const SmallVectorImpl<BlockT *> &getEntries() const { return Entries; }
|
||||
|
||||
/// \brief Return whether \p Block is an entry block of the cycle.
|
||||
bool isEntry(BlockT *Block) const { return is_contained(Entries, Block); }
|
||||
|
||||
|
@ -124,6 +128,16 @@ public:
|
|||
/// branched to.
|
||||
void getExitBlocks(SmallVectorImpl<BlockT *> &TmpStorage) const;
|
||||
|
||||
/// Return the preheader block for this cycle. A preheader is well-defined for
|
||||
/// a reducible cycle (see docs/LoopTerminology.rst): it is the only entering
|
||||
/// block, and its only edge is to the entry block. Return null for irreducible
|
||||
/// cycles.
|
||||
BlockT *getCyclePreheader() const;
|
||||
|
||||
/// If the cycle is reducible and its header has exactly one predecessor
|
||||
/// outside the cycle, return that predecessor; otherwise return nullptr.
|
||||
BlockT *getCyclePredecessor() const;
|
||||
|
||||
/// Iteration over child cycles.
|
||||
//@{
|
||||
using const_child_iterator_base =
|
||||
|
@ -239,6 +253,7 @@ public:
|
|||
const ContextT &getSSAContext() const { return Context; }
|
||||
|
||||
CycleT *getCycle(const BlockT *Block) const;
|
||||
unsigned getCycleDepth(const BlockT *Block) const;
|
||||
CycleT *getTopLevelParentCycle(const BlockT *Block) const;
|
||||
|
||||
/// Move \p Child to \p NewParent by manipulating Children vectors.
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
#include "llvm/ADT/GenericCycleInfo.h"
|
||||
#include "llvm/CodeGen/MachineSSAContext.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -25,6 +27,29 @@ extern template class GenericCycle<MachineSSAContext>;
|
|||
using MachineCycleInfo = GenericCycleInfo<MachineSSAContext>;
|
||||
using MachineCycle = MachineCycleInfo::CycleT;
|
||||
|
||||
/// Legacy analysis pass which computes a \ref MachineCycleInfo.
|
||||
class MachineCycleInfoWrapperPass : public MachineFunctionPass {
|
||||
MachineFunction *F = nullptr;
|
||||
MachineCycleInfo CI;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
MachineCycleInfoWrapperPass();
|
||||
|
||||
MachineCycleInfo &getCycleInfo() { return CI; }
|
||||
const MachineCycleInfo &getCycleInfo() const { return CI; }
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
void releaseMemory() override;
|
||||
void print(raw_ostream &OS, const Module *M = nullptr) const override;
|
||||
};
|
||||
|
||||
// TODO: add this function to GenericCycle template after implementing IR
|
||||
// version.
|
||||
bool isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_CODEGEN_MACHINECYCLEANALYSIS_H
|
||||
|
|
|
@ -199,6 +199,5 @@ DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ())
|
|||
DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ())
|
||||
DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ())
|
||||
DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ())
|
||||
DUMMY_MACHINE_FUNCTION_PASS("machine-cycles", MachineCycleInfoWrapperPass, ())
|
||||
DUMMY_MACHINE_FUNCTION_PASS("print-machine-cycles", MachineCycleInfoPrinterPass, ())
|
||||
#undef DUMMY_MACHINE_FUNCTION_PASS
|
||||
|
|
|
@ -28,6 +28,8 @@ template <typename, bool> class DominatorTreeBase;
|
|||
|
||||
/// Free-function form of MachineBasicBlock::successors().
inline auto successors(MachineBasicBlock *BB) {
  return BB->successors();
}
|
||||
/// Free-function form of MachineBasicBlock::predecessors().
inline auto predecessors(MachineBasicBlock *BB) {
  return BB->predecessors();
}
|
||||
/// Free-function form of MachineBasicBlock::succ_size().
inline unsigned succ_size(MachineBasicBlock *BB) {
  return BB->succ_size();
}
|
||||
/// Free-function form of MachineBasicBlock::pred_size().
inline unsigned pred_size(MachineBasicBlock *BB) {
  return BB->pred_size();
}
|
||||
|
||||
template <> class GenericSSAContext<MachineFunction> {
|
||||
const MachineRegisterInfo *RegInfo = nullptr;
|
||||
|
|
|
@ -8,50 +8,15 @@
|
|||
|
||||
#include "llvm/CodeGen/MachineCycleAnalysis.h"
|
||||
#include "llvm/ADT/GenericCycleImpl.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineSSAContext.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
template class llvm::GenericCycleInfo<llvm::MachineSSAContext>;
|
||||
template class llvm::GenericCycle<llvm::MachineSSAContext>;
|
||||
|
||||
namespace {
|
||||
|
||||
/// Legacy analysis pass which computes a \ref MachineCycleInfo.
|
||||
class MachineCycleInfoWrapperPass : public MachineFunctionPass {
|
||||
MachineFunction *F = nullptr;
|
||||
MachineCycleInfo CI;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
MachineCycleInfoWrapperPass();
|
||||
|
||||
MachineCycleInfo &getCycleInfo() { return CI; }
|
||||
const MachineCycleInfo &getCycleInfo() const { return CI; }
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
void releaseMemory() override;
|
||||
void print(raw_ostream &OS, const Module *M = nullptr) const override;
|
||||
|
||||
// TODO: verify analysis
|
||||
};
|
||||
|
||||
class MachineCycleInfoPrinterPass : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
MachineCycleInfoPrinterPass();
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
char MachineCycleInfoWrapperPass::ID = 0;
|
||||
|
||||
MachineCycleInfoWrapperPass::MachineCycleInfoWrapperPass()
|
||||
|
@ -87,6 +52,16 @@ void MachineCycleInfoWrapperPass::releaseMemory() {
|
|||
F = nullptr;
|
||||
}
|
||||
|
||||
class MachineCycleInfoPrinterPass : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
MachineCycleInfoPrinterPass();
|
||||
|
||||
bool runOnMachineFunction(MachineFunction &F) override;
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
};
|
||||
|
||||
char MachineCycleInfoPrinterPass::ID = 0;
|
||||
|
||||
MachineCycleInfoPrinterPass::MachineCycleInfoPrinterPass()
|
||||
|
@ -111,3 +86,62 @@ bool MachineCycleInfoPrinterPass::runOnMachineFunction(MachineFunction &F) {
|
|||
CI.print(errs());
|
||||
return false;
|
||||
}
|
||||
|
||||
bool llvm::isCycleInvariant(const MachineCycle *Cycle, MachineInstr &I) {
|
||||
MachineFunction *MF = I.getParent()->getParent();
|
||||
MachineRegisterInfo *MRI = &MF->getRegInfo();
|
||||
const TargetSubtargetInfo &ST = MF->getSubtarget();
|
||||
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
|
||||
const TargetInstrInfo *TII = ST.getInstrInfo();
|
||||
|
||||
// The instruction is cycle invariant if all of its operands are.
|
||||
for (const MachineOperand &MO : I.operands()) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
|
||||
Register Reg = MO.getReg();
|
||||
if (Reg == 0)
|
||||
continue;
|
||||
|
||||
// An instruction that uses or defines a physical register can't e.g. be
|
||||
// hoisted, so mark this as not invariant.
|
||||
if (Register::isPhysicalRegister(Reg)) {
|
||||
if (MO.isUse()) {
|
||||
// If the physreg has no defs anywhere, it's just an ambient register
|
||||
// and we can freely move its uses. Alternatively, if it's allocatable,
|
||||
// it could get allocated to something with a def during allocation.
|
||||
// However, if the physreg is known to always be caller saved/restored
|
||||
// then this use is safe to hoist.
|
||||
if (!MRI->isConstantPhysReg(Reg) &&
|
||||
!(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
|
||||
!TII->isIgnorableUse(MO))
|
||||
return false;
|
||||
// Otherwise it's safe to move.
|
||||
continue;
|
||||
} else if (!MO.isDead()) {
|
||||
// A def that isn't dead can't be moved.
|
||||
return false;
|
||||
} else if (any_of(Cycle->getEntries(),
|
||||
[&](const MachineBasicBlock *Block) {
|
||||
return Block->isLiveIn(Reg);
|
||||
})) {
|
||||
// If the reg is live into any header of the cycle we can't hoist an
|
||||
// instruction which would clobber it.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!MO.isUse())
|
||||
continue;
|
||||
|
||||
assert(MRI->getVRegDef(Reg) && "Machine instr not mapped for this vreg?!");
|
||||
|
||||
// If the cycle contains the definition of an operand, then the instruction
|
||||
// isn't cycle invariant.
|
||||
if (Cycle->contains(MRI->getVRegDef(Reg)->getParent()))
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we got this far, the instruction is cycle invariant!
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
|
||||
#include "llvm/CodeGen/MachineCycleAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
@ -95,18 +96,18 @@ static cl::opt<unsigned> SinkLoadBlocksThreshold(
|
|||
cl::init(20), cl::Hidden);
|
||||
|
||||
static cl::opt<bool>
|
||||
SinkInstsIntoLoop("sink-insts-to-avoid-spills",
|
||||
cl::desc("Sink instructions into loops to avoid "
|
||||
"register spills"),
|
||||
cl::init(false), cl::Hidden);
|
||||
SinkInstsIntoCycle("sink-insts-to-avoid-spills",
|
||||
cl::desc("Sink instructions into cycles to avoid "
|
||||
"register spills"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
static cl::opt<unsigned> SinkIntoLoopLimit(
|
||||
"machine-sink-loop-limit",
|
||||
cl::desc("The maximum number of instructions considered for loop sinking."),
|
||||
static cl::opt<unsigned> SinkIntoCycleLimit(
|
||||
"machine-sink-cycle-limit",
|
||||
cl::desc("The maximum number of instructions considered for cycle sinking."),
|
||||
cl::init(50), cl::Hidden);
|
||||
|
||||
STATISTIC(NumSunk, "Number of machine instructions sunk");
|
||||
STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop");
|
||||
STATISTIC(NumCycleSunk, "Number of machine instructions sunk into a cycle");
|
||||
STATISTIC(NumSplit, "Number of critical edges split");
|
||||
STATISTIC(NumCoalesces, "Number of copies coalesced");
|
||||
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
|
||||
|
@ -119,7 +120,7 @@ namespace {
|
|||
MachineRegisterInfo *MRI; // Machine register information
|
||||
MachineDominatorTree *DT; // Machine dominator tree
|
||||
MachinePostDominatorTree *PDT; // Machine post dominator tree
|
||||
MachineLoopInfo *LI;
|
||||
MachineCycleInfo *CI;
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
const MachineBranchProbabilityInfo *MBPI;
|
||||
AliasAnalysis *AA;
|
||||
|
@ -180,8 +181,9 @@ namespace {
|
|||
AU.addRequired<AAResultsWrapperPass>();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addRequired<MachinePostDominatorTree>();
|
||||
AU.addRequired<MachineLoopInfo>();
|
||||
AU.addRequired<MachineCycleInfoWrapperPass>();
|
||||
AU.addRequired<MachineBranchProbabilityInfo>();
|
||||
AU.addPreserved<MachineCycleInfoWrapperPass>();
|
||||
AU.addPreserved<MachineLoopInfo>();
|
||||
if (UseBlockFreqInfo)
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
|
@ -232,9 +234,9 @@ namespace {
|
|||
MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
|
||||
bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);
|
||||
|
||||
void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
|
||||
SmallVectorImpl<MachineInstr *> &Candidates);
|
||||
bool SinkIntoLoop(MachineLoop *L, MachineInstr &I);
|
||||
void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
|
||||
SmallVectorImpl<MachineInstr *> &Candidates);
|
||||
bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
|
||||
|
||||
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
|
||||
MachineBasicBlock *MBB,
|
||||
|
@ -261,7 +263,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
|
|||
"Machine code sinking", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
|
||||
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
|
||||
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
|
||||
"Machine code sinking", false, false)
|
||||
|
@ -378,26 +380,27 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
|
|||
return false;
|
||||
}
|
||||
|
||||
void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB,
|
||||
void MachineSinking::FindCycleSinkCandidates(
|
||||
MachineCycle *Cycle, MachineBasicBlock *BB,
|
||||
SmallVectorImpl<MachineInstr *> &Candidates) {
|
||||
for (auto &MI : *BB) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI);
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
|
||||
if (!TII->shouldSink(MI)) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this "
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
|
||||
"target\n");
|
||||
continue;
|
||||
}
|
||||
if (!L->isLoopInvariant(MI)) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n");
|
||||
if (!isCycleInvariant(Cycle, MI)) {
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
|
||||
continue;
|
||||
}
|
||||
bool DontMoveAcrossStore = true;
|
||||
if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
|
||||
continue;
|
||||
}
|
||||
if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Dont sink GOT or constant pool loads\n");
|
||||
continue;
|
||||
}
|
||||
if (MI.isConvergent())
|
||||
|
@ -409,7 +412,7 @@ void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *B
|
|||
if (!MRI->hasOneDef(MO.getReg()))
|
||||
continue;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
|
||||
Candidates.push_back(&MI);
|
||||
}
|
||||
}
|
||||
|
@ -425,22 +428,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
|
|||
MRI = &MF.getRegInfo();
|
||||
DT = &getAnalysis<MachineDominatorTree>();
|
||||
PDT = &getAnalysis<MachinePostDominatorTree>();
|
||||
LI = &getAnalysis<MachineLoopInfo>();
|
||||
CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
|
||||
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
|
||||
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
|
||||
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
|
||||
RegClassInfo.runOnMachineFunction(MF);
|
||||
|
||||
// MachineSink currently uses MachineLoopInfo, which only recognizes natural
|
||||
// loops. As such, we could sink instructions into irreducible cycles, which
|
||||
// would be non-profitable.
|
||||
// WARNING: The current implementation of hasStoreBetween() is incorrect for
|
||||
// sinking into irreducible cycles (PR53990), this bailout is currently
|
||||
// necessary for correctness, not just profitability.
|
||||
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
|
||||
if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI))
|
||||
return false;
|
||||
|
||||
bool EverMadeChange = false;
|
||||
|
||||
while (true) {
|
||||
|
@ -473,32 +466,33 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
|
|||
EverMadeChange = true;
|
||||
}
|
||||
|
||||
if (SinkInstsIntoLoop) {
|
||||
SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end());
|
||||
for (auto *L : Loops) {
|
||||
MachineBasicBlock *Preheader = LI->findLoopPreheader(L);
|
||||
if (SinkInstsIntoCycle) {
|
||||
SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
|
||||
CI->toplevel_end());
|
||||
for (auto *Cycle : Cycles) {
|
||||
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
|
||||
if (!Preheader) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
|
||||
continue;
|
||||
}
|
||||
SmallVector<MachineInstr *, 8> Candidates;
|
||||
FindLoopSinkCandidates(L, Preheader, Candidates);
|
||||
FindCycleSinkCandidates(Cycle, Preheader, Candidates);
|
||||
|
||||
// Walk the candidates in reverse order so that we start with the use
|
||||
// of a def-use chain, if there is any.
|
||||
// TODO: Sort the candidates using a cost-model.
|
||||
unsigned i = 0;
|
||||
for (MachineInstr *I : llvm::reverse(Candidates)) {
|
||||
if (i++ == SinkIntoLoopLimit) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
|
||||
if (i++ == SinkIntoCycleLimit) {
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
|
||||
"be analysed.");
|
||||
break;
|
||||
}
|
||||
|
||||
if (!SinkIntoLoop(L, *I))
|
||||
if (!SinkIntoCycle(Cycle, *I))
|
||||
break;
|
||||
EverMadeChange = true;
|
||||
++NumLoopSunk;
|
||||
++NumCycleSunk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -520,12 +514,12 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
|
|||
|
||||
// Don't bother sinking code out of unreachable blocks. In addition to being
|
||||
// unprofitable, it can also lead to infinite looping, because in an
|
||||
// unreachable loop there may be nowhere to stop.
|
||||
// unreachable cycle there may be nowhere to stop.
|
||||
if (!DT->isReachableFromEntry(&MBB)) return false;
|
||||
|
||||
bool MadeChange = false;
|
||||
|
||||
// Cache all successors, sorted by frequency info and loop depth.
|
||||
// Cache all successors, sorted by frequency info and cycle depth.
|
||||
AllSuccsCache AllSuccessors;
|
||||
|
||||
// Walk the basic block bottom-up. Remember if we saw a store.
|
||||
|
@ -644,13 +638,16 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
|
|||
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
|
||||
return false;
|
||||
|
||||
// Avoid breaking back edge. From == To means backedge for single BB loop.
|
||||
// Avoid breaking back edge. From == To means backedge for single BB cycle.
|
||||
if (!SplitEdges || FromBB == ToBB)
|
||||
return false;
|
||||
|
||||
// Check for backedges of more "complex" loops.
|
||||
if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
|
||||
LI->isLoopHeader(ToBB))
|
||||
MachineCycle *FromCycle = CI->getCycle(FromBB);
|
||||
MachineCycle *ToCycle = CI->getCycle(ToBB);
|
||||
|
||||
// Check for backedges of more "complex" cycles.
|
||||
if (FromCycle == ToCycle && FromCycle &&
|
||||
(!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
|
||||
return false;
|
||||
|
||||
// It's not always legal to break critical edges and sink the computation
|
||||
|
@ -753,9 +750,9 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
|
|||
if (!PDT->dominates(SuccToSinkTo, MBB))
|
||||
return true;
|
||||
|
||||
// It is profitable to sink an instruction from a deeper loop to a shallower
|
||||
// loop, even if the latter post-dominates the former (PR21115).
|
||||
if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
|
||||
// It is profitable to sink an instruction from a deeper cycle to a shallower
|
||||
// cycle, even if the latter post-dominates the former (PR21115).
|
||||
if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
|
||||
return true;
|
||||
|
||||
// Check if only use in post dominated block is PHI instruction.
|
||||
|
@ -776,11 +773,11 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
|
|||
FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
|
||||
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
|
||||
|
||||
MachineLoop *ML = LI->getLoopFor(MBB);
|
||||
MachineCycle *MCycle = CI->getCycle(MBB);
|
||||
|
||||
// If the instruction is not inside a loop, it is not profitable to sink MI to
|
||||
// If the instruction is not inside a cycle, it is not profitable to sink MI to
|
||||
// a post dominate block SuccToSinkTo.
|
||||
if (!ML)
|
||||
if (!MCycle)
|
||||
return false;
|
||||
|
||||
auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
|
||||
|
@ -798,7 +795,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
|
|||
return false;
|
||||
};
|
||||
|
||||
// If this instruction is inside a loop and sinking this instruction can make
|
||||
// If this instruction is inside a Cycle and sinking this instruction can make
|
||||
// more registers live range shorten, it is still profitable.
|
||||
for (const MachineOperand &MO : MI.operands()) {
|
||||
// Ignore non-register operands.
|
||||
|
@ -826,14 +823,15 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
|
|||
return false;
|
||||
} else {
|
||||
MachineInstr *DefMI = MRI->getVRegDef(Reg);
|
||||
// DefMI is defined outside of loop. There should be no live range
|
||||
// impact for this operand. Defination outside of loop means:
|
||||
// 1: defination is outside of loop.
|
||||
// 2: defination is in this loop, but it is a PHI in the loop header.
|
||||
if (LI->getLoopFor(DefMI->getParent()) != ML ||
|
||||
(DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
|
||||
MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
|
||||
// DefMI is defined outside of cycle. There should be no live range
|
||||
// impact for this operand. Definition outside of cycle means:
|
||||
// 1: definition is outside of cycle.
|
||||
// 2: definition is in this cycle, but it is a PHI in the cycle header.
|
||||
if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
|
||||
Cycle->getHeader() == DefMI->getParent()))
|
||||
continue;
|
||||
// The DefMI is defined inside the loop.
|
||||
// The DefMI is defined inside the cycle.
|
||||
// If sinking this operand makes some register pressure set exceed limit,
|
||||
// it is not profitable.
|
||||
if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
|
||||
|
@ -843,8 +841,8 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
|
|||
}
|
||||
}
|
||||
|
||||
// If MI is in loop and all its operands are alive across the whole loop or if
|
||||
// no operand sinking make register pressure set exceed limit, it is
|
||||
// If MI is in cycle and all its operands are alive across the whole cycle or
|
||||
// if no operand sinking make register pressure set exceed limit, it is
|
||||
// profitable to sink MI.
|
||||
return true;
|
||||
}
|
||||
|
@ -876,14 +874,14 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
|
|||
AllSuccs.push_back(DTChild->getBlock());
|
||||
}
|
||||
|
||||
// Sort Successors according to their loop depth or block frequency info.
|
||||
// Sort Successors according to their cycle depth or block frequency info.
|
||||
llvm::stable_sort(
|
||||
AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
|
||||
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
|
||||
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
|
||||
bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
|
||||
return HasBlockFreq ? LHSFreq < RHSFreq
|
||||
: LI->getLoopDepth(L) < LI->getLoopDepth(R);
|
||||
: CI->getCycleDepth(L) < CI->getCycleDepth(R);
|
||||
});
|
||||
|
||||
auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));
|
||||
|
@ -898,7 +896,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
|
|||
AllSuccsCache &AllSuccessors) {
|
||||
assert (MBB && "Invalid MachineBasicBlock!");
|
||||
|
||||
// Loop over all the operands of the specified instruction. If there is
|
||||
// Loop over all the operands of the specified instruction. If there is
|
||||
// anything we can't handle, bail out.
|
||||
|
||||
// SuccToSinkTo - This is the successor to sink this instruction to, once we
|
||||
|
@ -945,7 +943,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
|
|||
// Otherwise, we should look at all the successors and decide which one
|
||||
// we should sink to. If we have reliable block frequency information
|
||||
// (frequency != 0) available, give successors with smaller frequencies
|
||||
// higher priority, otherwise prioritize smaller loop depths.
|
||||
// higher priority, otherwise prioritize smaller cycle depths.
|
||||
for (MachineBasicBlock *SuccBlock :
|
||||
GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
|
||||
bool LocalUse = false;
|
||||
|
@ -968,7 +966,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
|
|||
}
|
||||
|
||||
// It is not possible to sink an instruction into its own block. This can
|
||||
// happen with loops.
|
||||
// happen with cycles.
|
||||
if (MBB == SuccToSinkTo)
|
||||
return nullptr;
|
||||
|
||||
|
@ -1222,68 +1220,70 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
|
|||
return HasAliasedStore;
|
||||
}
|
||||
|
||||
/// Sink instructions into loops if profitable. This especially tries to prevent
|
||||
/// register spills caused by register pressure if there is little to no
|
||||
/// overhead moving instructions into loops.
|
||||
bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I);
|
||||
MachineBasicBlock *Preheader = L->getLoopPreheader();
|
||||
assert(Preheader && "Loop sink needs a preheader block");
|
||||
/// Sink instructions into cycles if profitable. This especially tries to
|
||||
/// prevent register spills caused by register pressure if there is little to no
|
||||
/// overhead moving instructions into cycles.
|
||||
bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
|
||||
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
|
||||
assert(Preheader && "Cycle sink needs a preheader block");
|
||||
MachineBasicBlock *SinkBlock = nullptr;
|
||||
bool CanSink = true;
|
||||
const MachineOperand &MO = I.getOperand(0);
|
||||
|
||||
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI);
|
||||
if (!L->contains(&MI)) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
|
||||
if (!Cycle->contains(MI.getParent())) {
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
|
||||
CanSink = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// FIXME: Come up with a proper cost model that estimates whether sinking
|
||||
// the instruction (and thus possibly executing it on every loop
|
||||
// the instruction (and thus possibly executing it on every cycle
|
||||
// iteration) is more expensive than a register.
|
||||
// For now assumes that copies are cheap and thus almost always worth it.
|
||||
if (!MI.isCopy()) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
|
||||
CanSink = false;
|
||||
break;
|
||||
}
|
||||
if (!SinkBlock) {
|
||||
SinkBlock = MI.getParent();
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: "
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
|
||||
<< printMBBReference(*SinkBlock) << "\n");
|
||||
continue;
|
||||
}
|
||||
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
|
||||
if (!SinkBlock) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
|
||||
CanSink = false;
|
||||
break;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " <<
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " <<
|
||||
printMBBReference(*SinkBlock) << "\n");
|
||||
}
|
||||
|
||||
if (!CanSink) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
|
||||
return false;
|
||||
}
|
||||
if (!SinkBlock) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
|
||||
return false;
|
||||
}
|
||||
if (SinkBlock == Preheader) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n");
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
|
||||
return false;
|
||||
}
|
||||
if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) {
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n");
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n");
|
||||
LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
|
||||
SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
|
||||
I);
|
||||
|
||||
|
@ -1407,9 +1407,11 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
|
|||
TryBreak = true;
|
||||
}
|
||||
|
||||
// Don't sink instructions into a loop.
|
||||
if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
|
||||
LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n");
|
||||
// Don't sink instructions into a cycle.
|
||||
if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
|
||||
(!CI->getCycle(SuccToSinkTo)->isReducible() ||
|
||||
CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
|
||||
LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
|
||||
TryBreak = true;
|
||||
}
|
||||
|
||||
|
|
|
@ -132,6 +132,7 @@
|
|||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Cycle Info Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
|
||||
# RUN: -machine-sink-loop-limit=1 -verify-machineinstrs %s -o - 2>&1 | \
|
||||
# RUN: -machine-sink-cycle-limit=1 -verify-machineinstrs %s -o - 2>&1 | \
|
||||
# RUN: FileCheck %s --check-prefix=SINK1
|
||||
#
|
||||
# RUN: llc -mtriple aarch64 -run-pass=machine-sink -sink-insts-to-avoid-spills \
|
||||
# RUN: -machine-sink-loop-limit=2 -verify-machineinstrs %s -o - 2>&1 | \
|
||||
# RUN: -machine-sink-cycle-limit=2 -verify-machineinstrs %s -o - 2>&1 | \
|
||||
# RUN: FileCheck %s --check-prefix=SINK2
|
||||
|
||||
--- |
|
||||
|
|
|
@ -296,6 +296,7 @@
|
|||
; GCN-O1-NEXT: Machine Block Frequency Analysis
|
||||
; GCN-O1-NEXT: Machine Common Subexpression Elimination
|
||||
; GCN-O1-NEXT: MachinePostDominator Tree Construction
|
||||
; GCN-O1-NEXT: Machine Cycle Info Analysis
|
||||
; GCN-O1-NEXT: Machine code sinking
|
||||
; GCN-O1-NEXT: Peephole Optimizations
|
||||
; GCN-O1-NEXT: Remove dead machine instructions
|
||||
|
@ -574,6 +575,7 @@
|
|||
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
|
||||
; GCN-O1-OPTS-NEXT: Machine Common Subexpression Elimination
|
||||
; GCN-O1-OPTS-NEXT: MachinePostDominator Tree Construction
|
||||
; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis
|
||||
; GCN-O1-OPTS-NEXT: Machine code sinking
|
||||
; GCN-O1-OPTS-NEXT: Peephole Optimizations
|
||||
; GCN-O1-OPTS-NEXT: Remove dead machine instructions
|
||||
|
@ -861,6 +863,7 @@
|
|||
; GCN-O2-NEXT: Machine Block Frequency Analysis
|
||||
; GCN-O2-NEXT: Machine Common Subexpression Elimination
|
||||
; GCN-O2-NEXT: MachinePostDominator Tree Construction
|
||||
; GCN-O2-NEXT: Machine Cycle Info Analysis
|
||||
; GCN-O2-NEXT: Machine code sinking
|
||||
; GCN-O2-NEXT: Peephole Optimizations
|
||||
; GCN-O2-NEXT: Remove dead machine instructions
|
||||
|
@ -1161,6 +1164,7 @@
|
|||
; GCN-O3-NEXT: Machine Block Frequency Analysis
|
||||
; GCN-O3-NEXT: Machine Common Subexpression Elimination
|
||||
; GCN-O3-NEXT: MachinePostDominator Tree Construction
|
||||
; GCN-O3-NEXT: Machine Cycle Info Analysis
|
||||
; GCN-O3-NEXT: Machine code sinking
|
||||
; GCN-O3-NEXT: Peephole Optimizations
|
||||
; GCN-O3-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -91,6 +91,7 @@
|
|||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Cycle Info Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -111,6 +111,7 @@
|
|||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Cycle Info Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -90,6 +90,7 @@
|
|||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Cycle Info Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -107,6 +107,7 @@
|
|||
; CHECK-NEXT: Machine Block Frequency Analysis
|
||||
; CHECK-NEXT: Machine Common Subexpression Elimination
|
||||
; CHECK-NEXT: MachinePostDominator Tree Construction
|
||||
; CHECK-NEXT: Machine Cycle Info Analysis
|
||||
; CHECK-NEXT: Machine code sinking
|
||||
; CHECK-NEXT: Peephole Optimizations
|
||||
; CHECK-NEXT: Remove dead machine instructions
|
||||
|
|
|
@ -32,14 +32,13 @@ define dso_local void @fn() {
|
|||
; CHECK-NEXT: # implicit-def: $ebp
|
||||
; CHECK-NEXT: jmp .LBB0_1
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_15: # %for.inc
|
||||
; CHECK-NEXT: .LBB0_16: # %for.inc
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl %esi, %ecx
|
||||
; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
|
||||
; CHECK-NEXT: movb %dh, %dl
|
||||
; CHECK-NEXT: .LBB0_1: # %for.cond
|
||||
; CHECK-NEXT: # =>This Loop Header: Depth=1
|
||||
; CHECK-NEXT: # Child Loop BB0_19 Depth 2
|
||||
; CHECK-NEXT: # Child Loop BB0_20 Depth 2
|
||||
; CHECK-NEXT: cmpb $8, %dl
|
||||
; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
|
||||
; CHECK-NEXT: ja .LBB0_3
|
||||
|
@ -56,7 +55,7 @@ define dso_local void @fn() {
|
|||
; CHECK-NEXT: movb %cl, %dh
|
||||
; CHECK-NEXT: movl $0, h
|
||||
; CHECK-NEXT: cmpb $8, %dl
|
||||
; CHECK-NEXT: jg .LBB0_9
|
||||
; CHECK-NEXT: jg .LBB0_8
|
||||
; CHECK-NEXT: # %bb.5: # %if.then13
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl %eax, %esi
|
||||
|
@ -65,10 +64,12 @@ define dso_local void @fn() {
|
|||
; CHECK-NEXT: calll printf
|
||||
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
|
||||
; CHECK-NEXT: testb %bl, %bl
|
||||
; CHECK-NEXT: movl %esi, %ecx
|
||||
; CHECK-NEXT: # implicit-def: $eax
|
||||
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
|
||||
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
|
||||
; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
|
||||
; CHECK-NEXT: movb %dh, %dl
|
||||
; CHECK-NEXT: jne .LBB0_15
|
||||
; CHECK-NEXT: jne .LBB0_16
|
||||
; CHECK-NEXT: jmp .LBB0_6
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_3: # %if.then
|
||||
|
@ -77,82 +78,82 @@ define dso_local void @fn() {
|
|||
; CHECK-NEXT: calll printf
|
||||
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
|
||||
; CHECK-NEXT: # implicit-def: $eax
|
||||
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
|
||||
; CHECK-NEXT: jmp .LBB0_6
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_9: # %if.end21
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: # implicit-def: $ebp
|
||||
; CHECK-NEXT: jmp .LBB0_10
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_6: # %for.cond35
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movb %dl, %dh
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: movl %edi, %esi
|
||||
; CHECK-NEXT: movl $0, %edi
|
||||
; CHECK-NEXT: movb %cl, %dl
|
||||
; CHECK-NEXT: je .LBB0_19
|
||||
; CHECK-NEXT: # %bb.7: # %af
|
||||
; CHECK-NEXT: je .LBB0_7
|
||||
; CHECK-NEXT: .LBB0_11: # %af
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: testb %bl, %bl
|
||||
; CHECK-NEXT: jne .LBB0_8
|
||||
; CHECK-NEXT: .LBB0_16: # %if.end39
|
||||
; CHECK-NEXT: jne .LBB0_12
|
||||
; CHECK-NEXT: .LBB0_17: # %if.end39
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: testl %eax, %eax
|
||||
; CHECK-NEXT: je .LBB0_18
|
||||
; CHECK-NEXT: # %bb.17: # %if.then41
|
||||
; CHECK-NEXT: je .LBB0_19
|
||||
; CHECK-NEXT: # %bb.18: # %if.then41
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl $.str, (%esp)
|
||||
; CHECK-NEXT: calll printf
|
||||
; CHECK-NEXT: .LBB0_18: # %for.end46
|
||||
; CHECK-NEXT: .LBB0_19: # %for.end46
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl %esi, %edi
|
||||
; CHECK-NEXT: # implicit-def: $dl
|
||||
; CHECK-NEXT: # implicit-def: $dh
|
||||
; CHECK-NEXT: # implicit-def: $ebp
|
||||
; CHECK-NEXT: jmp .LBB0_20
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_19: # %for.cond47
|
||||
; CHECK-NEXT: .LBB0_8: # %if.end21
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: # implicit-def: $ebp
|
||||
; CHECK-NEXT: jmp .LBB0_9
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: movb %dl, %dh
|
||||
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_20: # %for.cond47
|
||||
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
|
||||
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
|
||||
; CHECK-NEXT: testb %bl, %bl
|
||||
; CHECK-NEXT: jne .LBB0_19
|
||||
; CHECK-NEXT: # %bb.20: # %for.cond47
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2
|
||||
; CHECK-NEXT: jne .LBB0_20
|
||||
; CHECK-NEXT: # %bb.21: # %for.cond47
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2
|
||||
; CHECK-NEXT: testb %bl, %bl
|
||||
; CHECK-NEXT: jne .LBB0_19
|
||||
; CHECK-NEXT: .LBB0_10: # %ae
|
||||
; CHECK-NEXT: jne .LBB0_20
|
||||
; CHECK-NEXT: .LBB0_9: # %ae
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: testb %bl, %bl
|
||||
; CHECK-NEXT: jne .LBB0_11
|
||||
; CHECK-NEXT: # %bb.12: # %if.end26
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: xorl %esi, %esi
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: je .LBB0_15
|
||||
; CHECK-NEXT: jne .LBB0_10
|
||||
; CHECK-NEXT: # %bb.13: # %if.end26
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: testl %ebp, %ebp
|
||||
; CHECK-NEXT: jne .LBB0_15
|
||||
; CHECK-NEXT: # %bb.14: # %if.then31
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: testb %dl, %dl
|
||||
; CHECK-NEXT: je .LBB0_16
|
||||
; CHECK-NEXT: # %bb.14: # %if.end26
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: xorl %esi, %esi
|
||||
; CHECK-NEXT: testl %ebp, %ebp
|
||||
; CHECK-NEXT: jne .LBB0_16
|
||||
; CHECK-NEXT: # %bb.15: # %if.then31
|
||||
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: xorl %ecx, %ecx
|
||||
; CHECK-NEXT: xorl %ebp, %ebp
|
||||
; CHECK-NEXT: jmp .LBB0_15
|
||||
; CHECK-NEXT: jmp .LBB0_16
|
||||
; CHECK-NEXT: .p2align 4, 0x90
|
||||
; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: movl %edi, %esi
|
||||
; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: # implicit-def: $eax
|
||||
; CHECK-NEXT: testb %bl, %bl
|
||||
; CHECK-NEXT: je .LBB0_16
|
||||
; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: je .LBB0_17
|
||||
; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1
|
||||
; CHECK-NEXT: # implicit-def: $edi
|
||||
; CHECK-NEXT: # implicit-def: $cl
|
||||
; CHECK-NEXT: # kill: killed $cl
|
||||
; CHECK-NEXT: # implicit-def: $dl
|
||||
; CHECK-NEXT: # implicit-def: $ebp
|
||||
; CHECK-NEXT: jmp .LBB0_6
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: jne .LBB0_11
|
||||
; CHECK-NEXT: jmp .LBB0_7
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
|
|
|
@ -93,12 +93,12 @@ define void @switch_trunc_phi_const(i32 %x) {
|
|||
; CHECK: # %bb.0: # %bb0
|
||||
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
|
||||
; CHECK-NEXT: movzbl %dil, %ecx
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: movl $3895, %edx # imm = 0xF37
|
||||
; CHECK-NEXT: decl %ecx
|
||||
; CHECK-NEXT: cmpl $54, %ecx
|
||||
; CHECK-NEXT: ja .LBB1_8
|
||||
; CHECK-NEXT: # %bb.1: # %bb0
|
||||
; CHECK-NEXT: movzbl %dil, %eax
|
||||
; CHECK-NEXT: movl $3895, %edx # imm = 0xF37
|
||||
; CHECK-NEXT: jmpq *.LJTI1_0(,%rcx,8)
|
||||
; CHECK-NEXT: .LBB1_8: # %default
|
||||
; CHECK-NEXT: retq
|
||||
|
|
|
@ -1377,8 +1377,6 @@ define i32 @irreducibleCFG() #4 {
|
|||
; ENABLE-NEXT: pushq %rbx
|
||||
; ENABLE-NEXT: pushq %rax
|
||||
; ENABLE-NEXT: .cfi_offset %rbx, -24
|
||||
; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
|
||||
; ENABLE-NEXT: movl (%rax), %edi
|
||||
; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax
|
||||
; ENABLE-NEXT: cmpb $0, (%rax)
|
||||
; ENABLE-NEXT: je LBB16_2
|
||||
|
@ -1388,20 +1386,24 @@ define i32 @irreducibleCFG() #4 {
|
|||
; ENABLE-NEXT: jmp LBB16_1
|
||||
; ENABLE-NEXT: LBB16_2: ## %split
|
||||
; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax
|
||||
; ENABLE-NEXT: xorl %ebx, %ebx
|
||||
; ENABLE-NEXT: cmpl $0, (%rax)
|
||||
; ENABLE-NEXT: je LBB16_4
|
||||
; ENABLE-NEXT: ## %bb.3: ## %for.body4.i
|
||||
; ENABLE-NEXT: je LBB16_3
|
||||
; ENABLE-NEXT: ## %bb.4: ## %for.body4.i
|
||||
; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
|
||||
; ENABLE-NEXT: movl (%rax), %edi
|
||||
; ENABLE-NEXT: xorl %ebx, %ebx
|
||||
; ENABLE-NEXT: xorl %eax, %eax
|
||||
; ENABLE-NEXT: callq _something
|
||||
; ENABLE-NEXT: jmp LBB16_5
|
||||
; ENABLE-NEXT: LBB16_3:
|
||||
; ENABLE-NEXT: xorl %ebx, %ebx
|
||||
; ENABLE-NEXT: .p2align 4, 0x90
|
||||
; ENABLE-NEXT: LBB16_4: ## %for.inc
|
||||
; ENABLE-NEXT: LBB16_5: ## %for.inc
|
||||
; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; ENABLE-NEXT: incl %ebx
|
||||
; ENABLE-NEXT: cmpl $7, %ebx
|
||||
; ENABLE-NEXT: jl LBB16_4
|
||||
; ENABLE-NEXT: ## %bb.5: ## %fn1.exit
|
||||
; ENABLE-NEXT: jl LBB16_5
|
||||
; ENABLE-NEXT: ## %bb.6: ## %fn1.exit
|
||||
; ENABLE-NEXT: xorl %eax, %eax
|
||||
; ENABLE-NEXT: addq $8, %rsp
|
||||
; ENABLE-NEXT: popq %rbx
|
||||
|
@ -1418,8 +1420,6 @@ define i32 @irreducibleCFG() #4 {
|
|||
; DISABLE-NEXT: pushq %rbx
|
||||
; DISABLE-NEXT: pushq %rax
|
||||
; DISABLE-NEXT: .cfi_offset %rbx, -24
|
||||
; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
|
||||
; DISABLE-NEXT: movl (%rax), %edi
|
||||
; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax
|
||||
; DISABLE-NEXT: cmpb $0, (%rax)
|
||||
; DISABLE-NEXT: je LBB16_2
|
||||
|
@ -1429,20 +1429,24 @@ define i32 @irreducibleCFG() #4 {
|
|||
; DISABLE-NEXT: jmp LBB16_1
|
||||
; DISABLE-NEXT: LBB16_2: ## %split
|
||||
; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax
|
||||
; DISABLE-NEXT: xorl %ebx, %ebx
|
||||
; DISABLE-NEXT: cmpl $0, (%rax)
|
||||
; DISABLE-NEXT: je LBB16_4
|
||||
; DISABLE-NEXT: ## %bb.3: ## %for.body4.i
|
||||
; DISABLE-NEXT: je LBB16_3
|
||||
; DISABLE-NEXT: ## %bb.4: ## %for.body4.i
|
||||
; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
|
||||
; DISABLE-NEXT: movl (%rax), %edi
|
||||
; DISABLE-NEXT: xorl %ebx, %ebx
|
||||
; DISABLE-NEXT: xorl %eax, %eax
|
||||
; DISABLE-NEXT: callq _something
|
||||
; DISABLE-NEXT: jmp LBB16_5
|
||||
; DISABLE-NEXT: LBB16_3:
|
||||
; DISABLE-NEXT: xorl %ebx, %ebx
|
||||
; DISABLE-NEXT: .p2align 4, 0x90
|
||||
; DISABLE-NEXT: LBB16_4: ## %for.inc
|
||||
; DISABLE-NEXT: LBB16_5: ## %for.inc
|
||||
; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
|
||||
; DISABLE-NEXT: incl %ebx
|
||||
; DISABLE-NEXT: cmpl $7, %ebx
|
||||
; DISABLE-NEXT: jl LBB16_4
|
||||
; DISABLE-NEXT: ## %bb.5: ## %fn1.exit
|
||||
; DISABLE-NEXT: jl LBB16_5
|
||||
; DISABLE-NEXT: ## %bb.6: ## %fn1.exit
|
||||
; DISABLE-NEXT: xorl %eax, %eax
|
||||
; DISABLE-NEXT: addq $8, %rsp
|
||||
; DISABLE-NEXT: popq %rbx
|
||||
|
|
Loading…
Reference in New Issue