forked from OSchip/llvm-project
350 lines
12 KiB
C++
350 lines
12 KiB
C++
//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass generates machine instructions for the CTR-loop related pseudos:
// 1: MTCTRloop/DecreaseCTRloop
// 2: MTCTR8loop/DecreaseCTR8loop
//
// If a CTR loop can be generated:
// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
//    its user branch instruction can be deleted.
//
// If a CTR loop can not be generated due to a clobber of CTR:
// 1: MTCTRloop/MTCTR8loop can be deleted.
// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
//    a "cmplwi/cmpldi".
//
// This pass runs just before register allocation for two reasons. If a CTR
// loop can be generated, we don't want the register allocator to allocate a
// register for DecreaseCTRloop. If a CTR loop can not be generated, the newly
// added "cmplwi/cmpldi" needs a condition register, which would no longer be
// available if the pass ran after register allocation.
//
//===----------------------------------------------------------------------===//
|
|
|
|
#include "PPC.h"
|
|
#include "PPCInstrInfo.h"
|
|
#include "PPCSubtarget.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
#include "llvm/CodeGen/MachineLoopInfo.h"
|
|
#include "llvm/CodeGen/MachineOperand.h"
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
#include "llvm/CodeGen/Register.h"
|
|
#include "llvm/InitializePasses.h"
|
|
#include "llvm/Pass.h"
|
|
#include "llvm/PassRegistry.h"
|
|
#include "llvm/Support/CodeGen.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include <cassert>
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "ppc-ctrloops"
|
|
|
|
STATISTIC(NumCTRLoops, "Number of CTR loops generated");
|
|
STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
|
|
|
|
namespace {
|
|
class PPCCTRLoops : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
PPCCTRLoops() : MachineFunctionPass(ID) {
|
|
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
AU.addRequired<MachineLoopInfo>();
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
private:
|
|
const PPCInstrInfo *TII = nullptr;
|
|
MachineRegisterInfo *MRI = nullptr;
|
|
|
|
bool processLoop(MachineLoop *ML);
|
|
bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
|
|
void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
|
|
MachineInstr *Dec);
|
|
void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
|
|
};
|
|
} // namespace
|
|
|
|
char PPCCTRLoops::ID = 0;
|
|
|
|
INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
|
|
false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
|
INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
|
|
false, false)
|
|
|
|
/// Returns a newly allocated instance of the PPCCTRLoops pass.
FunctionPass *llvm::createPPCCTRLoopsPass() {
  return new PPCCTRLoops();
}
|
|
|
|
/// Pass entry point: visits every outermost loop of \p MF and lets
/// processLoop() handle it together with its nested loops.
///
/// \returns true if at least one loop was rewritten.
bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
  MachineLoopInfo &LoopInfo = getAnalysis<MachineLoopInfo>();

  // Cache the per-function objects that the expansion helpers rely on.
  TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
  MRI = &MF.getRegInfo();

  bool Modified = false;
  for (MachineLoop *TopLoop : LoopInfo) {
    // Nested loops are reached recursively from their outermost parent.
    if (!TopLoop->isOutermost())
      continue;
    if (processLoop(TopLoop))
      Modified = true;
  }
  return Modified;
}
|
|
|
|
/// Reports whether \p MI prevents the loop count from living in CTR/CTR8.
///
/// \param MI         Instruction to inspect.
/// \param CheckReads When false, only definitions of CTR/CTR8 count; this
///                   mode is used for instructions that precede MTCTRloop.
///                   When true, any modification of CTR/CTR8, any call, and
///                   any read of CTR/CTR8 counts.
/// \returns true if \p MI is considered a clobber under the selected mode.
bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
  if (CheckReads) {
    const bool WritesCTR =
        MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8);
    if (WritesCTR || MI->getDesc().isCall())
      return true;

    // CTR is set up in the loop preheader, so an instruction that reads it
    // also rules out the CTR loop form.
    return MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8);
  }

  // Definitions-only mode, used when looking at what comes before MTCTRloop.
  // A CTR definition made inside the callee of a call does not affect the
  // value MTCTRloop defines afterwards, so definesRegister() is enough and
  // the regmask does not need to be checked.
  return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
}
|
|
|
|
/// Lowers the CTR-loop pseudos of \p ML after first handling every loop
/// nested inside it.
///
/// \returns true if \p ML or one of its nested loops was rewritten; false if
///          the loop has no preheader or the preheader holds no
///          MTCTRloop/MTCTR8loop.
bool PPCCTRLoops::processLoop(MachineLoop *ML) {
  // Same order as the HardwareLoop pass: nested loops come first.
  bool InnerChanged = false;
  for (MachineLoop *Inner : *ML)
    InnerChanged |= processLoop(Inner);

  // When an inner loop was changed, the outer loop must be without hardware
  // loop intrinsics, so there is nothing left to do for it.
  if (InnerChanged)
    return true;

  // No preheader means there cannot be an MTCTRloop either.
  MachineBasicBlock *Preheader = ML->getLoopPreheader();
  if (!Preheader)
    return false;

  // Pick the first MTCTRloop/MTCTR8loop found in the preheader.
  MachineInstr *Start = nullptr;
  for (MachineInstr &Candidate : *Preheader) {
    const unsigned Opc = Candidate.getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      Start = &Candidate;
      break;
    }
  }
  // Not a CTR loop candidate.
  if (!Start)
    return false;

  // CTR must not be redefined if it is live into the preheader.
  bool InvalidCTRLoop =
      Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8);

  // Walk upwards from the instruction just above MTCTR to the start of the
  // preheader. Only definitions of CTR are checked on this side; if one is
  // found we conservatively give up on the CTR loop.
  for (MachineBasicBlock::reverse_instr_iterator
           It = std::next(Start->getReverseIterator()),
           End = Preheader->instr_rend();
       It != End; ++It) {
    if (!isCTRClobber(&*It, /* CheckReads */ false))
      continue;
    InvalidCTRLoop = true;
    break;
  }

  // Walk downwards from the instruction just below MTCTR to the end of the
  // preheader. Here both clobbers and users of CTR are disqualifying.
  for (MachineBasicBlock::instr_iterator
           It = std::next(Start->getIterator()),
           End = Preheader->instr_end();
       It != End; ++It) {
    if (!isCTRClobber(&*It, /* CheckReads */ true))
      continue;
    InvalidCTRLoop = true;
    break;
  }

  // Visit the loop blocks in reverse order to find the decrement pseudo and,
  // as long as the loop still looks valid, to check every other instruction
  // for CTR clobbers. The scan ends early once the decrement is known and the
  // loop has been ruled out.
  MachineInstr *Dec = nullptr;
  for (MachineBasicBlock *MBB : reverse(ML->getBlocks())) {
    for (MachineInstr &MI : *MBB) {
      const unsigned Opc = MI.getOpcode();
      if (Opc == PPC::DecreaseCTRloop || Opc == PPC::DecreaseCTR8loop) {
        Dec = &MI;
        continue;
      }
      // Any instruction that clobbers CTR rules out a CTR loop.
      if (!InvalidCTRLoop)
        InvalidCTRLoop = isCTRClobber(&MI, /* CheckReads */ true);
    }
    if (Dec && InvalidCTRLoop)
      break;
  }

  assert(Dec && "CTR loop is not complete!");

  if (!InvalidCTRLoop) {
    expandCTRLoops(ML, Start, Dec);
    ++NumCTRLoops;
    return true;
  }

  expandNormalLoops(ML, Start, Dec);
  ++NumNormalLoops;
  return true;
}
|
|
|
|
/// Fallback lowering for a loop that cannot keep its count in CTR.
///
/// The count is carried in a virtual register instead: a PHI in the loop
/// header, an "addi -1" and an unsigned compare against zero in the block
/// that holds \p Dec. The `sub_gt` part of the compare result is copied into
/// the register \p Dec used to define. Both pseudos are erased afterwards.
///
/// \param ML    Loop being lowered.
/// \param Start The MTCTRloop/MTCTR8loop instruction in the preheader; its
///              operand 0 provides the initial count.
/// \param Dec   The DecreaseCTRloop/DecreaseCTR8loop instruction; its
///              decrement amount (operand 1) must be 1.
void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
                                    MachineInstr *Dec) {
  MachineFunction &MF = *Start->getParent()->getParent();
  const bool Is64Bit = MF.getSubtarget<PPCSubtarget>().isPPC64();

  MachineBasicBlock *Preheader = Start->getParent();
  MachineBasicBlock *Exiting = Dec->getParent();
  assert((Preheader && Exiting) &&
         "Preheader and exiting should exist for CTR loop!");

  assert(Dec->getOperand(1).getImm() == 1 &&
         "Loop decrement stride must be 1");

  const unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
  const unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
  const TargetRegisterClass *CounterRC =
      Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
              : &PPC::GPRC_and_GPRC_NOR0RegClass;
  MachineBasicBlock *Header = ML->getHeader();
  const DebugLoc &DecDL = Dec->getDebugLoc();

  Register PHIDef = MRI->createVirtualRegister(CounterRC);

  // A PHI is about to be inserted, so the function can no longer advertise
  // the NoPHIs property.
  MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);

  // Header PHI; the value coming from the preheader is the initial count.
  auto PHIMIB = BuildMI(*Header, Header->getFirstNonPHI(), DebugLoc(),
                        TII->get(TargetOpcode::PHI), PHIDef);
  PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);

  // "addi -1" placed in front of the decrement pseudo.
  Register ADDIDef = MRI->createVirtualRegister(CounterRC);
  BuildMI(*Exiting, Dec, DecDL, TII->get(ADDIOpcode), ADDIDef)
      .addReg(PHIDef)
      .addImm(-1);

  // Remaining PHI inputs.
  if (ML->isLoopLatch(Exiting)) {
    // The header must have exactly two predecessors here: the preheader and
    // the latch Exiting. The hardware loop insertion pass requires the block
    // containing DecreaseCTRloop to dominate all loop latches, so there can
    // be only one latch.
    assert(Header->pred_size() == 2 &&
           "Loop header predecessor is not right!");
    PHIMIB.addReg(ADDIDef).addMBB(Exiting);
  } else {
    // The block containing DecreaseCTRloop is not a latch. Since the hardware
    // loop insertion pass makes that block dominate all loop latches, ADDIDef
    // can serve as the incoming value for every in-loop predecessor.
    for (MachineBasicBlock *Pred : Header->predecessors()) {
      if (!ML->contains(Pred)) {
        assert(Pred == Preheader &&
               "CTR loop should not be generated for irreducible loop!");
        continue;
      }
      assert(ML->isLoopLatch(Pred) &&
             "Loop's header in-loop predecessor is not loop latch!");
      PHIMIB.addReg(ADDIDef).addMBB(Pred);
    }
  }

  // Compare the decremented count against zero, also in front of the pseudo.
  Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
  BuildMI(*Exiting, Dec, DecDL, TII->get(CMPOpcode), CMPDef)
      .addReg(ADDIDef)
      .addImm(0);

  // Define the pseudo's result register from the compare's sub_gt part.
  BuildMI(*Exiting, Dec, DecDL, TII->get(TargetOpcode::COPY),
          Dec->getOperand(0).getReg())
      .addReg(CMPDef, 0, PPC::sub_gt);

  // The pseudo instructions are no longer needed.
  Start->eraseFromParent();
  Dec->eraseFromParent();
}
|
|
|
|
/// Lowers the decrement pseudo and the branch that consumes it into a single
/// counter-register branch.
///
/// The only user of the register defined by \p Dec must be a BC or BCn
/// branch. BC becomes "bdnz" and BCn becomes "bdz", keeping the branch
/// target. The old branch and \p Dec are then erased. \p Start is used only
/// to reach the enclosing function; this routine does not modify it.
///
/// \param ML    Loop being lowered; used only in assertions.
/// \param Start The MTCTRloop/MTCTR8loop instruction in the preheader.
/// \param Dec   The DecreaseCTRloop/DecreaseCTR8loop instruction; its
///              decrement amount (operand 1) must be 1.
void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
                                 MachineInstr *Dec) {
  const bool Is64Bit =
      Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();

  MachineBasicBlock *Preheader = Start->getParent();
  MachineBasicBlock *Exiting = Dec->getParent();

  // Both are referenced only by assertions below.
  (void)Preheader;
  (void)ML;
  assert((Preheader && Exiting) &&
         "Preheader and exiting should exist for CTR loop!");

  assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");

  const Register CondReg = Dec->getOperand(0).getReg();
  auto BrInstr = MRI->use_instr_begin(CondReg);
  assert(MRI->hasOneUse(CondReg) &&
         "There should be only one user for loop decrement pseudo!");

  // Choose the counter branch that matches the sense of the existing branch.
  unsigned Opcode = 0;
  const unsigned BrOpc = BrInstr->getOpcode();
  if (BrOpc == PPC::BC) {
    Opcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
    assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
           "Invalid ctr loop!");
  } else if (BrOpc == PPC::BCn) {
    Opcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
    assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
           "Invalid ctr loop!");
  } else {
    llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
  }

  // Emit "bdnz/bdz" in the exiting block right in front of the old branch.
  MachineBasicBlock *Target = BrInstr->getOperand(1).getMBB();
  BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
      .addMBB(Target);

  // The old branch and the decrement pseudo are no longer needed.
  BrInstr->eraseFromParent();
  Dec->eraseFromParent();
}
|