forked from OSchip/llvm-project
[PowerPC] Optimize compare by using record form in post-RA.
Summary: We currently optimize the comparison only in SSA form, so we miss some optimization opportunities where the input of the comparison is lowered from a COPY in post-RA, i.e. ExpandPostRA::LowerCopy is called after PPCInstrInfo::optimizeCompareInstr. This patch optimizes the comparison in post-RA; only the cases that compare against zero can be handled. D131374 converts the comparison and its user to a compare against zero with the appropriate predicate on the branch, which creates additional opportunities for this patch. Reviewed By: shchenz, lkail Differential Revision: https://reviews.llvm.org/D131873
This commit is contained in:
parent
cb33ef7ca7
commit
d1115c2b84
|
@ -2768,6 +2768,85 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Post-RA variant of the compare optimization: try to make the
/// compare-against-zero \p CmpMI redundant by turning the instruction that
/// defines its source register into the corresponding record form, which then
/// defines CR0 itself.
///
/// Returns true when the defining instruction has been rewritten in place.
/// \p CmpMI is not erased here; removing it is left to the caller. Returns
/// false when any precondition below is not met.
bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
  // This routine does not handle functions that are still in SSA form.
  if (CmpMI.getParent()->getParent()->getRegInfo().isSSA())
    return false;

  Register LHSReg, RHSReg;
  int64_t Mask, Imm;
  if (!analyzeCompare(CmpMI, LHSReg, RHSReg, Mask, Imm))
    return false;

  // Only a compare of a single register against the immediate 0 qualifies.
  const bool ComparesAgainstZero = Imm == 0 && Mask != 0 && !RHSReg;
  if (!ComparesAgainstZero)
    return false;

  switch (CmpMI.getOpcode()) {
  case PPC::CMPLWI:
  case PPC::CMPLDI:
    // The record forms set the condition register based on a signed
    // comparison with zero (see comments in optimizeCompareInstr). Since we
    // can't do the equality checks in post-RA, we are more restricted on an
    // unsigned comparison.
    return false;
  case PPC::CMPWI:
    // The record forms are always based on a 64-bit comparison on PPC64
    // (similarly, a 32-bit comparison on PPC32), while CMPWI is a 32-bit
    // comparison. Since we can't do the equality checks in post-RA, we bail
    // out in that case.
    if (Subtarget.isPPC64())
      return false;
    break;
  default:
    break;
  }

  // A compare that carries an implicit def cannot be deleted.
  if (CmpMI.hasImplicitDef())
    return false;

  // Locate the instruction that produces the compared register.
  bool SrcHasOtherUse = false;
  MachineInstr *DefMI = getDefMIPostRA(LHSReg, CmpMI, SrcHasOtherUse);
  if (!DefMI)
    return false;
  if (!DefMI->definesRegister(LHSReg))
    return false;

  // Only a compare that writes CR0 is handled.
  MachineOperand CRDefMO = CmpMI.getOperand(0);
  const Register CRReg = CRDefMO.getReg();
  if (CRReg != PPC::CR0)
    return false;

  // Make sure there is no def/use of CRReg between DefMI and CmpMI.
  bool CRReadInBetween = false;
  bool CRKilledInBetween = false;
  if (!isRegElgibleForForwarding(CRDefMO, *DefMI, CmpMI, false,
                                 CRKilledInBetween, CRReadInBetween))
    return false;
  if (DefMI->definesRegister(CRReg) || CRReadInBetween)
    return false;

  // Give up if the defining instruction has no record form.
  const int RecordOpc = PPC::getRecordFormOpcode(DefMI->getOpcode());
  if (RecordOpc == -1)
    return false;

  LLVM_DEBUG(dbgs() << "Replace Instr: "; DefMI->dump());

  // Switch DefMI to the record form and add the implicit def of CRReg.
  DefMI->setDesc(get(RecordOpc));
  MachineInstrBuilder(*DefMI->getParent()->getParent(), DefMI)
      .addReg(CRReg, RegState::ImplicitDefine);
  DefMI->clearRegisterDeads(CRReg);

  // Fix up killed/dead flag for the source register after transformation.
  if (SrcHasOtherUse || CmpMI.getOperand(1).isKill())
    fixupIsDeadOrKill(DefMI, &CmpMI, LHSReg);

  assert(DefMI->definesRegister(PPC::CR0) &&
         "Record-form instruction does not define cr0?");

  LLVM_DEBUG(dbgs() << "with: "; DefMI->dump());
  LLVM_DEBUG(dbgs() << "Delete dead instruction: "; CmpMI.dump());
  return true;
}
|
||||
|
||||
bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
|
||||
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
|
||||
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
|
||||
|
@ -4427,7 +4506,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
|
|||
bool PPCInstrInfo::isRegElgibleForForwarding(
|
||||
const MachineOperand &RegMO, const MachineInstr &DefMI,
|
||||
const MachineInstr &MI, bool KillDefMI,
|
||||
bool &IsFwdFeederRegKilled) const {
|
||||
bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
|
||||
// x = addi y, imm
|
||||
// ...
|
||||
// z = lfdx 0, x -> z = lfd imm(y)
|
||||
|
@ -4449,6 +4528,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
|
|||
return false;
|
||||
else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
|
||||
IsFwdFeederRegKilled = true;
|
||||
if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
|
||||
SeenIntermediateUse = true;
|
||||
// Made it to DefMI without encountering a clobber.
|
||||
if ((&*It) == &DefMI)
|
||||
break;
|
||||
|
@ -4888,9 +4969,10 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
|
|||
return false;
|
||||
|
||||
bool IsFwdFeederRegKilled = false;
|
||||
bool SeenIntermediateUse = false;
|
||||
// Check if the RegMO can be forwarded to MI.
|
||||
if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
|
||||
IsFwdFeederRegKilled))
|
||||
IsFwdFeederRegKilled, SeenIntermediateUse))
|
||||
return false;
|
||||
|
||||
// Get killed info in case fixup needed after transformation.
|
||||
|
|
|
@ -251,7 +251,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
|
|||
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
|
||||
const MachineInstr &DefMI,
|
||||
const MachineInstr &MI, bool KillDefMI,
|
||||
bool &IsFwdFeederRegKilled) const;
|
||||
bool &IsFwdFeederRegKilled,
|
||||
bool &SeenIntermediateUse) const;
|
||||
unsigned getSpillTarget() const;
|
||||
const unsigned *getStoreOpcodesForSpillArray() const;
|
||||
const unsigned *getLoadOpcodesForSpillArray() const;
|
||||
|
@ -644,6 +645,8 @@ public:
|
|||
int64_t &Offset, unsigned &Width,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
/// Try to make the compare instruction \p MI redundant after register
/// allocation by converting the instruction that defines its source register
/// to the record form. Returns true if that conversion was performed.
bool optimizeCmpPostRA(MachineInstr &MI) const;
|
||||
|
||||
/// Get the base operand and byte offset of an instruction that reads/writes
|
||||
/// memory.
|
||||
bool getMemOperandsWithOffsetWidth(
|
||||
|
|
|
@ -38,6 +38,8 @@ STATISTIC(NumberOfSelfCopies,
|
|||
"Number of self copy instructions eliminated");
|
||||
STATISTIC(NumFrameOffFoldInPreEmit,
|
||||
"Number of folding frame offset by using r+r in pre-emit peephole");
|
||||
STATISTIC(NumCmpsInPreEmit,
|
||||
"Number of compares eliminated in pre-emit peephole");
|
||||
|
||||
static cl::opt<bool>
|
||||
EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
|
||||
|
@ -508,6 +510,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
|
|||
LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
|
||||
LLVM_DEBUG(MI.dump());
|
||||
}
|
||||
if (TII->optimizeCmpPostRA(MI)) {
|
||||
Changed = true;
|
||||
NumCmpsInPreEmit++;
|
||||
LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
|
||||
LLVM_DEBUG(MI.dump());
|
||||
InstrsToErase.push_back(&MI);
|
||||
}
|
||||
}
|
||||
|
||||
// Eliminate conditional branch based on a constant CR bit by
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
# RUN: llc -mtriple=powerpc64le-linux-gnu -stop-after ppc-pre-emit-peephole %s -o - -verify-machineinstrs | FileCheck %s
|
||||
|
||||
---
|
||||
name: test1
|
||||
# The cmp instr is optimized with the record form.
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x30000000), %bb.2(0x50000000)
|
||||
liveins: $x3, $x4
|
||||
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
|
||||
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3
|
||||
; CHECK-LABEL: name: test1
|
||||
; CHECK: renamable $x3 = OR8_rec renamable $x3, killed renamable $x4, implicit-def $cr0
|
||||
; CHECK-NOT: CMPDI
|
||||
BCC 68, killed renamable $cr0, %bb.2
|
||||
|
||||
bb.1:
|
||||
$x3 = LI8 102
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
|
||||
bb.2:
|
||||
$x3 = LI8 116
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
---
|
||||
name: test2
|
||||
# The imm of the comparison instr isn't 0.
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x30000000), %bb.2(0x50000000)
|
||||
liveins: $x3, $x4
|
||||
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
|
||||
renamable $cr0 = CMPDI renamable $x3, 2, implicit killed $x3
|
||||
; CHECK-LABEL: name: test2
|
||||
; CHECK: CMPDI
|
||||
BCC 68, killed renamable $cr0, %bb.2
|
||||
|
||||
bb.1:
|
||||
$x3 = LI8 102
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
|
||||
bb.2:
|
||||
$x3 = LI8 116
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
---
|
||||
name: test3
|
||||
# The comparison instr has an implicit def.
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x30000000), %bb.2(0x50000000)
|
||||
liveins: $x3, $x4
|
||||
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
|
||||
renamable $cr0 = CMPDI renamable $x3, 0, implicit-def $x3
|
||||
; CHECK-LABEL: name: test3
|
||||
; CHECK: CMPDI
|
||||
BCC 68, killed renamable $cr0, %bb.2
|
||||
|
||||
bb.1:
|
||||
$x3 = LI8 102
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
|
||||
bb.2:
|
||||
$x3 = LI8 116
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
---
|
||||
name: test4
|
||||
# There is another use of cr0 between the OR8 instr and the CMPDI instr.
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x30000000), %bb.2(0x50000000)
|
||||
liveins: $x3, $x4, $cr0
|
||||
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
|
||||
renamable $cr1 = MCRF killed $cr0, implicit $x3
|
||||
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
|
||||
; CHECK-LABEL: name: test4
|
||||
; CHECK: CMPDI
|
||||
BCC 68, killed renamable $cr0, %bb.2
|
||||
|
||||
bb.1:
|
||||
$x3 = LI8 102
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
|
||||
bb.2:
|
||||
$x3 = LI8 116
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
---
|
||||
name: test5
|
||||
# There is another def of cr0 between the OR8 instr and the CMPDI instr.
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x30000000), %bb.2(0x50000000)
|
||||
liveins: $x3, $x4
|
||||
renamable $x3 = OR8 killed renamable $x3, renamable $x4
|
||||
renamable $cr1 = CMPD renamable $x3, renamable $x4, implicit-def $cr0
|
||||
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
|
||||
; CHECK-LABEL: name: test5
|
||||
; CHECK: CMPDI
|
||||
BCC 68, killed renamable $cr0, %bb.2
|
||||
|
||||
bb.1:
|
||||
$x3 = LI8 102
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
|
||||
bb.2:
|
||||
$x3 = LI8 116
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
||||
|
||||
---
|
||||
name: test6
|
||||
# The CR register defined by the comparison instr isn't CR0.
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
successors: %bb.1(0x30000000), %bb.2(0x50000000)
|
||||
liveins: $x3, $x4
|
||||
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
|
||||
renamable $cr1 = CMPDI renamable $x3, 0, implicit killed $x3
|
||||
; CHECK-LABEL: name: test6
|
||||
; CHECK: CMPDI
|
||||
BCC 68, killed renamable $cr1, %bb.2
|
||||
|
||||
bb.1:
|
||||
$x3 = LI8 102
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
|
||||
bb.2:
|
||||
$x3 = LI8 116
|
||||
BLR8 implicit $lr8, implicit $rm, implicit $x3
|
||||
...
|
|
@ -2946,10 +2946,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
|
|||
; LE-P10-O0-NEXT: std r0, 16(r1)
|
||||
; LE-P10-O0-NEXT: hashst r0, -8(r1)
|
||||
; LE-P10-O0-NEXT: stdu r1, -64(r1)
|
||||
; LE-P10-O0-NEXT: mr r4, r3
|
||||
; LE-P10-O0-NEXT: mr. r4, r3
|
||||
; LE-P10-O0-NEXT: std r4, 40(r1) # 8-byte Folded Spill
|
||||
; LE-P10-O0-NEXT: li r3, 0
|
||||
; LE-P10-O0-NEXT: cmpdi r4, 0
|
||||
; LE-P10-O0-NEXT: stw r3, 48(r1) # 4-byte Folded Spill
|
||||
; LE-P10-O0-NEXT: beq cr0, .LBB2_2
|
||||
; LE-P10-O0-NEXT: # %bb.1: # %if.end
|
||||
|
@ -2979,10 +2978,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
|
|||
; LE-P9-O0-NEXT: std r0, 16(r1)
|
||||
; LE-P9-O0-NEXT: hashst r0, -8(r1)
|
||||
; LE-P9-O0-NEXT: stdu r1, -128(r1)
|
||||
; LE-P9-O0-NEXT: mr r4, r3
|
||||
; LE-P9-O0-NEXT: mr. r4, r3
|
||||
; LE-P9-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
|
||||
; LE-P9-O0-NEXT: li r3, 0
|
||||
; LE-P9-O0-NEXT: cmpdi r4, 0
|
||||
; LE-P9-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
|
||||
; LE-P9-O0-NEXT: beq cr0, .LBB2_2
|
||||
; LE-P9-O0-NEXT: # %bb.1: # %if.end
|
||||
|
@ -3012,10 +3010,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
|
|||
; LE-P8-O0-NEXT: std r0, 16(r1)
|
||||
; LE-P8-O0-NEXT: hashst r0, -8(r1)
|
||||
; LE-P8-O0-NEXT: stdu r1, -128(r1)
|
||||
; LE-P8-O0-NEXT: mr r4, r3
|
||||
; LE-P8-O0-NEXT: mr. r4, r3
|
||||
; LE-P8-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
|
||||
; LE-P8-O0-NEXT: li r3, 0
|
||||
; LE-P8-O0-NEXT: cmpdi r4, 0
|
||||
; LE-P8-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
|
||||
; LE-P8-O0-NEXT: beq cr0, .LBB2_2
|
||||
; LE-P8-O0-NEXT: # %bb.1: # %if.end
|
||||
|
|
Loading…
Reference in New Issue