diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index a762ed7c51d4..b16fa79cd662 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -156,6 +156,14 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, if (!DstRC) return false; + // The ext instr may be operating on a sub-register of SrcReg as well. + // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit + // register. + // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of + // SrcReg:SubIdx should be replaced. + bool UseSrcSubIdx = TM->getRegisterInfo()-> + getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; + // The source has other uses. See if we can replace the other uses with use of // the result of the extension. SmallPtrSet ReachedBBs; @@ -184,6 +192,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; } + // Only accept uses of SrcReg:SubIdx. + if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) + continue; + // It's an error to translate this: // // %reg1025 = %reg1024 @@ -259,10 +271,14 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, } unsigned NewVR = MRI->createVirtualRegister(RC); - BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::COPY), NewVR) + MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) .addReg(DstReg, 0, SubIdx); - + // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. + if (UseSrcSubIdx) { + Copy->getOperand(0).setSubReg(SubIdx); + Copy->getOperand(0).setIsUndef(); + } UseMO->setReg(NewVR); ++NumReuse; Changed = true; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 28b3bc1596e1..47f09dca77d3 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -79,6 +79,22 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( return new PPCScoreboardHazardRecognizer(II, DAG); } + +// Detect 32 -> 64-bit extensions where we may reuse the low sub-register. +bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { + switch (MI.getOpcode()) { + default: return false; + case PPC::EXTSW: + case PPC::EXTSW_32_64: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + SubIdx = PPC::sub_32; + return true; + } +} + unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 7d49aa129e36..374213ea435b 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -92,6 +92,9 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const; + bool isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/llvm/test/CodeGen/PowerPC/coalesce-ext.ll b/llvm/test/CodeGen/PowerPC/coalesce-ext.ll new file mode 100644 index 000000000000..08a2cc773fd5 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/coalesce-ext.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=ppc64 < %s | FileCheck %s +; Check that the peephole optimizer knows about sext and zext instructions. +; CHECK: test1sext +define i32 @test1sext(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind { + %C = add i64 %A, %B + ; CHECK: add [[SUM:r[0-9]+]], r3, r4 + %D = trunc i64 %C to i32 + %E = shl i64 %C, 32 + %F = ashr i64 %E, 32 + ; CHECK: extsw [[EXT:r[0-9]+]], [[SUM]] + store volatile i64 %F, i64 *%P2 + ; CHECK: std [[EXT]] + store volatile i32 %D, i32* %P + ; Reuse low bits of extended register, don't extend live range of SUM. + ; CHECK: stw [[EXT]] + ret i32 %D +}