forked from OSchip/llvm-project
[Peephole] rewrite INSERT_SUBREG to SUBREG_TO_REG if upper bits zero
Restrict the rewrite to the 32-bit form of integer instructions, because otherwise too many test cases would be clobbered as the register numbers get updated. From %reg = INSERT_SUBREG %reg, %subreg, subidx To %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx. This tries to fix the redundant mov instruction seen in D132325, as the SUBREG_TO_REG should not generate code. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D132939
This commit is contained in:
parent
180bf5f940
commit
b6655333c2
|
@ -32,6 +32,9 @@
|
|||
// ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
|
||||
// operand are set to zero.
|
||||
//
|
||||
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
|
||||
// ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AArch64ExpandImm.h"
|
||||
|
@ -97,6 +100,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
|
|||
template <typename T>
|
||||
bool visitAND(unsigned Opc, MachineInstr &MI);
|
||||
bool visitORR(MachineInstr &MI);
|
||||
bool visitINSERT(MachineInstr &MI);
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
StringRef getPassName() const override {
|
||||
|
@ -250,6 +254,50 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Rewrite an INSERT_SUBREG that implements a zero-extend into a
/// SUBREG_TO_REG.
///
///   From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
///   To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
///
/// The first source operand of the INSERT_SUBREG is assumed to be irrelevant
/// because a COPY would destroy the upper part of the register anyway.
///
/// \param MI the INSERT_SUBREG instruction to inspect.
/// \returns true if \p MI was replaced by a SUBREG_TO_REG and erased, false
///          if it was left untouched.
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Only handle the form whose first source operand is tied to the def.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);

  // The inserted sub-register must have exactly one defining instruction so
  // that we can reason about what produced it.
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Check that the defining
  // instruction is a real AArch64 instruction (not a generic/target-independent
  // opcode) and that the destination is a 64-bit GPR class; otherwise be
  // conservative and leave the instruction alone.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction in front of MI, reusing MI's
  // destination register, sub-register source and sub-register index.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *SubregMI << "\n");
  // SubregMI is only read by LLVM_DEBUG, which expands to nothing when debug
  // output is compiled out; silence the resulting unused-variable warning.
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}
|
||||
|
||||
template <typename T>
|
||||
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
|
||||
// The immediate must be in the form of ((imm0 << 12) + imm1), in which both
|
||||
|
@ -493,6 +541,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
|
|||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
case AArch64::INSERT_SUBREG:
|
||||
Changed = visitINSERT(MI);
|
||||
break;
|
||||
case AArch64::ANDWrr:
|
||||
Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
|
||||
|
||||
--- |
|
||||
define i64 @loop2(i32 noundef %width) {
|
||||
entry:
|
||||
%add = add i32 %width, -1
|
||||
%zext = zext i32 %add to i64
|
||||
%shl = shl nuw nsw i64 %zext, 1
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
...
|
||||
---
|
||||
---
|
||||
name: loop2
|
||||
alignment: 4
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: gpr32common, preferred-register: '' }
|
||||
- { id: 1, class: gpr32common, preferred-register: '' }
|
||||
- { id: 2, class: gpr64, preferred-register: '' }
|
||||
- { id: 3, class: gpr64all, preferred-register: '' }
|
||||
- { id: 4, class: gpr64, preferred-register: '' }
|
||||
liveins:
|
||||
- { reg: '$w0', virtual-reg: '%0' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $w0
|
||||
|
||||
; CHECK-LABEL: name: loop2
|
||||
; CHECK: liveins: $w0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
|
||||
; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY]], 1, 0
|
||||
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
|
||||
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[SUBWri]], %subreg.sub_32
|
||||
; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = nuw nsw UBFMXri killed [[SUBREG_TO_REG]], 63, 31
|
||||
; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $x0
|
||||
%0:gpr32common = COPY $w0
|
||||
%1:gpr32common = SUBWri %0, 1, 0
|
||||
%3:gpr64all = IMPLICIT_DEF
|
||||
%2:gpr64 = INSERT_SUBREG %3, killed %1, %subreg.sub_32
|
||||
%4:gpr64 = nuw nsw UBFMXri killed %2, 63, 31
|
||||
$x0 = COPY %4
|
||||
RET_ReallyLR implicit $x0
|
Loading…
Reference in New Issue