[AVR] Optimize int16 arithmetic right shift for shift amount 7/14/15

Reviewed By: aykevl

Differential Revision: https://reviews.llvm.org/D115618
This commit is contained in:
Ben Shi 2022-03-26 03:24:18 +00:00
parent 88436afe30
commit bce2e208e0
6 changed files with 251 additions and 6 deletions

View File

@ -84,18 +84,23 @@ private:
bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI); bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);
/// Specific shift implementation. /// Specific shift implementation for int8.
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI); bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
bool expandLSRB7Rd(Block &MBB, BlockIt MBBI); bool expandLSRB7Rd(Block &MBB, BlockIt MBBI);
bool expandASRB6Rd(Block &MBB, BlockIt MBBI); bool expandASRB6Rd(Block &MBB, BlockIt MBBI);
bool expandASRB7Rd(Block &MBB, BlockIt MBBI); bool expandASRB7Rd(Block &MBB, BlockIt MBBI);
/// Specific shift implementation for int16.
bool expandLSLW4Rd(Block &MBB, BlockIt MBBI); bool expandLSLW4Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW4Rd(Block &MBB, BlockIt MBBI); bool expandLSRW4Rd(Block &MBB, BlockIt MBBI);
bool expandASRW7Rd(Block &MBB, BlockIt MBBI);
bool expandLSLW8Rd(Block &MBB, BlockIt MBBI); bool expandLSLW8Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW8Rd(Block &MBB, BlockIt MBBI); bool expandLSRW8Rd(Block &MBB, BlockIt MBBI);
bool expandASRW8Rd(Block &MBB, BlockIt MBBI); bool expandASRW8Rd(Block &MBB, BlockIt MBBI);
bool expandLSLW12Rd(Block &MBB, BlockIt MBBI); bool expandLSLW12Rd(Block &MBB, BlockIt MBBI);
bool expandLSRW12Rd(Block &MBB, BlockIt MBBI); bool expandLSRW12Rd(Block &MBB, BlockIt MBBI);
bool expandASRW14Rd(Block &MBB, BlockIt MBBI);
bool expandASRW15Rd(Block &MBB, BlockIt MBBI);
// Common implementation of LPMWRdZ and ELPMWRdZ. // Common implementation of LPMWRdZ and ELPMWRdZ.
bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt); bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
@ -1401,7 +1406,7 @@ bool AVRExpandPseudo::expand<AVR::LSLWHiRd>(Block &MBB, BlockIt MBBI) {
// add hireg, hireg <==> lsl hireg // add hireg, hireg <==> lsl hireg
auto MILSL = auto MILSL =
buildMI(MBB, MBBI, AVR::ADDRdRr) buildMI(MBB, MBBI, AVR::ADDRdRr)
.addReg(DstHiReg, RegState::Define, getDeadRegState(DstIsDead)) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill)) .addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill)); .addReg(DstHiReg, getKillRegState(DstIsKill));
@ -1820,6 +1825,53 @@ bool AVRExpandPseudo::expand<AVR::ASRWLoRd>(Block &MBB, BlockIt MBBI) {
return true; return true;
} }
/// Replace a 16-bit arithmetic-shift-right-by-7 pseudo with four 8-bit
/// instructions (ADD, MOV, ADC, SBC) inserted in front of \p MBBI, then
/// erase the pseudo itself.
///
/// \param MBB  block that contains the pseudo instruction.
/// \param MBBI iterator pointing at the pseudo instruction.
/// \returns true (the pseudo is always replaced).
bool AVRExpandPseudo::expandASRW7Rd(Block &MBB, BlockIt MBBI) {
  MachineInstr &Pseudo = *MBBI;

  // Liveness information carried by the pseudo: operand 0 is the
  // destination pair, operand 1 the source pair, operand 3 the implicit
  // status-register definition.
  const bool ResultIsDead = Pseudo.getOperand(0).isDead();
  const bool SrcIsKilled = Pseudo.getOperand(1).isKill();
  const bool FlagsAreDead = Pseudo.getOperand(3).isDead();

  // Break the 16-bit register pair into its two 8-bit halves.
  Register PairReg = Pseudo.getOperand(0).getReg();
  Register LoByte, HiByte;
  TRI->splitReg(PairReg, LoByte, HiByte);

  // Register-state flags shared by several of the emitted operands.
  const auto DefState = RegState::Define | getDeadRegState(ResultIsDead);
  const auto SrcState = getKillRegState(SrcIsKilled);

  // Emitted sequence (shown for the pair r25:r24):
  //   lsl r24
  //   mov r24,r25
  //   rol r24
  //   sbc r25,r25

  // lsl r24, spelled as add r24, r24
  buildMI(MBB, MBBI, AVR::ADDRdRr)
      .addReg(LoByte, DefState)
      .addReg(LoByte, RegState::Kill)
      .addReg(LoByte, RegState::Kill);

  // mov r24, r25
  buildMI(MBB, MBBI, AVR::MOVRdRr)
      .addReg(LoByte, DefState)
      .addReg(HiByte);

  // rol r24, spelled as adc r24, r24
  buildMI(MBB, MBBI, AVR::ADCRdRr)
      .addReg(LoByte, DefState)
      .addReg(LoByte, SrcState)
      .addReg(LoByte, SrcState);

  // sbc r25, r25
  auto Sbc = buildMI(MBB, MBBI, AVR::SBCRdRr)
                 .addReg(HiByte, DefState)
                 .addReg(HiByte, SrcState)
                 .addReg(HiByte, SrcState);

  // Propagate a dead SREG definition from the pseudo to the last
  // instruction of the expansion.
  if (FlagsAreDead)
    Sbc->getOperand(3).setIsDead();

  // The implicit SREG use of the final instruction is always a kill.
  Sbc->getOperand(4).setIsKill();

  Pseudo.eraseFromParent();
  return true;
}
bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) { bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI; MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg; Register DstLoReg, DstHiReg;
@ -1846,8 +1898,102 @@ bool AVRExpandPseudo::expandASRW8Rd(Block &MBB, BlockIt MBBI) {
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, getKillRegState(DstIsKill)) .addReg(DstHiReg, getKillRegState(DstIsKill))
.addReg(DstHiReg, getKillRegState(DstIsKill)); .addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead) if (ImpIsDead)
MIBHI->getOperand(3).setIsDead(); MIBHI->getOperand(3).setIsDead();
// SREG is always implicitly killed
MIBHI->getOperand(4).setIsKill();
MI.eraseFromParent();
return true;
}
/// Expand a 16-bit arithmetic-shift-right-by-14 pseudo into five 8-bit
/// instructions (ADD, SBC, ADD, MOV, ADC) inserted before \p MBBI, then
/// erase the pseudo.
///
/// \param MBB  block that contains the pseudo instruction.
/// \param MBBI iterator pointing at the pseudo instruction.
/// \returns true, because the pseudo is always replaced. This matches the
///          sibling expanders expandASRW7Rd and expandASRW15Rd.
bool AVRExpandPseudo::expandASRW14Rd(Block &MBB, BlockIt MBBI) {
  MachineInstr &MI = *MBBI;
  Register DstLoReg, DstHiReg;
  Register DstReg = MI.getOperand(0).getReg();
  // Liveness flags of the pseudo, forwarded to the emitted instructions.
  bool DstIsDead = MI.getOperand(0).isDead();
  bool DstIsKill = MI.getOperand(1).isKill();
  bool ImpIsDead = MI.getOperand(3).isDead();
  // Break the 16-bit register pair into its two 8-bit halves.
  TRI->splitReg(DstReg, DstLoReg, DstHiReg);

  // Emitted sequence (shown for the pair r25:r24):
  //   lsl r25
  //   sbc r24, r24
  //   lsl r25
  //   mov r25, r24
  //   rol r24

  // lsl r25 <=> add r25, r25
  buildMI(MBB, MBBI, AVR::ADDRdRr)
      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstHiReg, RegState::Kill)
      .addReg(DstHiReg, RegState::Kill);

  // sbc r24, r24
  buildMI(MBB, MBBI, AVR::SBCRdRr)
      .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstLoReg, RegState::Kill)
      .addReg(DstLoReg, RegState::Kill);

  // lsl r25 <=> add r25, r25
  buildMI(MBB, MBBI, AVR::ADDRdRr)
      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstHiReg, RegState::Kill)
      .addReg(DstHiReg, RegState::Kill);

  // mov r25, r24
  buildMI(MBB, MBBI, AVR::MOVRdRr)
      .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
      .addReg(DstLoReg);

  // rol r24 <=> adc r24, r24
  auto MIROL =
      buildMI(MBB, MBBI, AVR::ADCRdRr)
          .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstLoReg, getKillRegState(DstIsKill))
          .addReg(DstLoReg, getKillRegState(DstIsKill));

  // Propagate a dead SREG definition from the pseudo to the last
  // instruction of the expansion.
  if (ImpIsDead)
    MIROL->getOperand(3).setIsDead();

  // SREG is always implicitly killed
  MIROL->getOperand(4).setIsKill();

  MI.eraseFromParent();
  // The block was modified: report it, as the other expanders do.
  return true;
}
bool AVRExpandPseudo::expandASRW15Rd(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool ImpIsDead = MI.getOperand(3).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
// lsl r25
// sbc r25, r25
// mov r24, r25
// lsl r25 <=> add r25, r25
buildMI(MBB, MBBI, AVR::ADDRdRr)
.addReg(DstHiReg, RegState::Define)
.addReg(DstHiReg, RegState::Kill)
.addReg(DstHiReg, RegState::Kill);
// sbc r25, r25
auto MISBC =
buildMI(MBB, MBBI, AVR::SBCRdRr)
.addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg, RegState::Kill)
.addReg(DstHiReg, RegState::Kill);
if (ImpIsDead)
MISBC->getOperand(3).setIsDead();
// SREG is always implicitly killed
MISBC->getOperand(4).setIsKill();
// mov r24, r25
buildMI(MBB, MBBI, AVR::MOVRdRr)
.addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstHiReg);
MI.eraseFromParent(); MI.eraseFromParent();
return true; return true;
@ -1858,8 +2004,14 @@ bool AVRExpandPseudo::expand<AVR::ASRWNRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI; MachineInstr &MI = *MBBI;
unsigned Imm = MI.getOperand(2).getImm(); unsigned Imm = MI.getOperand(2).getImm();
switch (Imm) { switch (Imm) {
case 7:
return expandASRW7Rd(MBB, MBBI);
case 8: case 8:
return expandASRW8Rd(MBB, MBBI); return expandASRW8Rd(MBB, MBBI);
case 14:
return expandASRW14Rd(MBB, MBBI);
case 15:
return expandASRW15Rd(MBB, MBBI);
default: default:
llvm_unreachable("unimplemented asrwn"); llvm_unreachable("unimplemented asrwn");
return false; return false;

View File

@ -270,8 +270,6 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
} }
SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const { SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
//: TODO: this function has to be completely rewritten to produce optimal
// code, for now it's producing very long but correct code.
unsigned Opc8; unsigned Opc8;
const SDNode *N = Op.getNode(); const SDNode *N = Op.getNode();
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
@ -372,6 +370,27 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
ShiftAmount = 0; ShiftAmount = 0;
} }
} else if (VT.getSizeInBits() == 16) { } else if (VT.getSizeInBits() == 16) {
if (Op.getOpcode() == ISD::SRA)
// Special optimization for int16 arithmetic right shift.
switch (ShiftAmount) {
case 15:
Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
DAG.getConstant(15, dl, VT));
ShiftAmount = 0;
break;
case 14:
Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
DAG.getConstant(14, dl, VT));
ShiftAmount = 0;
break;
case 7:
Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
DAG.getConstant(7, dl, VT));
ShiftAmount = 0;
break;
default:
break;
}
if (4 <= ShiftAmount && ShiftAmount < 8) if (4 <= ShiftAmount && ShiftAmount < 8)
switch (Op.getOpcode()) { switch (Op.getOpcode()) {
case ISD::SHL: case ISD::SHL:

View File

@ -1943,7 +1943,7 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
: $src)), : $src)),
(implicit SREG)]>; (implicit SREG)]>;
def ASRWNRd : Pseudo<(outs DLDREGS def ASRWNRd : Pseudo<(outs DREGS
: $rd), : $rd),
(ins DREGS (ins DREGS
: $src, imm16 : $src, imm16

View File

@ -0,0 +1,41 @@
# RUN: llc -O0 -run-pass=avr-expand-pseudo %s -o - | FileCheck %s
# Runs only the avr-expand-pseudo pass over the MIR below and matches the
# instruction sequences produced for the ASRWNRd pseudo with the immediate
# shift amounts 7, 8, 14 and 15. Each group of expectations is written
# directly above the pseudo instruction it belongs to.
--- |
target triple = "avr--"
define void @test() {
entry:
ret void
}
...
---
name: test
body: |
bb.0.entry:
liveins: $r15r14, $r13r12, $r11r10, $r17r16
; CHECK-LABEL: test
; Shift amount 7 on $r15r14: expect ADD, MOV, ADC, SBC.
; CHECK: $r14 = ADDRdRr killed $r14, killed $r14, implicit-def $sreg
; CHECK-NEXT: $r14 = MOVRdRr $r15
; CHECK-NEXT: $r14 = ADCRdRr $r14, $r14, implicit-def $sreg, implicit $sreg
; CHECK-NEXT: $r15 = SBCRdRr $r15, $r15, implicit-def $sreg, implicit killed $sreg
$r15r14 = ASRWNRd $r15r14, 7, implicit-def $sreg
; Shift amount 8 on $r13r12: expect MOV, ADD, SBC.
; CHECK-NEXT: $r12 = MOVRdRr $r13
; CHECK-NEXT: $r13 = ADDRdRr killed $r13, killed $r13, implicit-def $sreg
; CHECK-NEXT: $r13 = SBCRdRr $r13, $r13, implicit-def $sreg, implicit killed $sreg
$r13r12 = ASRWNRd $r13r12, 8, implicit-def $sreg
; Shift amount 14 on $r11r10: expect ADD, SBC, ADD, MOV, ADC.
; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg
; CHECK-NEXT: $r10 = SBCRdRr killed $r10, killed $r10, implicit-def $sreg, implicit $sreg
; CHECK-NEXT: $r11 = ADDRdRr killed $r11, killed $r11, implicit-def $sreg
; CHECK-NEXT: $r11 = MOVRdRr $r10
; CHECK-NEXT: $r10 = ADCRdRr $r10, $r10, implicit-def $sreg, implicit killed $sreg
$r11r10 = ASRWNRd $r11r10, 14, implicit-def $sreg
; Shift amount 15 on $r17r16: expect ADD, SBC, MOV.
; CHECK-NEXT: $r17 = ADDRdRr killed $r17, killed $r17, implicit-def $sreg
; CHECK-NEXT: $r17 = SBCRdRr killed $r17, killed $r17, implicit-def $sreg, implicit killed $sreg
; CHECK-NEXT: $r16 = MOVRdRr $r17
$r17r16 = ASRWNRd $r17r16, 15, implicit-def $sreg
...

View File

@ -301,6 +301,17 @@ define i16 @lsr_i16_13(i16 %a) {
ret i16 %result ret i16 %result
} }
; Codegen test for an i16 arithmetic right shift by the constant 7: the
; output is expected to be exactly the four instructions matched below
; (lsl, mov, rol, sbc), immediately followed by ret.
define i16 @asr_i16_7(i16 %a) {
; CHECK-LABEL: asr_i16_7
; CHECK: lsl r24
; CHECK-NEXT: mov r24, r25
; CHECK-NEXT: rol r24
; CHECK-NEXT: sbc r25, r25
; CHECK-NEXT: ret
%result = ashr i16 %a, 7
ret i16 %result
}
define i16 @asr_i16_9(i16 %a) { define i16 @asr_i16_9(i16 %a) {
; CHECK-LABEL: asr_i16_9 ; CHECK-LABEL: asr_i16_9
; CHECK: mov r24, r25 ; CHECK: mov r24, r25
@ -325,3 +336,25 @@ define i16 @asr_i16_12(i16 %a) {
%result = ashr i16 %a, 12 %result = ashr i16 %a, 12
ret i16 %result ret i16 %result
} }
; Codegen test for an i16 arithmetic right shift by the constant 14: the
; output is expected to be exactly the five instructions matched below
; (lsl, sbc, lsl, mov, rol), immediately followed by ret.
define i16 @asr_i16_14(i16 %a) {
; CHECK-LABEL: asr_i16_14
; CHECK: lsl r25
; CHECK-NEXT: sbc r24, r24
; CHECK-NEXT: lsl r25
; CHECK-NEXT: mov r25, r24
; CHECK-NEXT: rol r24
; CHECK-NEXT: ret
%result = ashr i16 %a, 14
ret i16 %result
}
; Codegen test for an i16 arithmetic right shift by the constant 15: the
; output is expected to be exactly the three instructions matched below
; (lsl, sbc, mov), immediately followed by ret.
define i16 @asr_i16_15(i16 %a) {
; CHECK-LABEL: asr_i16_15
; CHECK: lsl r25
; CHECK-NEXT: sbc r25, r25
; CHECK-NEXT: mov r24, r25
; CHECK-NEXT: ret
%result = ashr i16 %a, 15
ret i16 %result
}

View File

@ -1,4 +1,4 @@
; RUN: llc -march=avr < %s | FileCheck %s ; RUN: llc -march=avr -verify-machineinstrs < %s | FileCheck %s
define i8 @sign_extended_1_to_8(i1) { define i8 @sign_extended_1_to_8(i1) {
; CHECK-LABEL: sign_extended_1_to_8 ; CHECK-LABEL: sign_extended_1_to_8