[Thumb-1] Select post-increment load and store where possible
Thumb-1 doesn't have post-inc or pre-inc load or store instructions. However, the LDM/STM instructions with writeback can function as a post-inc load/store, e.g. `ldm r0!, {r1}` (load from r0 into r1 and increment r0 by 4). Obviously, this only works if the post-increment is 4. llvm-svn: 275540
This commit is contained in:
parent
26b0bc0bd6
commit
b3326df56a
|
@ -195,6 +195,7 @@ public:
|
||||||
private:
|
private:
|
||||||
/// Indexed (pre/post inc/dec) load matching code for ARM.
|
/// Indexed (pre/post inc/dec) load matching code for ARM.
|
||||||
bool tryARMIndexedLoad(SDNode *N);
|
bool tryARMIndexedLoad(SDNode *N);
|
||||||
|
bool tryT1IndexedLoad(SDNode *N);
|
||||||
bool tryT2IndexedLoad(SDNode *N);
|
bool tryT2IndexedLoad(SDNode *N);
|
||||||
|
|
||||||
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
|
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
|
||||||
|
@ -1543,6 +1544,31 @@ bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Indexed (post-inc) load matching code for Thumb-1.
///
/// Only one form is handled: a non-extending, post-incremented i32 load whose
/// offset is the constant 4. Such a load is replaced with the tLDR_postidx
/// pseudo. \returns true if \p N was replaced, false if it was left untouched.
bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);

  // Requiring POST_INC already rejects UNINDEXED and every other addressing
  // mode, so no separate UNINDEXED test is needed. The memory type is compared
  // as an EVT so that getSimpleVT() is never called on a non-simple type.
  if (LD->getAddressingMode() != ISD::POST_INC ||
      LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LD->getMemoryVT() != MVT::i32)
    return false;

  // The increment must be exactly the constant 4.
  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDLoc DL(N);
  SDValue Ops[] = {LD->getBasePtr(), getAL(CurDAG, DL),
                   CurDAG->getRegister(0, MVT::i32), LD->getChain()};
  // Result types: two i32 values followed by the chain.
  ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, DL, MVT::i32,
                                        MVT::i32, MVT::Other, Ops));
  return true;
}
|
||||||
|
|
||||||
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
|
bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
|
||||||
LoadSDNode *LD = cast<LoadSDNode>(N);
|
LoadSDNode *LD = cast<LoadSDNode>(N);
|
||||||
ISD::MemIndexedMode AM = LD->getAddressingMode();
|
ISD::MemIndexedMode AM = LD->getAddressingMode();
|
||||||
|
@ -3015,6 +3041,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
||||||
if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
|
if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
|
||||||
if (tryT2IndexedLoad(N))
|
if (tryT2IndexedLoad(N))
|
||||||
return;
|
return;
|
||||||
|
} else if (Subtarget->isThumb()) {
|
||||||
|
if (tryT1IndexedLoad(N))
|
||||||
|
return;
|
||||||
} else if (tryARMIndexedLoad(N))
|
} else if (tryARMIndexedLoad(N))
|
||||||
return;
|
return;
|
||||||
// Other cases are autogenerated.
|
// Other cases are autogenerated.
|
||||||
|
|
|
@ -715,6 +715,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
||||||
setIndexedStoreAction(im, MVT::i16, Legal);
|
setIndexedStoreAction(im, MVT::i16, Legal);
|
||||||
setIndexedStoreAction(im, MVT::i32, Legal);
|
setIndexedStoreAction(im, MVT::i32, Legal);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
|
||||||
|
setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
|
||||||
|
setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
|
||||||
}
|
}
|
||||||
|
|
||||||
setOperationAction(ISD::SADDO, MVT::i32, Custom);
|
setOperationAction(ISD::SADDO, MVT::i32, Custom);
|
||||||
|
@ -8247,6 +8251,19 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||||
MI.dump();
|
MI.dump();
|
||||||
llvm_unreachable("Unexpected instr type to insert");
|
llvm_unreachable("Unexpected instr type to insert");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Thumb1 post-indexed loads are really just single-register LDMs.
|
||||||
|
case ARM::tLDR_postidx: {
|
||||||
|
BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
|
||||||
|
.addOperand(MI->getOperand(1)) // Rn_wb
|
||||||
|
.addOperand(MI->getOperand(2)) // Rn
|
||||||
|
.addOperand(MI->getOperand(3)) // PredImm
|
||||||
|
.addOperand(MI->getOperand(4)) // PredReg
|
||||||
|
.addOperand(MI->getOperand(0)); // Rt
|
||||||
|
MI->eraseFromParent();
|
||||||
|
return BB;
|
||||||
|
}
|
||||||
|
|
||||||
// The Thumb2 pre-indexed stores have the same MI operands, they just
|
// The Thumb2 pre-indexed stores have the same MI operands, they just
|
||||||
// define them differently in the .td files from the isel patterns, so
|
// define them differently in the .td files from the isel patterns, so
|
||||||
// they need pseudos.
|
// they need pseudos.
|
||||||
|
@ -11596,22 +11613,37 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
|
||||||
SDValue &Offset,
|
SDValue &Offset,
|
||||||
ISD::MemIndexedMode &AM,
|
ISD::MemIndexedMode &AM,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
if (Subtarget->isThumb1Only())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
EVT VT;
|
EVT VT;
|
||||||
SDValue Ptr;
|
SDValue Ptr;
|
||||||
bool isSEXTLoad = false;
|
bool isSEXTLoad = false, isNonExt;
|
||||||
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
|
||||||
VT = LD->getMemoryVT();
|
VT = LD->getMemoryVT();
|
||||||
Ptr = LD->getBasePtr();
|
Ptr = LD->getBasePtr();
|
||||||
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
|
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
|
||||||
|
isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
|
||||||
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
|
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
|
||||||
VT = ST->getMemoryVT();
|
VT = ST->getMemoryVT();
|
||||||
Ptr = ST->getBasePtr();
|
Ptr = ST->getBasePtr();
|
||||||
|
isNonExt = !ST->isTruncatingStore();
|
||||||
} else
|
} else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (Subtarget->isThumb1Only()) {
|
||||||
|
// Thumb-1 can do a limited post-inc load or store as an updating LDM. It
|
||||||
|
// must be non-extending/truncating, i32, with an offset of 4.
|
||||||
|
assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
|
||||||
|
if (Op->getOpcode() != ISD::ADD || !isNonExt)
|
||||||
|
return false;
|
||||||
|
auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
|
||||||
|
if (!RHS || RHS->getZExtValue() != 4)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Offset = Op->getOperand(1);
|
||||||
|
Base = Op->getOperand(0);
|
||||||
|
AM = ISD::POST_INC;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool isInc;
|
bool isInc;
|
||||||
bool isLegal = false;
|
bool isLegal = false;
|
||||||
if (Subtarget->isThumb2())
|
if (Subtarget->isThumb2())
|
||||||
|
|
|
@ -1451,6 +1451,24 @@ def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
|
||||||
def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
|
def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
|
||||||
def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
|
def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
|
||||||
|
|
||||||
|
// post-inc loads and stores
|
||||||
|
|
||||||
|
// post-inc LDR -> LDM r0!, {r1}. The way operands are laid out in LDMs is
// different to how ISel expects them for a post-inc load, so use a pseudo
// and expand it just after ISel.
// The 16-bit Thumb LDM can only name low registers, so every register operand
// is constrained to tGPR rather than rGPR.
// NOTE(review): the itinerary below is a store class (IIC_iStore_ru) even
// though this pseudo is a load - confirm whether a load itinerary is intended.
let usesCustomInserter = 1,
    Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
def tLDR_postidx: tPseudoInst<(outs tGPR:$Rt, tGPR:$Rn_wb),
                              (ins tGPR:$Rn, pred:$p),
                              4, IIC_iStore_ru,
                              []>;
|
||||||
|
|
||||||
|
// post-inc STR -> STM r0!, {r1}. The layout of this (because it doesn't def
// multiple registers) is the same in ISel as MachineInstr, so there's no need
// for a pseudo.
// The 16-bit Thumb STM can only name low registers, so both operands are
// constrained to tGPR rather than rGPR.
def : T1Pat<(post_store tGPR:$Rt, tGPR:$Rn, 4),
            (tSTMIA_UPD tGPR:$Rn, tGPR:$Rt)>;
|
||||||
|
|
||||||
// If it's impossible to use [r,r] address mode for sextload, select to
|
// If it's impossible to use [r,r] address mode for sextload, select to
|
||||||
// ldr{b|h} + sxt{b|h} instead.
|
// ldr{b|h} + sxt{b|h} instead.
|
||||||
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
|
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
; RUN: llc -mtriple=thumbv7 -mcpu=cortex-m0 < %s -disable-lsr | FileCheck %s
|
||||||
|
; FIXME: LSR mangles the last two testcases pretty badly. When this is fixed, remove
|
||||||
|
; the -disable-lsr above.
|
||||||
|
|
||||||
|
; CHECK-LABEL: @f
|
||||||
|
; CHECK: ldm {{r[0-9]}}!, {r{{[0-9]}}}
|
||||||
|
define i32 @f(i32* readonly %a, i32* readnone %b) {
|
||||||
|
%1 = icmp eq i32* %a, %b
|
||||||
|
br i1 %1, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
.lr.ph: ; preds = %.lr.ph, %0
|
||||||
|
%i.02 = phi i32 [ %3, %.lr.ph ], [ 0, %0 ]
|
||||||
|
%.01 = phi i32* [ %4, %.lr.ph ], [ %a, %0 ]
|
||||||
|
%2 = load i32, i32* %.01, align 4
|
||||||
|
%3 = add nsw i32 %2, %i.02
|
||||||
|
%4 = getelementptr inbounds i32, i32* %.01, i32 1
|
||||||
|
%5 = icmp eq i32* %4, %b
|
||||||
|
br i1 %5, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
._crit_edge: ; preds = %.lr.ph, %0
|
||||||
|
%i.0.lcssa = phi i32 [ 0, %0 ], [ %3, %.lr.ph ]
|
||||||
|
ret i32 %i.0.lcssa
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @g
|
||||||
|
; CHECK-NOT: ldm
|
||||||
|
define i32 @g(i32* readonly %a, i32* readnone %b) {
|
||||||
|
%1 = icmp eq i32* %a, %b
|
||||||
|
br i1 %1, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
.lr.ph: ; preds = %.lr.ph, %0
|
||||||
|
%i.02 = phi i32 [ %3, %.lr.ph ], [ 0, %0 ]
|
||||||
|
%.01 = phi i32* [ %4, %.lr.ph ], [ %a, %0 ]
|
||||||
|
%2 = load i32, i32* %.01, align 4
|
||||||
|
%3 = add nsw i32 %2, %i.02
|
||||||
|
%4 = getelementptr inbounds i32, i32* %.01, i32 2
|
||||||
|
%5 = icmp eq i32* %4, %b
|
||||||
|
br i1 %5, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
._crit_edge: ; preds = %.lr.ph, %0
|
||||||
|
%i.0.lcssa = phi i32 [ 0, %0 ], [ %3, %.lr.ph ]
|
||||||
|
ret i32 %i.0.lcssa
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @h
|
||||||
|
; CHECK: stm {{r[0-9]}}!, {r{{[0-9]}}}
|
||||||
|
define void @h(i32* %a, i32* readnone %b) {
|
||||||
|
%1 = icmp eq i32* %a, %b
|
||||||
|
br i1 %1, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
.lr.ph: ; preds = %.lr.ph, %0
|
||||||
|
%i.02 = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
|
||||||
|
%.01 = phi i32* [ %3, %.lr.ph ], [ %a, %0 ]
|
||||||
|
%2 = add nsw i32 %i.02, 1
|
||||||
|
store i32 %i.02, i32* %.01, align 4
|
||||||
|
%3 = getelementptr inbounds i32, i32* %.01, i32 1
|
||||||
|
%4 = icmp eq i32* %3, %b
|
||||||
|
br i1 %4, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
._crit_edge: ; preds = %.lr.ph, %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: @j
|
||||||
|
; CHECK-NOT: stm
|
||||||
|
define void @j(i32* %a, i32* readnone %b) {
|
||||||
|
%1 = icmp eq i32* %a, %b
|
||||||
|
br i1 %1, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
.lr.ph: ; preds = %.lr.ph, %0
|
||||||
|
%i.02 = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
|
||||||
|
%.01 = phi i32* [ %3, %.lr.ph ], [ %a, %0 ]
|
||||||
|
%2 = add nsw i32 %i.02, 1
|
||||||
|
store i32 %i.02, i32* %.01, align 4
|
||||||
|
%3 = getelementptr inbounds i32, i32* %.01, i32 2
|
||||||
|
%4 = icmp eq i32* %3, %b
|
||||||
|
br i1 %4, label %._crit_edge, label %.lr.ph
|
||||||
|
|
||||||
|
._crit_edge: ; preds = %.lr.ph, %0
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue