[LoongArch] Add emergency spill slot for GPR for large frames
An emergency spill slot is created when the stack size cannot be represented by an 11-bit signed number. This patch also modifies how the `sp` is adjusted in the prologue. `RegScavenger` will place the spill instruction before the prologue if a VReg is created in the prologue. This will pollute the caller's stack data. Therefore, until there is a better way, we just use the `addi.w/d` instruction for stack adjustment to ensure that a VReg will not be created. (RISCV has the same issue, #58286.) Due to the addition of the emergency spill slot, some test cases that rely on an exact stack size need to be updated. Differential Revision: https://reviews.llvm.org/D135757
This commit is contained in:
parent
9bb1e21f07
commit
4e2364a285
|
@ -118,6 +118,26 @@ void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const {
|
|||
MFI.setStackSize(FrameSize);
|
||||
}
|
||||
|
||||
// Reserve an emergency spill slot for the register scavenger on large frames.
// Returns without doing anything when the estimated stack size fits in a
// signed 11-bit value. Otherwise one stack object is created with the spill
// size and spill alignment of the GPR register class, and its frame index is
// registered with RS as a scavenging frame index.
void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
|
||||
MachineFunction &MF, RegScavenger *RS) const {
|
||||
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
|
||||
const TargetRegisterClass &RC = LoongArch::GPRRegClass;
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
|
||||
// estimateStackSize has been observed to under-estimate the final stack
|
||||
// size, so give ourselves wiggle-room by checking for a stack size
|
||||
// representable in an 11-bit signed field rather than a 12-bit one.
|
||||
if (isInt<11>(MFI.estimateStackSize(MF)))
|
||||
return;
|
||||
|
||||
// Create an emergency spill slot sized and aligned for one GPR.
|
||||
int FI =
|
||||
MFI.CreateStackObject(RI->getSpillSize(RC), RI->getSpillAlign(RC), false);
|
||||
RS->addScavengingFrameIndex(FI);
|
||||
LLVM_DEBUG(dbgs() << "Allocated FI(" << FI
|
||||
<< ") as the emergency spill slot.\n");
|
||||
}
|
||||
|
||||
void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
|
@ -125,6 +145,7 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
const LoongArchRegisterInfo *RI = STI.getRegisterInfo();
|
||||
const LoongArchInstrInfo *TII = STI.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
bool IsLA64 = STI.is64Bit();
|
||||
|
||||
Register SPReg = LoongArch::R3;
|
||||
Register FPReg = LoongArch::R22;
|
||||
|
@ -144,19 +165,22 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
if (StackSize == 0 && !MFI.adjustsStack())
|
||||
return;
|
||||
|
||||
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
|
||||
uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF, true);
|
||||
uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
|
||||
// Split the SP adjustment to reduce the offsets of callee saved spill.
|
||||
if (FirstSPAdjustAmount)
|
||||
StackSize = FirstSPAdjustAmount;
|
||||
|
||||
// Adjust stack.
|
||||
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
|
||||
// Emit ".cfi_def_cfa_offset StackSize".
|
||||
unsigned CFIIndex =
|
||||
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
if (FirstSPAdjustAmount != 2048 || SecondSPAdjustAmount == 0) {
|
||||
// Emit ".cfi_def_cfa_offset StackSize".
|
||||
unsigned CFIIndex =
|
||||
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
const auto &CSI = MFI.getCalleeSavedInfo();
|
||||
|
||||
|
@ -193,14 +217,25 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
}
|
||||
|
||||
// Emit the second SP adjustment after saving callee saved registers.
|
||||
if (FirstSPAdjustAmount) {
|
||||
uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
|
||||
assert(SecondSPAdjustAmount > 0 &&
|
||||
"SecondSPAdjustAmount should be greater than zero");
|
||||
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
|
||||
MachineInstr::FrameSetup);
|
||||
if (FirstSPAdjustAmount && SecondSPAdjustAmount) {
|
||||
if (hasFP(MF)) {
|
||||
assert(SecondSPAdjustAmount > 0 &&
|
||||
"SecondSPAdjustAmount should be greater than zero");
|
||||
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
|
||||
MachineInstr::FrameSetup);
|
||||
} else {
|
||||
// FIXME: RegScavenger will place the spill instruction before the
|
||||
// prologue if a VReg is created in the prologue. This will pollute the
|
||||
// caller's stack data. Therefore, until there is a better way, we just use
|
||||
// the `addi.w/d` instruction for stack adjustment to ensure that VReg
|
||||
// will not be created.
|
||||
for (int Val = SecondSPAdjustAmount; Val > 0; Val -= 2048)
|
||||
BuildMI(MBB, MBBI, DL,
|
||||
TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Val < 2048 ? -Val : -2048)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
||||
if (!hasFP(MF)) {
|
||||
// If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
|
||||
// don't emit an sp-based .cfi_def_cfa_offset
|
||||
// Emit ".cfi_def_cfa_offset RealStackSize"
|
||||
|
@ -219,14 +254,12 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
Register VR =
|
||||
MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
|
||||
BuildMI(MBB, MBBI, DL,
|
||||
TII->get(STI.is64Bit() ? LoongArch::SRLI_D : LoongArch::SRLI_W),
|
||||
VR)
|
||||
TII->get(IsLA64 ? LoongArch::SRLI_D : LoongArch::SRLI_W), VR)
|
||||
.addReg(SPReg)
|
||||
.addImm(ShiftAmount)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
BuildMI(MBB, MBBI, DL,
|
||||
TII->get(STI.is64Bit() ? LoongArch::SLLI_D : LoongArch::SLLI_W),
|
||||
SPReg)
|
||||
TII->get(IsLA64 ? LoongArch::SLLI_D : LoongArch::SLLI_W), SPReg)
|
||||
.addReg(VR)
|
||||
.addImm(ShiftAmount)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
|
@ -295,20 +328,27 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
// st.d $ra, $sp, 2024
|
||||
// st.d $fp, $sp, 2016
|
||||
// addi.d $sp, $sp, -16
|
||||
uint64_t LoongArchFrameLowering::getFirstSPAdjustAmount(
|
||||
const MachineFunction &MF) const {
|
||||
uint64_t
|
||||
LoongArchFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF,
|
||||
bool IsPrologue) const {
|
||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
|
||||
|
||||
// Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
|
||||
// 12-bit and there exists a callee-saved register needing to be pushed.
|
||||
if (!isInt<12>(MFI.getStackSize()) && (CSI.size() > 0)) {
|
||||
if (!isInt<12>(MFI.getStackSize())) {
|
||||
// FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
|
||||
// cause sp = sp + 2048 in the epilogue to be split into multiple
|
||||
// instructions. Offsets smaller than 2048 can fit in a single load/store
|
||||
// instruction, and we have to stick with the stack alignment.
|
||||
// So (2048 - StackAlign) will satisfy the stack alignment.
|
||||
return 2048 - getStackAlign().value();
|
||||
//
|
||||
// FIXME: This place may seem odd. When using multiple ADDI instructions to
|
||||
// adjust the stack in Prologue, and there are no callee-saved registers, we
|
||||
// can take advantage of the logic of split sp adjustment to reduce code
|
||||
// changes.
|
||||
return CSI.size() > 0 ? 2048 - getStackAlign().value()
|
||||
: (IsPrologue ? 2048 : 0);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -34,6 +34,9 @@ public:
|
|||
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
|
||||
RegScavenger *RS) const override;
|
||||
|
||||
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
|
||||
RegScavenger *RS) const override;
|
||||
|
||||
bool hasReservedCallFrame(const MachineFunction &MF) const override;
|
||||
MachineBasicBlock::iterator
|
||||
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
|
@ -45,7 +48,8 @@ public:
|
|||
bool hasFP(const MachineFunction &MF) const override;
|
||||
bool hasBP(const MachineFunction &MF) const;
|
||||
|
||||
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
|
||||
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF,
|
||||
bool IsPrologue = false) const;
|
||||
|
||||
private:
|
||||
void determineFrameLayout(MachineFunction &MF) const;
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc --mtriple=loongarch64 -O0 < %s | FileCheck %s
|
||||
|
||||
@var = external global i32
|
||||
|
||||
define void @func() {
|
||||
; CHECK-LABEL: func:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -2048
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -2048
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -16
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 4112
|
||||
; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(var)
|
||||
; CHECK-NEXT: ld.d $a1, $a0, %got_pc_lo12(var)
|
||||
; CHECK-NEXT: ld.w $t8, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t7, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t6, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t5, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t4, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t3, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t2, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t1, $a1, 0
|
||||
; CHECK-NEXT: ld.w $t0, $a1, 0
|
||||
; CHECK-NEXT: ld.w $a7, $a1, 0
|
||||
; CHECK-NEXT: ld.w $a6, $a1, 0
|
||||
; CHECK-NEXT: ld.w $a5, $a1, 0
|
||||
; CHECK-NEXT: ld.w $a4, $a1, 0
|
||||
; CHECK-NEXT: ld.w $a3, $a1, 0
|
||||
; CHECK-NEXT: ld.w $a2, $a1, 0
|
||||
; CHECK-NEXT: ld.w $a0, $a1, 0
|
||||
; CHECK-NEXT: st.d $fp, $sp, 0
|
||||
; CHECK-NEXT: lu12i.w $fp, 1
|
||||
; CHECK-NEXT: ori $fp, $fp, 12
|
||||
; CHECK-NEXT: add.d $fp, $sp, $fp
|
||||
; CHECK-NEXT: st.w $t8, $fp, 0
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 0
|
||||
; CHECK-NEXT: st.w $t8, $a1, 0
|
||||
; CHECK-NEXT: st.w $t7, $a1, 0
|
||||
; CHECK-NEXT: st.w $t6, $a1, 0
|
||||
; CHECK-NEXT: st.w $t5, $a1, 0
|
||||
; CHECK-NEXT: st.w $t4, $a1, 0
|
||||
; CHECK-NEXT: st.w $t3, $a1, 0
|
||||
; CHECK-NEXT: st.w $t2, $a1, 0
|
||||
; CHECK-NEXT: st.w $t1, $a1, 0
|
||||
; CHECK-NEXT: st.w $t0, $a1, 0
|
||||
; CHECK-NEXT: st.w $a7, $a1, 0
|
||||
; CHECK-NEXT: st.w $a6, $a1, 0
|
||||
; CHECK-NEXT: st.w $a5, $a1, 0
|
||||
; CHECK-NEXT: st.w $a4, $a1, 0
|
||||
; CHECK-NEXT: st.w $a3, $a1, 0
|
||||
; CHECK-NEXT: st.w $a2, $a1, 0
|
||||
; CHECK-NEXT: st.w $a0, $a1, 0
|
||||
; CHECK-NEXT: lu12i.w $a0, 1
|
||||
; CHECK-NEXT: ori $a0, $a0, 16
|
||||
; CHECK-NEXT: add.d $sp, $sp, $a0
|
||||
; CHECK-NEXT: ret
|
||||
%space = alloca i32, align 4
|
||||
%stackspace = alloca[1024 x i32], align 4
|
||||
|
||||
;; Load values to increase register pressure.
|
||||
%v0 = load volatile i32, ptr @var
|
||||
%v1 = load volatile i32, ptr @var
|
||||
%v2 = load volatile i32, ptr @var
|
||||
%v3 = load volatile i32, ptr @var
|
||||
%v4 = load volatile i32, ptr @var
|
||||
%v5 = load volatile i32, ptr @var
|
||||
%v6 = load volatile i32, ptr @var
|
||||
%v7 = load volatile i32, ptr @var
|
||||
%v8 = load volatile i32, ptr @var
|
||||
%v9 = load volatile i32, ptr @var
|
||||
%v10 = load volatile i32, ptr @var
|
||||
%v11 = load volatile i32, ptr @var
|
||||
%v12 = load volatile i32, ptr @var
|
||||
%v13 = load volatile i32, ptr @var
|
||||
%v14 = load volatile i32, ptr @var
|
||||
%v15 = load volatile i32, ptr @var
|
||||
|
||||
;; Computing a stack-relative value needs an additional register.
|
||||
;; We should get an emergency spill/reload for this.
|
||||
store volatile i32 %v0, ptr %space
|
||||
|
||||
;; store values so they are used.
|
||||
store volatile i32 %v0, ptr @var
|
||||
store volatile i32 %v1, ptr @var
|
||||
store volatile i32 %v2, ptr @var
|
||||
store volatile i32 %v3, ptr @var
|
||||
store volatile i32 %v4, ptr @var
|
||||
store volatile i32 %v5, ptr @var
|
||||
store volatile i32 %v6, ptr @var
|
||||
store volatile i32 %v7, ptr @var
|
||||
store volatile i32 %v8, ptr @var
|
||||
store volatile i32 %v9, ptr @var
|
||||
store volatile i32 %v10, ptr @var
|
||||
store volatile i32 %v11, ptr @var
|
||||
store volatile i32 %v12, ptr @var
|
||||
store volatile i32 %v13, ptr @var
|
||||
store volatile i32 %v14, ptr @var
|
||||
store volatile i32 %v15, ptr @var
|
||||
|
||||
ret void
|
||||
}
|
|
@ -27,6 +27,7 @@ define i32 @test() nounwind {
|
|||
ret i32 0
|
||||
}
|
||||
|
||||
;; Note: will create an emergency spill slot, if (!isInt<11>(StackSize)).
|
||||
;; Should involve only one SP-adjusting addi per adjustment.
|
||||
define void @test_large_frame_size_2032() {
|
||||
; CHECK-LABEL: test_large_frame_size_2032:
|
||||
|
@ -35,7 +36,7 @@ define void @test_large_frame_size_2032() {
|
|||
; CHECK-NEXT: .cfi_def_cfa_offset 2032
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 2032
|
||||
; CHECK-NEXT: ret
|
||||
%1 = alloca i8, i32 2032
|
||||
%1 = alloca i8, i32 2016 ; + 16(emergency slot) = 2032
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -49,7 +50,7 @@ define void @test_large_frame_size_2048() {
|
|||
; CHECK-NEXT: addi.d $sp, $sp, 2032
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 16
|
||||
; CHECK-NEXT: ret
|
||||
%1 = alloca i8, i32 2048
|
||||
%1 = alloca i8, i32 2032 ; + 16(emergency slot) = 2048
|
||||
ret void
|
||||
}
|
||||
|
||||
|
@ -63,21 +64,35 @@ define void @test_large_frame_size_2064() {
|
|||
; CHECK-NEXT: addi.d $sp, $sp, 2032
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 32
|
||||
; CHECK-NEXT: ret
|
||||
%1 = alloca i8, i32 2064
|
||||
%1 = alloca i8, i32 2048 ; + 16(emergency slot) = 2064
|
||||
ret void
|
||||
}
|
||||
|
||||
;; NOTE: Due to the problem with the emergency spill slot, the scratch register
|
||||
;; will not be used when the fp is eliminated. To make this test valid, add the
|
||||
;; attribute "frame-pointer=all".
|
||||
|
||||
;; SP should be adjusted with help of a scratch register.
|
||||
define void @test_large_frame_size_1234576() {
|
||||
define void @test_large_frame_size_1234576() "frame-pointer"="all" {
|
||||
; CHECK-LABEL: test_large_frame_size_1234576:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: lu12i.w $a0, 301
|
||||
; CHECK-NEXT: ori $a0, $a0, 1680
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -2032
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 2032
|
||||
; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_offset 1, -8
|
||||
; CHECK-NEXT: .cfi_offset 22, -16
|
||||
; CHECK-NEXT: addi.d $fp, $sp, 2032
|
||||
; CHECK-NEXT: .cfi_def_cfa 22, 0
|
||||
; CHECK-NEXT: lu12i.w $a0, 300
|
||||
; CHECK-NEXT: ori $a0, $a0, 3760
|
||||
; CHECK-NEXT: sub.d $sp, $sp, $a0
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 1234576
|
||||
; CHECK-NEXT: lu12i.w $a0, 301
|
||||
; CHECK-NEXT: ori $a0, $a0, 1680
|
||||
; CHECK-NEXT: lu12i.w $a0, 300
|
||||
; CHECK-NEXT: ori $a0, $a0, 3760
|
||||
; CHECK-NEXT: add.d $sp, $sp, $a0
|
||||
; CHECK-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 2032
|
||||
; CHECK-NEXT: ret
|
||||
%1 = alloca i8, i32 1234567
|
||||
ret void
|
||||
|
|
|
@ -24,19 +24,20 @@ entry:
|
|||
}
|
||||
|
||||
;; The stack size is 2032 and the SP adjustment will not be split.
|
||||
;; 2016 + 8(RA) + 8(emergency spill slot) = 2032
|
||||
define i32 @NoSplitSP() nounwind {
|
||||
; CHECK-LABEL: NoSplitSP:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: addi.d $sp, $sp, -2032
|
||||
; CHECK-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 0
|
||||
; CHECK-NEXT: addi.d $a0, $sp, 8
|
||||
; CHECK-NEXT: bl %plt(foo)
|
||||
; CHECK-NEXT: move $a0, $zero
|
||||
; CHECK-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
|
||||
; CHECK-NEXT: addi.d $sp, $sp, 2032
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%xx = alloca [2024 x i8], align 1
|
||||
%xx = alloca [2016 x i8], align 1
|
||||
%0 = getelementptr inbounds [2024 x i8], ptr %xx, i32 0, i32 0
|
||||
%call = call i32 @foo(ptr nonnull %0)
|
||||
ret i32 0
|
||||
|
|
|
@ -305,42 +305,42 @@ define void @caller_no_realign256() "no-realign-stack" {
|
|||
define void @caller512() {
|
||||
; LA32-LABEL: caller512:
|
||||
; LA32: # %bb.0:
|
||||
; LA32-NEXT: addi.w $sp, $sp, -512
|
||||
; LA32-NEXT: .cfi_def_cfa_offset 512
|
||||
; LA32-NEXT: st.w $ra, $sp, 508 # 4-byte Folded Spill
|
||||
; LA32-NEXT: st.w $fp, $sp, 504 # 4-byte Folded Spill
|
||||
; LA32-NEXT: addi.w $sp, $sp, -1024
|
||||
; LA32-NEXT: .cfi_def_cfa_offset 1024
|
||||
; LA32-NEXT: st.w $ra, $sp, 1020 # 4-byte Folded Spill
|
||||
; LA32-NEXT: st.w $fp, $sp, 1016 # 4-byte Folded Spill
|
||||
; LA32-NEXT: .cfi_offset 1, -4
|
||||
; LA32-NEXT: .cfi_offset 22, -8
|
||||
; LA32-NEXT: addi.w $fp, $sp, 512
|
||||
; LA32-NEXT: addi.w $fp, $sp, 1024
|
||||
; LA32-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA32-NEXT: srli.w $a0, $sp, 9
|
||||
; LA32-NEXT: slli.w $sp, $a0, 9
|
||||
; LA32-NEXT: addi.w $a0, $sp, 0
|
||||
; LA32-NEXT: addi.w $a0, $sp, 512
|
||||
; LA32-NEXT: bl %plt(callee)
|
||||
; LA32-NEXT: addi.w $sp, $fp, -512
|
||||
; LA32-NEXT: ld.w $fp, $sp, 504 # 4-byte Folded Reload
|
||||
; LA32-NEXT: ld.w $ra, $sp, 508 # 4-byte Folded Reload
|
||||
; LA32-NEXT: addi.w $sp, $sp, 512
|
||||
; LA32-NEXT: addi.w $sp, $fp, -1024
|
||||
; LA32-NEXT: ld.w $fp, $sp, 1016 # 4-byte Folded Reload
|
||||
; LA32-NEXT: ld.w $ra, $sp, 1020 # 4-byte Folded Reload
|
||||
; LA32-NEXT: addi.w $sp, $sp, 1024
|
||||
; LA32-NEXT: ret
|
||||
;
|
||||
; LA64-LABEL: caller512:
|
||||
; LA64: # %bb.0:
|
||||
; LA64-NEXT: addi.d $sp, $sp, -512
|
||||
; LA64-NEXT: .cfi_def_cfa_offset 512
|
||||
; LA64-NEXT: st.d $ra, $sp, 504 # 8-byte Folded Spill
|
||||
; LA64-NEXT: st.d $fp, $sp, 496 # 8-byte Folded Spill
|
||||
; LA64-NEXT: addi.d $sp, $sp, -1024
|
||||
; LA64-NEXT: .cfi_def_cfa_offset 1024
|
||||
; LA64-NEXT: st.d $ra, $sp, 1016 # 8-byte Folded Spill
|
||||
; LA64-NEXT: st.d $fp, $sp, 1008 # 8-byte Folded Spill
|
||||
; LA64-NEXT: .cfi_offset 1, -8
|
||||
; LA64-NEXT: .cfi_offset 22, -16
|
||||
; LA64-NEXT: addi.d $fp, $sp, 512
|
||||
; LA64-NEXT: addi.d $fp, $sp, 1024
|
||||
; LA64-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA64-NEXT: srli.d $a0, $sp, 9
|
||||
; LA64-NEXT: slli.d $sp, $a0, 9
|
||||
; LA64-NEXT: addi.d $a0, $sp, 0
|
||||
; LA64-NEXT: addi.d $a0, $sp, 512
|
||||
; LA64-NEXT: bl %plt(callee)
|
||||
; LA64-NEXT: addi.d $sp, $fp, -512
|
||||
; LA64-NEXT: ld.d $fp, $sp, 496 # 8-byte Folded Reload
|
||||
; LA64-NEXT: ld.d $ra, $sp, 504 # 8-byte Folded Reload
|
||||
; LA64-NEXT: addi.d $sp, $sp, 512
|
||||
; LA64-NEXT: addi.d $sp, $fp, -1024
|
||||
; LA64-NEXT: ld.d $fp, $sp, 1008 # 8-byte Folded Reload
|
||||
; LA64-NEXT: ld.d $ra, $sp, 1016 # 8-byte Folded Reload
|
||||
; LA64-NEXT: addi.d $sp, $sp, 1024
|
||||
; LA64-NEXT: ret
|
||||
%1 = alloca i8, align 512
|
||||
call void @callee(i8* %1)
|
||||
|
@ -379,42 +379,46 @@ define void @caller_no_realign512() "no-realign-stack" {
|
|||
define void @caller1024() {
|
||||
; LA32-LABEL: caller1024:
|
||||
; LA32: # %bb.0:
|
||||
; LA32-NEXT: addi.w $sp, $sp, -1024
|
||||
; LA32-NEXT: .cfi_def_cfa_offset 1024
|
||||
; LA32-NEXT: st.w $ra, $sp, 1020 # 4-byte Folded Spill
|
||||
; LA32-NEXT: st.w $fp, $sp, 1016 # 4-byte Folded Spill
|
||||
; LA32-NEXT: addi.w $sp, $sp, -2032
|
||||
; LA32-NEXT: .cfi_def_cfa_offset 2032
|
||||
; LA32-NEXT: st.w $ra, $sp, 2028 # 4-byte Folded Spill
|
||||
; LA32-NEXT: st.w $fp, $sp, 2024 # 4-byte Folded Spill
|
||||
; LA32-NEXT: .cfi_offset 1, -4
|
||||
; LA32-NEXT: .cfi_offset 22, -8
|
||||
; LA32-NEXT: addi.w $fp, $sp, 1024
|
||||
; LA32-NEXT: addi.w $fp, $sp, 2032
|
||||
; LA32-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA32-NEXT: addi.w $sp, $sp, -16
|
||||
; LA32-NEXT: srli.w $a0, $sp, 10
|
||||
; LA32-NEXT: slli.w $sp, $a0, 10
|
||||
; LA32-NEXT: addi.w $a0, $sp, 0
|
||||
; LA32-NEXT: addi.w $a0, $sp, 1024
|
||||
; LA32-NEXT: bl %plt(callee)
|
||||
; LA32-NEXT: addi.w $sp, $fp, -1024
|
||||
; LA32-NEXT: ld.w $fp, $sp, 1016 # 4-byte Folded Reload
|
||||
; LA32-NEXT: ld.w $ra, $sp, 1020 # 4-byte Folded Reload
|
||||
; LA32-NEXT: addi.w $sp, $sp, 1024
|
||||
; LA32-NEXT: addi.w $sp, $fp, -2048
|
||||
; LA32-NEXT: addi.w $sp, $sp, 16
|
||||
; LA32-NEXT: ld.w $fp, $sp, 2024 # 4-byte Folded Reload
|
||||
; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload
|
||||
; LA32-NEXT: addi.w $sp, $sp, 2032
|
||||
; LA32-NEXT: ret
|
||||
;
|
||||
; LA64-LABEL: caller1024:
|
||||
; LA64: # %bb.0:
|
||||
; LA64-NEXT: addi.d $sp, $sp, -1024
|
||||
; LA64-NEXT: .cfi_def_cfa_offset 1024
|
||||
; LA64-NEXT: st.d $ra, $sp, 1016 # 8-byte Folded Spill
|
||||
; LA64-NEXT: st.d $fp, $sp, 1008 # 8-byte Folded Spill
|
||||
; LA64-NEXT: addi.d $sp, $sp, -2032
|
||||
; LA64-NEXT: .cfi_def_cfa_offset 2032
|
||||
; LA64-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill
|
||||
; LA64-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill
|
||||
; LA64-NEXT: .cfi_offset 1, -8
|
||||
; LA64-NEXT: .cfi_offset 22, -16
|
||||
; LA64-NEXT: addi.d $fp, $sp, 1024
|
||||
; LA64-NEXT: addi.d $fp, $sp, 2032
|
||||
; LA64-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA64-NEXT: addi.d $sp, $sp, -16
|
||||
; LA64-NEXT: srli.d $a0, $sp, 10
|
||||
; LA64-NEXT: slli.d $sp, $a0, 10
|
||||
; LA64-NEXT: addi.d $a0, $sp, 0
|
||||
; LA64-NEXT: addi.d $a0, $sp, 1024
|
||||
; LA64-NEXT: bl %plt(callee)
|
||||
; LA64-NEXT: addi.d $sp, $fp, -1024
|
||||
; LA64-NEXT: ld.d $fp, $sp, 1008 # 8-byte Folded Reload
|
||||
; LA64-NEXT: ld.d $ra, $sp, 1016 # 8-byte Folded Reload
|
||||
; LA64-NEXT: addi.d $sp, $sp, 1024
|
||||
; LA64-NEXT: addi.d $sp, $fp, -2048
|
||||
; LA64-NEXT: addi.d $sp, $sp, 16
|
||||
; LA64-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload
|
||||
; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
|
||||
; LA64-NEXT: addi.d $sp, $sp, 2032
|
||||
; LA64-NEXT: ret
|
||||
%1 = alloca i8, align 1024
|
||||
call void @callee(i8* %1)
|
||||
|
@ -461,13 +465,17 @@ define void @caller2048() {
|
|||
; LA32-NEXT: .cfi_offset 22, -8
|
||||
; LA32-NEXT: addi.w $fp, $sp, 2032
|
||||
; LA32-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA32-NEXT: addi.w $sp, $sp, -2048
|
||||
; LA32-NEXT: addi.w $sp, $sp, -16
|
||||
; LA32-NEXT: srli.w $a0, $sp, 11
|
||||
; LA32-NEXT: slli.w $sp, $a0, 11
|
||||
; LA32-NEXT: addi.w $a0, $sp, 0
|
||||
; LA32-NEXT: ori $a0, $zero, 2048
|
||||
; LA32-NEXT: add.w $a0, $sp, $a0
|
||||
; LA32-NEXT: bl %plt(callee)
|
||||
; LA32-NEXT: addi.w $sp, $fp, -2048
|
||||
; LA32-NEXT: addi.w $sp, $sp, 16
|
||||
; LA32-NEXT: lu12i.w $a0, 1
|
||||
; LA32-NEXT: sub.w $sp, $fp, $a0
|
||||
; LA32-NEXT: addi.w $sp, $sp, 2032
|
||||
; LA32-NEXT: addi.w $sp, $sp, 32
|
||||
; LA32-NEXT: ld.w $fp, $sp, 2024 # 4-byte Folded Reload
|
||||
; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload
|
||||
; LA32-NEXT: addi.w $sp, $sp, 2032
|
||||
|
@ -483,13 +491,17 @@ define void @caller2048() {
|
|||
; LA64-NEXT: .cfi_offset 22, -16
|
||||
; LA64-NEXT: addi.d $fp, $sp, 2032
|
||||
; LA64-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA64-NEXT: addi.d $sp, $sp, -2048
|
||||
; LA64-NEXT: addi.d $sp, $sp, -16
|
||||
; LA64-NEXT: srli.d $a0, $sp, 11
|
||||
; LA64-NEXT: slli.d $sp, $a0, 11
|
||||
; LA64-NEXT: addi.d $a0, $sp, 0
|
||||
; LA64-NEXT: ori $a0, $zero, 2048
|
||||
; LA64-NEXT: add.d $a0, $sp, $a0
|
||||
; LA64-NEXT: bl %plt(callee)
|
||||
; LA64-NEXT: addi.d $sp, $fp, -2048
|
||||
; LA64-NEXT: addi.d $sp, $sp, 16
|
||||
; LA64-NEXT: lu12i.w $a0, 1
|
||||
; LA64-NEXT: sub.d $sp, $fp, $a0
|
||||
; LA64-NEXT: addi.d $sp, $sp, 2032
|
||||
; LA64-NEXT: addi.d $sp, $sp, 32
|
||||
; LA64-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload
|
||||
; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
|
||||
; LA64-NEXT: addi.d $sp, $sp, 2032
|
||||
|
@ -539,16 +551,19 @@ define void @caller4096() {
|
|||
; LA32-NEXT: .cfi_offset 22, -8
|
||||
; LA32-NEXT: addi.w $fp, $sp, 2032
|
||||
; LA32-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA32-NEXT: addi.w $sp, $sp, -2048
|
||||
; LA32-NEXT: addi.w $sp, $sp, -16
|
||||
; LA32-NEXT: lu12i.w $a0, 1
|
||||
; LA32-NEXT: ori $a0, $a0, 2064
|
||||
; LA32-NEXT: sub.w $sp, $sp, $a0
|
||||
; LA32-NEXT: srli.w $a0, $sp, 12
|
||||
; LA32-NEXT: slli.w $sp, $a0, 12
|
||||
; LA32-NEXT: addi.w $a0, $sp, 0
|
||||
; LA32-NEXT: bl %plt(callee)
|
||||
; LA32-NEXT: lu12i.w $a0, 1
|
||||
; LA32-NEXT: add.w $a0, $sp, $a0
|
||||
; LA32-NEXT: bl %plt(callee)
|
||||
; LA32-NEXT: lu12i.w $a0, 2
|
||||
; LA32-NEXT: sub.w $sp, $fp, $a0
|
||||
; LA32-NEXT: addi.w $sp, $sp, 2032
|
||||
; LA32-NEXT: addi.w $sp, $sp, 32
|
||||
; LA32-NEXT: lu12i.w $a0, 1
|
||||
; LA32-NEXT: ori $a0, $a0, 2064
|
||||
; LA32-NEXT: add.w $sp, $sp, $a0
|
||||
; LA32-NEXT: ld.w $fp, $sp, 2024 # 4-byte Folded Reload
|
||||
; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload
|
||||
; LA32-NEXT: addi.w $sp, $sp, 2032
|
||||
|
@ -564,16 +579,19 @@ define void @caller4096() {
|
|||
; LA64-NEXT: .cfi_offset 22, -16
|
||||
; LA64-NEXT: addi.d $fp, $sp, 2032
|
||||
; LA64-NEXT: .cfi_def_cfa 22, 0
|
||||
; LA64-NEXT: addi.d $sp, $sp, -2048
|
||||
; LA64-NEXT: addi.d $sp, $sp, -16
|
||||
; LA64-NEXT: lu12i.w $a0, 1
|
||||
; LA64-NEXT: ori $a0, $a0, 2064
|
||||
; LA64-NEXT: sub.d $sp, $sp, $a0
|
||||
; LA64-NEXT: srli.d $a0, $sp, 12
|
||||
; LA64-NEXT: slli.d $sp, $a0, 12
|
||||
; LA64-NEXT: addi.d $a0, $sp, 0
|
||||
; LA64-NEXT: bl %plt(callee)
|
||||
; LA64-NEXT: lu12i.w $a0, 1
|
||||
; LA64-NEXT: add.d $a0, $sp, $a0
|
||||
; LA64-NEXT: bl %plt(callee)
|
||||
; LA64-NEXT: lu12i.w $a0, 2
|
||||
; LA64-NEXT: sub.d $sp, $fp, $a0
|
||||
; LA64-NEXT: addi.d $sp, $sp, 2032
|
||||
; LA64-NEXT: addi.d $sp, $sp, 32
|
||||
; LA64-NEXT: lu12i.w $a0, 1
|
||||
; LA64-NEXT: ori $a0, $a0, 2064
|
||||
; LA64-NEXT: add.d $sp, $sp, $a0
|
||||
; LA64-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload
|
||||
; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload
|
||||
; LA64-NEXT: addi.d $sp, $sp, 2032
|
||||
|
|
Loading…
Reference in New Issue