[Ventus] ABI and stack adjustment.
Remove all SGPRs (except ra) from the callee-saved register set, as they are mainly used in kernel functions. Unify the stack to use TP only; we will emit customized instructions for SP use, which should not be treated as a stack by the LLVM codegen infrastructure (only one stack is allowed). Unifying the stack to be TP-based makes backend codegen much easier.
parent a6e8ff959a
commit e6b7935c89
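The core of the change is the frame-register rule: a kernel (entry) function addresses its frame through sp (x2), every other function through tp (x4), and the dedicated frame pointer drops out entirely (see the simplified getFrameRegister further down). A minimal standalone C++ sketch of that rule, with register numbers hard-coded purely for illustration:

#include <iostream>

// Kernel (entry) functions use sp (x2) as the frame base; all other
// functions use tp (x4). This mirrors the simplified
// RISCVRegisterInfo::getFrameRegister in this commit.
unsigned pickFrameRegister(bool IsEntryFunction) {
  return IsEntryFunction ? 2 /* x2 = sp */ : 4 /* x4 = tp */;
}

int main() {
  std::cout << "kernel frame register: x" << pickFrameRegister(true) << "\n";
  std::cout << "non-kernel frame register: x" << pickFrameRegister(false) << "\n";
  return 0;
}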
@@ -316,10 +316,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   MachineBasicBlock::iterator MBBI = MBB.begin();
   bool IsEntryFunction = RMFI->isEntryFunction();

-  Register FPReg = getFPReg(STI);
   Register SPReg = getSPReg(STI);
   Register TPReg = getTPReg(STI);
-  Register BPReg = RISCVABI::getBPReg();

   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -342,7 +340,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   determineFrameLayout(MF);

   // Determine stack ID for each frame index
-  deterMineStackID(MF);
+  determineStackID(MF);

   // If libcalls are used to spill and restore callee-saved registers, the frame
   // has two sections; the opaque section managed by the libcalls, and the
@@ -373,9 +371,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   // FIXME: TP stack size calculation is also not
   uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
   uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
-  uint64_t RealStackSize = IsEntryFunction ?
-                           SPStackSize + RMFI->getLibCallStackSize() :
-                           TPStackSize + RMFI->getLibCallStackSize();
+  //uint64_t RealStackSize = IsEntryFunction ?
+  //                         SPStackSize + RMFI->getLibCallStackSize() :
+  //                         TPStackSize + RMFI->getLibCallStackSize();

   // Early exit if there is no need to allocate on the stack
   if (MFI.getStackSize() == 0 && !MFI.adjustsStack())
@@ -391,29 +389,32 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
         MF.getFunction(), "Thread pointer required, but has been reserved."});

-  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
-  // Split the SP adjustment to reduce the offsets of callee saved spill.
-  if (FirstSPAdjustAmount) {
-    SPStackSize = FirstSPAdjustAmount;
-    RealStackSize = FirstSPAdjustAmount;
+  // Allocate space on the local-mem stack and private-mem stack if necessary.
+  if(SPStackSize) {
+    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+                  StackOffset::getFixed(SPStackSize),
+                  MachineInstr::FrameSetup, getStackAlign());
+
+    // Emit ".cfi_def_cfa_offset SPStackSize"
+    unsigned CFIIndex = MF.addFrameInst(
+        MCCFIInstruction::cfiDefCfaOffset(nullptr, SPStackSize));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlag(MachineInstr::FrameSetup);
   }

-  // Allocate space on the local-mem stack and private-mem stack if necessary.
-  if(SPStackSize)
-    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(SPStackSize),
-                  MachineInstr::FrameSetup, getStackAlign());
-  if(TPStackSize)
+  if(TPStackSize) {
     RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
                   StackOffset::getFixed(TPStackSize),
                   MachineInstr::FrameSetup, getStackAlign());

-  // Emit ".cfi_def_cfa_offset RealStackSize"
+    // Emit ".cfi_def_cfa_offset TPStackSize"
     unsigned CFIIndex = MF.addFrameInst(
-      MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
+        MCCFIInstruction::cfiDefCfaOffset(nullptr, TPStackSize));
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlag(MachineInstr::FrameSetup);
+  }

   const auto &CSI = MFI.getCalleeSavedInfo();
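With the split above, the prologue simply grows each stack that is non-empty: sp by the sGPR-spill stack size and tp by the per-thread vGPR-spill stack size, each followed by its own .cfi_def_cfa_offset. A compilable toy model of that bookkeeping, assuming the epilogue unwinds both stacks symmetrically (the names here are illustrative, not the backend API):

#include <cassert>
#include <cstdint>

// Toy model of the two-stack adjustment: the prologue adds the per-stack
// sizes (the offsets are positive, i.e. the stacks grow upward) and the
// epilogue subtracts them again.
struct StackPointers {
  uint64_t SP = 0; // sGPR-spill stack, shared per kernel
  uint64_t TP = 0; // vGPR-spill stack, private per thread
};

void prologue(StackPointers &S, uint64_t SPStackSize, uint64_t TPStackSize) {
  if (SPStackSize) S.SP += SPStackSize;
  if (TPStackSize) S.TP += TPStackSize;
}

void epilogue(StackPointers &S, uint64_t SPStackSize, uint64_t TPStackSize) {
  if (TPStackSize) S.TP -= TPStackSize;
  if (SPStackSize) S.SP -= SPStackSize;
}

int main() {
  StackPointers S;
  prologue(S, /*SPStackSize=*/16, /*TPStackSize=*/32);
  assert(S.SP == 16 && S.TP == 32);
  epilogue(S, 16, 32);
  assert(S.SP == 0 && S.TP == 0);
  return 0;
}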
@@ -444,96 +445,15 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
         .addCFIIndex(CFIIndex)
         .setMIFlag(MachineInstr::FrameSetup);
   }

-  // Generate new FP.
-  if (hasFP(MF)) {
-    if (STI.isRegisterReservedByUser(FPReg))
-      MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
-          MF.getFunction(), "Frame pointer required, but has been reserved."});
-    // The frame pointer does need to be reserved from register allocation.
-    // assert(MF.getRegInfo().isReserved(FPReg) && "FP not reserved");
-
-    RI->adjustReg(MBB, MBBI, DL, FPReg, IsEntryFunction ? SPReg : TPReg,
-                  -StackOffset::getFixed(RealStackSize - RMFI->getVarArgsSaveSize()),
-                  MachineInstr::FrameSetup, getStackAlign());
-
-    // Emit ".cfi_def_cfa $fp, RVFI->getVarArgsSaveSize()"
-    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
-        nullptr, RI->getDwarfRegNum(FPReg, true), RMFI->getVarArgsSaveSize()));
-    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex)
-        .setMIFlag(MachineInstr::FrameSetup);
-  }
-
-  // Emit the second SP adjustment after saving callee saved registers.
-  if (FirstSPAdjustAmount) {
-    uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
-    assert(SecondSPAdjustAmount > 0 &&
-           "SecondSPAdjustAmount should be greater than zero");
-    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
-                  StackOffset::getFixed(SecondSPAdjustAmount),
-                  MachineInstr::FrameSetup, getStackAlign());
-
-    // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
-    // don't emit an sp-based .cfi_def_cfa_offset
-    if (!hasFP(MF)) {
-      // Emit ".cfi_def_cfa_offset StackSize"
-      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(
-          nullptr, MFI.getStackSize()));
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
-  }
-
-  if (hasFP(MF)) {
-    // Realign Stack
-    const RISCVRegisterInfo *RI = STI.getRegisterInfo();
-    if (RI->hasStackRealignment(MF)) {
-      Align MaxAlignment = MFI.getMaxAlign();
-
-      const RISCVInstrInfo *TII = STI.getInstrInfo();
-      if (isInt<12>(-(int)MaxAlignment.value())) {
-        BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
-            .addReg(SPReg)
-            .addImm(-(int)MaxAlignment.value())
-            .setMIFlag(MachineInstr::FrameSetup);
-      } else {
-        unsigned ShiftAmount = Log2(MaxAlignment);
-        Register VR =
-            MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
-        BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
-            .addReg(SPReg)
-            .addImm(ShiftAmount)
-            .setMIFlag(MachineInstr::FrameSetup);
-        BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
-            .addReg(VR)
-            .addImm(ShiftAmount)
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-      // FP will be used to restore the frame in the epilogue, so we need
-      // another base register BP to record SP after re-alignment. SP will
-      // track the current stack after allocating variable sized objects.
-      if (hasBP(MF)) {
-        // move BP, SP
-        BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI), BPReg)
-            .addReg(SPReg)
-            .addImm(0)
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-    }
-  }
 }

 void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
                                       MachineBasicBlock &MBB) const {
   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
-  auto *RMFI = MF.getInfo<RISCVMachineFunctionInfo>();
-  Register FPReg = getFPReg(STI);
   Register SPReg = getSPReg(STI);
   Register TPReg = getTPReg(STI);
-  bool IsEntryFunction = RMFI->isEntryFunction();
   // Get the insert location for the epilogue. If there were no terminators in
   // the block, get the last instruction.
   MachineBasicBlock::iterator MBBI = MBB.end();
@@ -561,49 +481,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
   if (!CSI.empty())
     LastFrameDestroy = std::prev(MBBI, CSI.size());

-  // FIXME: Need to get 2 stack size for TP and SP!
+  // Get 2 stack size for TP and SP
   uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
   uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
-  uint64_t RealStackSize = IsEntryFunction ? SPStackSize :
-                                             TPStackSize + RMFI->getLibCallStackSize();
-  uint64_t FPOffset = RealStackSize - RMFI->getVarArgsSaveSize();
-
-  // Restore the stack pointer using the value of the frame pointer. Only
-  // necessary if the stack pointer was modified, meaning the stack size is
-  // unknown.
-  //
-  // In order to make sure the stack point is right through the EH region,
-  // we also need to restore stack pointer from the frame pointer if we
-  // don't preserve stack space within prologue/epilogue for outgoing variables,
-  // normally it's just checking the variable sized object is present or not
-  // is enough, but we also don't preserve that at prologue/epilogue when
-  // have vector objects in stack.
-  if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
-      !hasReservedCallFrame(MF)) {
-    assert(hasFP(MF) && "frame pointer should not have been eliminated");
-    RI->adjustReg(MBB, LastFrameDestroy, DL, SPReg, FPReg,
-                  StackOffset::getFixed(-FPOffset),
-                  MachineInstr::FrameDestroy, getStackAlign());
-  }
-
-  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
-  if (FirstSPAdjustAmount) {
-    uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
-    assert(SecondSPAdjustAmount > 0 &&
-           "SecondSPAdjustAmount should be greater than zero");
-
-    RI->adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg,
-                  StackOffset::getFixed(-SecondSPAdjustAmount),
-                  MachineInstr::FrameDestroy, getStackAlign());
-  }
-
-  if (FirstSPAdjustAmount)
-    SPStackSize = FirstSPAdjustAmount;

   // Deallocate stack
-  // FIXME: Allocate space for two stacks, this is depend on the actual use of
-  // these two stacks, not based on calling convention
   if(SPStackSize)
     RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
                   StackOffset::getFixed(-SPStackSize),
@@ -621,8 +503,6 @@ StackOffset
 RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                            Register &FrameReg) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
-  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
-  const auto *RMFI = MF.getInfo<RISCVMachineFunctionInfo>();

   // Callee-saved registers should be referenced relative to the stack
   // pointer (positive offset), otherwise use the frame pointer (negative
@@ -640,107 +520,24 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
       StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
                             MFI.getOffsetAdjustment());

-  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
-
   if (CSI.size()) {
     MinCSFI = CSI[0].getFrameIdx();
     MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
   }

-  // This goes to the offset calculation of callee saved register, ra/s0
-  if (FI >= MinCSFI && FI <= MaxCSFI) {
-    // sp represents SGPR spill, tp represents VGPR spill
-    // FIXME: we need to define TargetStackID::VGPRSpill?
-    FrameReg = StackID == TargetStackID::SGPRSpill ? RISCV::X2 : RISCV::X4;
-    // if (FirstSPAdjustAmount)
-    //   Offset -= StackOffset::getFixed(FirstSPAdjustAmount);
-    // else
-    Offset -= StackOffset::getFixed(getStackSize(const_cast<MachineFunction&>(MF)
-                                    , RISCVStackID::SGPRSpill));
-    return Offset;
-  }
-
-  // assert(StackID == TargetStackID::Default &&
-  //        "SGPRSpill stack should not reach here!");
-
-  if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) {
-    // TODO: add stack alignment
-    // assert(0 && "TODO: Add stack realignment support for Ventus?");
-    // If the per-thread stack was realigned, the frame pointer is set in order
-    // to allow TP to be restored, so we need another base register to record
-    // the stack after realignment.
-    // |--------------------------| -- <-- FP
-    // | callee-allocated save    | | <----|
-    // | area for register varargs| |      |
-    // |--------------------------| |      |
-    // | callee-saved registers   | |      |
-    // |--------------------------| --     |
-    // | realignment (the size of | |      |
-    // | this area is not counted | |      |
-    // | in MFI.getStackSize())   | |      |
-    // |--------------------------| --     |-- MFI.getStackSize()
-    // | scalar local variables   | | <----'
-    // |--------------------------| -- <-- BP (if var sized objects present)
-    // | VarSize objects          | |
-    // |--------------------------| -- <-- TP
-    if (hasBP(MF)) {
-      FrameReg = RISCVABI::getBPReg();
-    } else {
-      // VarSize objects must be empty in this case!
-      assert(!MFI.hasVarSizedObjects());
-      FrameReg = RISCV::X4;
-    }
-  } else {
-    FrameReg = RI->getFrameRegister(MF);
-  }
-
-  if (FrameReg == getFPReg(STI)) {
-    // assert(0 && "TODO: Add fp support for Ventus?");
-    Offset -= StackOffset::getFixed(RMFI->getVarArgsSaveSize());
-    if (FI >= 0)
-      Offset -= StackOffset::getFixed(RMFI->getLibCallStackSize());
-    // When using FP to access scalable vector objects, we need to minus
-    // the frame size.
-    //
-    // |--------------------------| -- <-- FP
-    // | callee-allocated save    | |
-    // | area for register varargs| |
-    // |--------------------------| |
-    // | callee-saved registers   | |
-    // |--------------------------| | MFI.getStackSize()
-    // | scalar local variables   | |
-    // |--------------------------| --
-    // | VarSize objects          |
-    // |--------------------------| <-- SP
-    return Offset;
-  }
-
-  // This case handles indexing off both SP and BP.
-  // If indexing off SP, there must not be any var sized objects
-  assert(FrameReg == RISCVABI::getBPReg() || !MFI.hasVarSizedObjects());
-
-  // |--------------------------| -- <-- FP
-  // | callee-allocated save    | | <----|
-  // | area for register varargs| |      |
-  // |--------------------------| |      |
-  // | callee-saved registers   | |      |
-  // |--------------------------| --     |
-  // | scalar local variables   | | <----'
-  // |--------------------------| -- <-- BP (if var sized objects present)
-  // | VarSize objects          | |
-  // |--------------------------| -- <-- SP
-  //
-  if (MFI.isFixedObjectIndex(FI)) {
-    // assert(0 && "TODO!");
-    assert(!RI->hasStackRealignment(MF) &&
-           "Can't index across variable sized realign");
-    Offset -= StackOffset::get(MFI.getStackSize() +
-                               RMFI->getLibCallStackSize(), 0);
-  } else {
-    Offset -= StackOffset::getFixed(MFI.getStackSize());
-  }
-
-  return Offset;
+  // Different stacks for sALU and vALU threads.
+  FrameReg = StackID == RISCVStackID::SGPRSpill ? RISCV::X2 : RISCV::X4;
+
+  // TODO: This only saves sGPR CSRs, as we haven't define vGPR CSRs
+  // within getNonLibcallCSI.
+  //if (FI >= MinCSFI && FI <= MaxCSFI) {
+  Offset -= StackOffset::getFixed(
+      getStackSize(const_cast<MachineFunction&>(MF),
+                   (RISCVStackID::Value)StackID));
+  return Offset;
+  //}
+
+  //return Offset;
 }

 void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -877,7 +674,7 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
 }

 uint64_t RISCVFrameLowering::getStackSize(MachineFunction &MF,
                                           RISCVStackID::Value ID) const {
   MachineFrameInfo &MFI = MF.getFrameInfo();
   uint64_t StackSize = 0;
@@ -887,23 +684,25 @@ uint64_t RISCVFrameLowering::getStackSize(MachineFunction &MF,
       uint64_t Align = MFI.getObjectAlign(I).value();
       uint64_t ActualAlignSize = (Align + 3) >> 2;
       uint64_t Size = ActualAlignSize * MFI.getObjectSize(I);
      StackSize += Size;
    }
  }
  return StackSize;
 }

-void RISCVFrameLowering::deterMineStackID(MachineFunction &MF) const {
+void RISCVFrameLowering::determineStackID(MachineFunction &MF) const {
   llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
   for(int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) {
-    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF,I);
-    if((MFI.getStackID(I) != RISCVStackID::SGPRSpill) &&
-        PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
-      MFI.setStackID(I, RISCVStackID::VGPRSpill);
-    else
-      // FIXME: other stack?
-      MFI.setStackID(I, RISCVStackID::SGPRSpill);
+    // FIXME: There is no sGPR spill stack!
+    MFI.setStackID(I, RISCVStackID::VGPRSpill);
+
+    // MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF,I);
+    // if(MFI.getStackID(I) != RISCVStackID::SGPRSpill &&
+    //     PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
+    //   MFI.setStackID(I, RISCVStackID::VGPRSpill);
+    // else
+    //   MFI.setStackID(I, RISCVStackID::SGPRSpill);
   }
 }
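determineStackID now puts every ordinary frame object on the per-thread (VGPRSpill) stack, and getStackSize sums the objects that belong to one stack ID. A self-contained sketch of that partitioning, assuming the loop in getStackSize filters objects by their stack ID as its signature implies (the types below are simplified stand-ins, not the backend's):

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-ins for the backend's stack IDs and frame objects.
enum class StackID { SGPRSpill, VGPRSpill };

struct FrameObject {
  StackID ID;
  uint64_t Size;  // in bytes
  uint64_t Align; // in bytes
};

// Mirrors the shape of getStackSize: accumulate the sizes of all frame
// objects on the stack identified by ID, applying the (Align + 3) >> 2
// scaling factor seen in the hunk above.
uint64_t getStackSize(const std::vector<FrameObject> &Objects, StackID ID) {
  uint64_t StackSize = 0;
  for (const FrameObject &Obj : Objects) {
    if (Obj.ID != ID)
      continue;
    uint64_t ActualAlignSize = (Obj.Align + 3) >> 2;
    StackSize += ActualAlignSize * Obj.Size;
  }
  return StackSize;
}

int main() {
  // After determineStackID, ordinary frame objects land on the per-thread
  // (VGPRSpill) stack; callee-saved sGPR spills stay on the SGPRSpill stack.
  std::vector<FrameObject> Objects = {
      {StackID::VGPRSpill, /*Size=*/8, /*Align=*/4},
      {StackID::VGPRSpill, /*Size=*/16, /*Align=*/4},
      {StackID::SGPRSpill, /*Size=*/4, /*Align=*/4},
  };
  std::cout << "tp stack: " << getStackSize(Objects, StackID::VGPRSpill) << "\n";
  std::cout << "sp stack: " << getStackSize(Objects, StackID::SGPRSpill) << "\n";
  return 0;
}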
@@ -938,12 +737,14 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
     Register Reg = CS.getReg();

     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255 ) {
+    // TODO: Have we allocated stack for vGPR spilling?
+    if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255) {
       MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
-      TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
-                              RC, TRI);
+    } else {
+      MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::VGPRSpill);
     }
+    TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
+                            RC, TRI);
   }

   return true;
@@ -68,7 +68,7 @@ public:
   uint64_t getStackSize(MachineFunction &MF, RISCVStackID::Value ID) const;

   /// Before insert prolog/epilog information, set stack ID for each frame index
-  void deterMineStackID(MachineFunction &MF) const;
+  void determineStackID(MachineFunction &MF) const;

   bool enableShrinkWrapping(const MachineFunction &MF) const override;
@@ -261,7 +261,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,

   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
   Register FrameReg;
-  StackOffset Offset =
+  StackOffset Offset = // FIXME: The FrameReg and Offset should be depended on divergency route.
       getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);

   Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
@@ -305,13 +305,8 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 }

 Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-  const TargetFrameLowering *TFI = getFrameLowering(MF);
-  const RISCVMachineFunctionInfo *FuncInfo =
-      MF.getInfo<RISCVMachineFunctionInfo>();
-  if(FuncInfo->isEntryFunction())
-    return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X2;
-  // Non-kernel function, we also use X8 for frame pointer
-  return TFI->hasFP(MF) ? RISCV::X8 : RISCV::X4;
+  return MF.getInfo<RISCVMachineFunctionInfo>()->isEntryFunction()
+             ? RISCV::X2 : RISCV::X4;
 }

 const uint32_t *
@@ -355,20 +355,18 @@ unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
   return BaseT::getRegUsageForType(Ty);
 }

-// FIXME: Copy from AMDGPU
 bool RISCVTTIImpl::isSourceOfDivergence(const Value *V) const {
   // if (const Argument *A = dyn_cast<Argument>(V))
   //   return !AMDGPU::isArgPassedInSGPR(A);

-  // Loads from the private and flat address spaces are divergent, because
+  // Loads from the private memory are divergent, because
   // threads can execute the load instruction with the same inputs and get
   // different results.
   //
   // All other loads are not divergent, because if threads issue loads with the
   // same arguments, they will always get the same result.
   if (const LoadInst *Load = dyn_cast<LoadInst>(V))
-    return Load->getPointerAddressSpace() == RISCVAS::PRIVATE_ADDRESS ||
-           Load->getPointerAddressSpace() == RISCVAS::LOCAL_ADDRESS;
+    return Load->getPointerAddressSpace() == RISCVAS::PRIVATE_ADDRESS;

   // Atomics are divergent because they are executed sequentially: when an
   // atomic operation refers to the same address in each thread, then each
@@ -380,25 +378,20 @@ bool RISCVTTIImpl::isSourceOfDivergence(const Value *V) const {
   // if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
   //   if (Intrinsic->getIntrinsicID() == Intrinsic::read_register)
   //     return isReadRegisterSourceOfDivergence(Intrinsic);

   //   return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
   // }

   // Assume all function calls are a source of divergence.
-  // if (const CallInst *CI = dyn_cast<CallInst>(V)) {
-  //   if (CI->isInlineAsm() && isa<IntrinsicInst>(CI))
-  //     return RISCVII::isIntrinsicSourceOfDivergence(
-  //         cast<IntrinsicInst>(CI)->getIntrinsicID());
-  //   return true;
-  // }
+  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+    if (CI->isInlineAsm() && isa<IntrinsicInst>(CI))
+      return RISCVII::isIntrinsicSourceOfDivergence(
+          cast<IntrinsicInst>(CI)->getIntrinsicID());
+    return true;
+  }

   // Assume all function calls are a source of divergence.
   if (isa<InvokeInst>(V))
     return true;

-  // Assume all function calls are a source of divergence.
-  if (isa<CallInst>(V))
-    return true;
-
   return false;
 }
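Taken together, the two isSourceOfDivergence hunks make only private-memory loads divergent (local-memory loads no longer are) and treat every call or invoke as divergent. A standalone sketch of that predicate over simplified stand-in types, ignoring the inline-asm intrinsic special case:

#include <iostream>

// Simplified stand-ins for the value kinds the TTI hook inspects.
enum class AddrSpace { Private, Local, Global };

struct Value { virtual ~Value() = default; };
struct LoadInst : Value { AddrSpace AS; explicit LoadInst(AddrSpace A) : AS(A) {} };
struct CallInst : Value {};
struct InvokeInst : Value {};

// Mirrors the patched rule: only loads from the private address space are
// divergence sources, and calls/invokes are conservatively divergent.
bool isSourceOfDivergence(const Value *V) {
  if (auto *Load = dynamic_cast<const LoadInst *>(V))
    return Load->AS == AddrSpace::Private;
  if (dynamic_cast<const CallInst *>(V) || dynamic_cast<const InvokeInst *>(V))
    return true;
  return false;
}

int main() {
  LoadInst PrivLoad(AddrSpace::Private), LocalLoad(AddrSpace::Local);
  CallInst Call;
  std::cout << isSourceOfDivergence(&PrivLoad) << isSourceOfDivergence(&LocalLoad)
            << isSourceOfDivergence(&Call) << "\n"; // prints 101
  return 0;
}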
@@ -13,9 +13,12 @@
 // The Ventus GPGPU calling convention is handled with custom code in
 // RISCVISelLowering.cpp (CC_Ventus).

+// sGPRs (except ra) can only be set by kernel function, so there is no callee
+// saved sGPRs is needed.
 def CSR_ILP32_LP64
-    : CalleeSavedRegs<(add X1, X3, X8, X9, (sequence "X%u", 18, 27),
-                       (sequence "V%u", 32, 47))>;
+    : CalleeSavedRegs<(add X1, (sequence "V%u", 32, 255))>;

+// Following are not used by Ventus GPGPU.
 def CSR_ILP32F_LP64F
     : CalleeSavedRegs<(add CSR_ILP32_LP64,
@@ -247,13 +247,9 @@ def XLenRI : RegInfoByHwMode<
       [RegInfo<32,32,32>, RegInfo<64,64,64>]>;

 // The order of registers represents the preferred allocation sequence.
-// Registers are listed in the order caller-save, callee-save, specials.
+// All sGPRs are callee saved registers.
 def GPR : RVRegisterClass<"RISCV", [XLenVT], 32, (add
-    (sequence "X%u", 10, 17),
-    (sequence "X%u", 5, 7),
-    (sequence "X%u", 28, 31),
-    (sequence "X%u", 8, 9),
-    (sequence "X%u", 18, 27),
+    (sequence "X%u", 5, 63),
     (sequence "X%u", 0, 4)
   )> {
   let RegInfos = XLenRI;