[VENTUS][fix] Put local variables declared in kernel function into shared memory
This commit is contained in:
parent
a48f51ab76
commit
87fe5f3ce8
|
@ -39,14 +39,18 @@ _start:
|
||||||
li t4,32
|
li t4,32
|
||||||
vsetvli t4,t4,e32,m1,ta,ma
|
vsetvli t4,t4,e32,m1,ta,ma
|
||||||
li t4,0x2000
|
li t4,0x2000
|
||||||
csrrs t4,mstatus,t4
|
csrrs t4, mstatus, t4
|
||||||
li t4, 0
|
li t4, 0
|
||||||
csrr t1, CSR_WID
|
csrr t1, CSR_WID
|
||||||
csrr t2, CSR_LDS
|
csrr t2, CSR_LDS
|
||||||
li t3, 1024 # 1k size for single warp
|
li t3, 1024 # 1k size for single warp
|
||||||
mul t1, t1, t3 # sp = lds + wid * warp_size
|
mul t1, t1, t3 # t1 = wid * warp_size
|
||||||
add sp, t1, t2 # sp points to baseaddr of local memory of each SM
|
add sp, t1, t2 # sp points to baseaddr of local memory of each SM
|
||||||
li tp, 0 # tp points to baseaddr for lower bound of private memory(1K) of each thread
|
li tp, 0 # tp points to baseaddr for lower bound of private memory(1K) of each thread
|
||||||
|
csrr t5, CSR_NUMW
|
||||||
|
li t3, 1024
|
||||||
|
mul t5, t5, t3
|
||||||
|
add s0, t2, t5 # s0 points to local memory base addr in a workgroup
|
||||||
|
|
||||||
# clear BSS segment
|
# clear BSS segment
|
||||||
la a0, _edata
|
la a0, _edata
|
||||||
|
|
|
@ -116,6 +116,7 @@ enum Value {
|
||||||
ScalableVector = 2,
|
ScalableVector = 2,
|
||||||
WasmLocal = 3,
|
WasmLocal = 3,
|
||||||
VGPRSpill = 4,
|
VGPRSpill = 4,
|
||||||
|
LocalMemSpill = 5,
|
||||||
NoAlloc = 255
|
NoAlloc = 255
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -300,7 +300,8 @@ getNonLibcallCSI(const MachineFunction &MF,
|
||||||
// TODO: For now, we don't define VGPR callee saved registers, when we later
|
// TODO: For now, we don't define VGPR callee saved registers, when we later
|
||||||
// add VGPR callee saved register, remember to modify here
|
// add VGPR callee saved register, remember to modify here
|
||||||
if (FI >= 0 && (MFI.getStackID(FI) == RISCVStackID::Default ||
|
if (FI >= 0 && (MFI.getStackID(FI) == RISCVStackID::Default ||
|
||||||
MFI.getStackID(FI) == RISCVStackID::SGPRSpill))
|
MFI.getStackID(FI) == RISCVStackID::SGPRSpill ||
|
||||||
|
MFI.getStackID(FI) == RISCVStackID::VGPRSpill))
|
||||||
NonLibcallCSI.push_back(CS);
|
NonLibcallCSI.push_back(CS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -374,6 +375,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||||
|
|
||||||
uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
|
uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
|
||||||
uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
|
uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
|
||||||
|
uint64_t LocalStackSize = getStackSize(MF, RISCVStackID::LocalMemSpill);
|
||||||
|
|
||||||
// FIXME: need to add local data declaration calculation
|
// FIXME: need to add local data declaration calculation
|
||||||
CurrentSubProgramInfo->LDSMemory += SPStackSize;
|
CurrentSubProgramInfo->LDSMemory += SPStackSize;
|
||||||
CurrentSubProgramInfo->PDSMemory += TPStackSize;
|
CurrentSubProgramInfo->PDSMemory += TPStackSize;
|
||||||
|
@ -397,11 +400,11 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||||
|
|
||||||
// Allocate space on the local-mem stack and private-mem stack if necessary.
|
// Allocate space on the local-mem stack and private-mem stack if necessary.
|
||||||
if(SPStackSize) {
|
if(SPStackSize) {
|
||||||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet, CurrentSubProgramInfo,
|
RI->insertRegToSet(MRI, CurrentRegisterAddedSet, CurrentSubProgramInfo,
|
||||||
SPReg);
|
SPReg);
|
||||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
|
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
|
||||||
StackOffset::getFixed(SPStackSize),
|
StackOffset::getFixed(SPStackSize), MachineInstr::FrameSetup,
|
||||||
MachineInstr::FrameSetup, getStackAlign());
|
getStackAlign());
|
||||||
|
|
||||||
// Emit ".cfi_def_cfa_offset SPStackSize"
|
// Emit ".cfi_def_cfa_offset SPStackSize"
|
||||||
unsigned CFIIndex = MF.addFrameInst(
|
unsigned CFIIndex = MF.addFrameInst(
|
||||||
|
@ -411,14 +414,21 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||||
.setMIFlag(MachineInstr::FrameSetup);
|
.setMIFlag(MachineInstr::FrameSetup);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(TPStackSize) {
|
if (LocalStackSize) {
|
||||||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet, CurrentSubProgramInfo,
|
RI->adjustReg(MBB, MBBI, DL, RISCV::X8, RISCV::X8,
|
||||||
TPReg);
|
StackOffset::getFixed(LocalStackSize),
|
||||||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet, CurrentSubProgramInfo,
|
|
||||||
RI->getPrivateMemoryBaseRegister(MF));
|
|
||||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
|
||||||
StackOffset::getFixed(TPStackSize),
|
|
||||||
MachineInstr::FrameSetup, getStackAlign());
|
MachineInstr::FrameSetup, getStackAlign());
|
||||||
|
// Emit ".cfi_def_cfa_offset Local memory StackSize"
|
||||||
|
unsigned CFIIndex = MF.addFrameInst(
|
||||||
|
MCCFIInstruction::cfiDefCfaOffset(nullptr, SPStackSize));
|
||||||
|
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||||
|
.addCFIIndex(CFIIndex)
|
||||||
|
.setMIFlag(MachineInstr::FrameSetup);
|
||||||
|
}
|
||||||
|
if (TPStackSize) {
|
||||||
|
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
||||||
|
StackOffset::getFixed(TPStackSize), MachineInstr::FrameSetup,
|
||||||
|
getStackAlign());
|
||||||
|
|
||||||
// Emit ".cfi_def_cfa_offset TPStackSize"
|
// Emit ".cfi_def_cfa_offset TPStackSize"
|
||||||
unsigned CFIIndex = MF.addFrameInst(
|
unsigned CFIIndex = MF.addFrameInst(
|
||||||
|
@ -500,23 +510,27 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||||
// Get 2 stack size for TP and SP
|
// Get 2 stack size for TP and SP
|
||||||
uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
|
uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
|
||||||
uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
|
uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
|
||||||
|
uint64_t LocalStackSize = getStackSize(MF, RISCVStackID::LocalMemSpill);
|
||||||
// Deallocate stack
|
// Deallocate stack
|
||||||
if(SPStackSize)
|
if(SPStackSize)
|
||||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
|
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
|
||||||
StackOffset::getFixed(-SPStackSize),
|
StackOffset::getFixed(-SPStackSize),
|
||||||
MachineInstr::FrameDestroy, getStackAlign());
|
MachineInstr::FrameDestroy, getStackAlign());
|
||||||
|
if(LocalStackSize)
|
||||||
|
RI->adjustReg(MBB, MBBI, DL, RISCV::X8, RISCV::X8,
|
||||||
|
StackOffset::getFixed(-LocalStackSize),
|
||||||
|
MachineInstr::FrameDestroy, getStackAlign());
|
||||||
if(TPStackSize) {
|
if(TPStackSize) {
|
||||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
||||||
StackOffset::getFixed(-TPStackSize),
|
StackOffset::getFixed(-TPStackSize),
|
||||||
MachineInstr::FrameDestroy, getStackAlign());
|
MachineInstr::FrameDestroy, getStackAlign());
|
||||||
|
|
||||||
// Restore V32
|
// Restore V32
|
||||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::VMV_V_X),
|
BuildMI(MBB, MBBI, DL, TII->get(RISCV::VMV_V_X),
|
||||||
RI->getPrivateMemoryBaseRegister(MF))
|
RI->getPrivateMemoryBaseRegister(MF))
|
||||||
.addReg(TPReg);
|
.addReg(TPReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit epilogue for shadow call stack.
|
// Emit epilogue for shadow call stack.
|
||||||
emitSCSEpilogue(MF, MBB, MBBI, DL);
|
emitSCSEpilogue(MF, MBB, MBBI, DL);
|
||||||
}
|
}
|
||||||
|
@ -527,8 +541,8 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
|
||||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
uint64_t StackSize = 0;
|
uint64_t StackSize = 0;
|
||||||
|
|
||||||
// because the parameters spilling to the stack are not in the current TP
|
// because the parameters spilling to the stack are not in the current TP
|
||||||
// stack, the offset in the current stack should not be calculated from a
|
// stack, the offset in the current stack should not be calculated from a
|
||||||
// negative FI.
|
// negative FI.
|
||||||
for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI + 1; I++) {
|
for (int I = FI < 0 ? MFI.getObjectIndexBegin() : 0; I != FI + 1; I++) {
|
||||||
if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) {
|
if (static_cast<unsigned>(MFI.getStackID(I)) == Stack) {
|
||||||
|
@ -545,7 +559,7 @@ uint64_t RISCVFrameLowering::getStackOffset(const MachineFunction &MF,
|
||||||
// instead of current stack.
|
// instead of current stack.
|
||||||
if (FI < 0 && !MF.getFunction().isVarArg())
|
if (FI < 0 && !MF.getFunction().isVarArg())
|
||||||
StackSize += getStackSize(MF, RISCVStackID::VGPRSpill);
|
StackSize += getStackSize(MF, RISCVStackID::VGPRSpill);
|
||||||
|
|
||||||
return StackSize;
|
return StackSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -564,7 +578,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||||
|
|
||||||
assert((StackID == RISCVStackID::Default ||
|
assert((StackID == RISCVStackID::Default ||
|
||||||
StackID == RISCVStackID::SGPRSpill ||
|
StackID == RISCVStackID::SGPRSpill ||
|
||||||
StackID == RISCVStackID::VGPRSpill) &&
|
StackID == RISCVStackID::VGPRSpill ||
|
||||||
|
StackID == RISCVStackID::LocalMemSpill) &&
|
||||||
"Unexpected stack ID for the frame object.");
|
"Unexpected stack ID for the frame object.");
|
||||||
|
|
||||||
// Different stacks for sALU and vALU threads.
|
// Different stacks for sALU and vALU threads.
|
||||||
|
@ -586,7 +601,7 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
auto *CurrentProgramInfo = const_cast<VentusProgramInfo*>(
|
auto *CurrentProgramInfo = const_cast<VentusProgramInfo*>(
|
||||||
MF.getSubtarget<RISCVSubtarget>().getVentusProgramInfo());
|
MF.getSubtarget<RISCVSubtarget>().getVentusProgramInfo());
|
||||||
|
|
||||||
// When accessing a new function, we need to add a new container to calculate
|
// When accessing a new function, we need to add a new container to calculate
|
||||||
// its resource usage.
|
// its resource usage.
|
||||||
CurrentProgramInfo->RegisterAddedSetVec.push_back(DenseSet<unsigned>());
|
CurrentProgramInfo->RegisterAddedSetVec.push_back(DenseSet<unsigned>());
|
||||||
CurrentProgramInfo->SubProgramInfoVec.push_back(SubVentusProgramInfo());
|
CurrentProgramInfo->SubProgramInfoVec.push_back(SubVentusProgramInfo());
|
||||||
|
@ -604,14 +619,14 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
|
||||||
if (!Op.isReg())
|
if (!Op.isReg())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet,
|
RI->insertRegToSet(MRI, CurrentRegisterAddedSet,
|
||||||
CurrentSubProgramInfo, Op.getReg());
|
CurrentSubProgramInfo, Op.getReg());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ra register is a special register.
|
// ra register is a special register.
|
||||||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet,
|
RI->insertRegToSet(MRI, CurrentRegisterAddedSet,
|
||||||
CurrentSubProgramInfo, RISCV::X1);
|
CurrentSubProgramInfo, RISCV::X1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -706,14 +721,14 @@ MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
|
||||||
|
|
||||||
RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
|
RI.adjustReg(MBB, MI, DL, SPReg, SPReg, StackOffset::getFixed(Amount),
|
||||||
MachineInstr::NoFlags, getStackAlign());
|
MachineInstr::NoFlags, getStackAlign());
|
||||||
|
|
||||||
// The value of TP will be re-assigned to V32 at the end of the callee
|
// The value of TP will be re-assigned to V32 at the end of the callee
|
||||||
// function, which is actually the TP value after ADJCALLSTACKUP, so the
|
// function, which is actually the TP value after ADJCALLSTACKUP, so the
|
||||||
// tp value after ADJCALLSTACKDOWN should be reassigned to V32 to ensure
|
// tp value after ADJCALLSTACKDOWN should be reassigned to V32 to ensure
|
||||||
// that it is consistent with the TP value that has not been internally
|
// that it is consistent with the TP value that has not been internally
|
||||||
// adjusted (that is, excluding the initial TP adjustment) within the
|
// adjusted (that is, excluding the initial TP adjustment) within the
|
||||||
// current function.
|
// current function.
|
||||||
if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
|
if (MI->getOpcode() == RISCV::ADJCALLSTACKDOWN)
|
||||||
BuildMI(MBB, MI, DL, TII->get(RISCV::VMV_V_X),
|
BuildMI(MBB, MI, DL, TII->get(RISCV::VMV_V_X),
|
||||||
RI.getPrivateMemoryBaseRegister(MF))
|
RI.getPrivateMemoryBaseRegister(MF))
|
||||||
.addReg(TPReg);
|
.addReg(TPReg);
|
||||||
|
@ -765,10 +780,10 @@ uint64_t RISCVFrameLowering::getStackSize(const MachineFunction &MF,
|
||||||
RISCVStackID::Value ID) const {
|
RISCVStackID::Value ID) const {
|
||||||
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
const MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
uint64_t StackSize = 0;
|
uint64_t StackSize = 0;
|
||||||
|
|
||||||
for(int I = 0; I != MFI.getObjectIndexEnd(); I++) {
|
for(int I = 0; I != MFI.getObjectIndexEnd(); I++) {
|
||||||
if(static_cast<unsigned>(MFI.getStackID(I)) == ID) {
|
if(static_cast<unsigned>(MFI.getStackID(I)) == ID) {
|
||||||
Align Alignment = MFI.getObjectAlign(I).value() <= 4 ?
|
Align Alignment = MFI.getObjectAlign(I).value() <= 4 ?
|
||||||
Align(4) : MFI.getObjectAlign(I);
|
Align(4) : MFI.getObjectAlign(I);
|
||||||
StackSize += MFI.getObjectSize(I);
|
StackSize += MFI.getObjectSize(I);
|
||||||
StackSize = alignTo(StackSize, Alignment);
|
StackSize = alignTo(StackSize, Alignment);
|
||||||
|
@ -780,16 +795,19 @@ uint64_t RISCVFrameLowering::getStackSize(const MachineFunction &MF,
|
||||||
|
|
||||||
void RISCVFrameLowering::determineStackID(MachineFunction &MF) const {
|
void RISCVFrameLowering::determineStackID(MachineFunction &MF) const {
|
||||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
for(int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) {
|
for (int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) {
|
||||||
// FIXME: There is no sGPR spill stack!
|
// FIXME: There is no sGPR spill stack!
|
||||||
// MFI.setStackID(I, RISCVStackID::VGPRSpill);
|
// MFI.setStackID(I, RISCVStackID::VGPRSpill);
|
||||||
|
|
||||||
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF,I);
|
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, I);
|
||||||
if(MFI.getStackID(I) != RISCVStackID::SGPRSpill &&
|
if (MFI.getStackID(I) != RISCVStackID::Default)
|
||||||
PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
|
continue;
|
||||||
|
if (PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
|
||||||
MFI.setStackID(I, RISCVStackID::VGPRSpill);
|
MFI.setStackID(I, RISCVStackID::VGPRSpill);
|
||||||
|
else if (PtrInfo.getAddrSpace() == RISCVAS::LOCAL_ADDRESS)
|
||||||
|
MFI.setStackID(I, RISCVStackID::LocalMemSpill);
|
||||||
else
|
else
|
||||||
MFI.setStackID(I, RISCVStackID::SGPRSpill);
|
MFI.setStackID(I, RISCVStackID::SGPRSpill);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -824,17 +842,17 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
|
||||||
Register Reg = CS.getReg();
|
Register Reg = CS.getReg();
|
||||||
|
|
||||||
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
||||||
// TODO: Have we allocated stack for vGPR spilling?
|
|
||||||
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255) {
|
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255) {
|
||||||
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
|
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
|
||||||
// FIXME: Right now, no vgpr callee saved register, maybe later needed
|
// FIXME: Right now, no vgpr callee saved register, maybe later needed
|
||||||
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
|
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
|
||||||
RC, TRI);
|
RC, TRI);
|
||||||
|
} else {
|
||||||
|
assert(Reg.id() >= RISCV::V32 && Reg.id() <= RISCV::V255 && "TODO");
|
||||||
|
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::VGPRSpill);
|
||||||
|
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
|
||||||
|
RC, TRI);
|
||||||
}
|
}
|
||||||
// else {
|
|
||||||
// FIXME: Right now, no callee saved register for VGPR
|
|
||||||
// MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::VGPRSpill);
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -862,8 +880,7 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
|
||||||
for (auto &CS : NonLibcallCSI) {
|
for (auto &CS : NonLibcallCSI) {
|
||||||
Register Reg = CS.getReg();
|
Register Reg = CS.getReg();
|
||||||
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
||||||
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255 )
|
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
|
||||||
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
|
|
||||||
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
|
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -946,6 +963,7 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
|
||||||
case RISCVStackID::Default:
|
case RISCVStackID::Default:
|
||||||
case RISCVStackID::SGPRSpill:
|
case RISCVStackID::SGPRSpill:
|
||||||
case RISCVStackID::VGPRSpill:
|
case RISCVStackID::VGPRSpill:
|
||||||
|
case RISCVStackID::LocalMemSpill:
|
||||||
return true;
|
return true;
|
||||||
case RISCVStackID::ScalableVector:
|
case RISCVStackID::ScalableVector:
|
||||||
case RISCVStackID::NoAlloc:
|
case RISCVStackID::NoAlloc:
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#include "RISCVSubtarget.h"
|
#include "RISCVSubtarget.h"
|
||||||
#include "RISCVTargetMachine.h"
|
#include "RISCVTargetMachine.h"
|
||||||
#include "llvm/ADT/SmallSet.h"
|
#include "llvm/ADT/SmallSet.h"
|
||||||
|
#include "llvm/ADT/SmallVector.h"
|
||||||
#include "llvm/ADT/Statistic.h"
|
#include "llvm/ADT/Statistic.h"
|
||||||
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
|
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
|
||||||
#include "llvm/Analysis/MemoryLocation.h"
|
#include "llvm/Analysis/MemoryLocation.h"
|
||||||
|
@ -35,6 +36,7 @@
|
||||||
#include "llvm/CodeGen/ValueTypes.h"
|
#include "llvm/CodeGen/ValueTypes.h"
|
||||||
#include "llvm/IR/DiagnosticInfo.h"
|
#include "llvm/IR/DiagnosticInfo.h"
|
||||||
#include "llvm/IR/DiagnosticPrinter.h"
|
#include "llvm/IR/DiagnosticPrinter.h"
|
||||||
|
#include "llvm/IR/GlobalVariable.h"
|
||||||
#include "llvm/IR/IRBuilder.h"
|
#include "llvm/IR/IRBuilder.h"
|
||||||
#include "llvm/IR/IntrinsicsRISCV.h"
|
#include "llvm/IR/IntrinsicsRISCV.h"
|
||||||
#include "llvm/IR/PatternMatch.h"
|
#include "llvm/IR/PatternMatch.h"
|
||||||
|
@ -4296,9 +4298,35 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
|
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
|
||||||
assert(N->getOffset() == 0 && "unexpected offset in global node");
|
assert(N->getOffset() == 0 && "unexpected offset in global node");
|
||||||
|
// FIXME: Only support local address?
|
||||||
|
if (N->getAddressSpace() == RISCVAS::LOCAL_ADDRESS)
|
||||||
|
return lowerGlobalLocalAddress(N, DAG);
|
||||||
return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
|
return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// For local variables, we need to store variables into local memory,
|
||||||
|
/// rather than putting them into the '.sbss' section
|
||||||
|
/// TODO: Remove the address allocation in the '.sbss' section
|
||||||
|
SDValue RISCVTargetLowering::lowerGlobalLocalAddress(GlobalAddressSDNode *Op,
|
||||||
|
SelectionDAG &DAG) const {
|
||||||
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
|
static SmallVector<std::pair<const GlobalVariable *, int>> LoweredVariables;
|
||||||
|
|
||||||
|
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||||
|
const DataLayout &DL = DAG.getDataLayout();
|
||||||
|
auto *GV = cast<GlobalVariable>(Op->getGlobal());
|
||||||
|
for(auto &VA : LoweredVariables) {
|
||||||
|
if(VA.first == GV)
|
||||||
|
return DAG.getFrameIndex(VA.second, MVT::i32);
|
||||||
|
}
|
||||||
|
unsigned AlignValue = DL.getABITypeAlignment(GV->getValueType());
|
||||||
|
int FI = MFI.CreateStackObject(DL.getTypeAllocSize(GV->getValueType())
|
||||||
|
/*Offset need to be modified too*/,
|
||||||
|
Align(AlignValue), false, nullptr, RISCVStackID::LocalMemSpill);
|
||||||
|
LoweredVariables.push_back(std::make_pair(GV, FI));
|
||||||
|
return DAG.getFrameIndex(FI, MVT::i32);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
|
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
|
||||||
SelectionDAG &DAG) const {
|
SelectionDAG &DAG) const {
|
||||||
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
|
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
|
||||||
|
@ -7458,7 +7486,6 @@ SDValue RISCVTargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
|
||||||
|
|
||||||
return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
|
return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue RISCVTargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
|
SDValue RISCVTargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
|
||||||
SDValue Op,
|
SDValue Op,
|
||||||
const SDLoc &DL,
|
const SDLoc &DL,
|
||||||
|
@ -11480,7 +11507,7 @@ static bool CC_Ventus(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
|
||||||
|
|
||||||
// Allocate stack for arguments which can not use register
|
// Allocate stack for arguments which can not use register
|
||||||
unsigned StackOffset =
|
unsigned StackOffset =
|
||||||
Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
|
Reg ? 0 : -State.AllocateStack(StoreSizeBytes, StackAlign);
|
||||||
|
|
||||||
// If we reach this point and PendingLocs is non-empty, we must be at the
|
// If we reach this point and PendingLocs is non-empty, we must be at the
|
||||||
// end of a split argument that must be passed indirectly.
|
// end of a split argument that must be passed indirectly.
|
||||||
|
@ -11788,7 +11815,7 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
|
||||||
ValVT = LocVT;
|
ValVT = LocVT;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Just align to 4 bytes, because parameters more than 4 bytes will be split
|
// Just align to 4 bytes, because parameters more than 4 bytes will be split
|
||||||
// into 4-byte parameters
|
// into 4-byte parameters
|
||||||
int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), 0,
|
int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), 0,
|
||||||
/*IsImmutable=*/true);
|
/*IsImmutable=*/true);
|
||||||
|
@ -11904,7 +11931,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
||||||
ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
|
ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
|
||||||
else if (VA.isRegLoc())
|
else if (VA.isRegLoc())
|
||||||
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this, Ins[i]);
|
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this, Ins[i]);
|
||||||
else
|
else
|
||||||
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
|
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
|
||||||
|
|
||||||
if (VA.getLocInfo() == CCValAssign::Indirect) {
|
if (VA.getLocInfo() == CCValAssign::Indirect) {
|
||||||
|
@ -12264,12 +12291,12 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||||
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X4, PtrVT);
|
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X4, PtrVT);
|
||||||
SDValue Address =
|
SDValue Address =
|
||||||
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
|
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
|
||||||
DAG.getIntPtrConstant(-((int)VA.getLocMemOffset()
|
DAG.getIntPtrConstant(-((int)VA.getLocMemOffset()
|
||||||
+ CurrentFrameSize), DL));
|
+ CurrentFrameSize), DL));
|
||||||
|
|
||||||
// Emit the store.
|
// Emit the store.
|
||||||
MemOpChains.push_back(
|
MemOpChains.push_back(
|
||||||
DAG.getStore(Chain, DL, ArgValue, Address,
|
DAG.getStore(Chain, DL, ArgValue, Address,
|
||||||
MachinePointerInfo(RISCVAS::PRIVATE_ADDRESS)));
|
MachinePointerInfo(RISCVAS::PRIVATE_ADDRESS)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -13483,6 +13510,10 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
||||||
}
|
}
|
||||||
case ISD::STORE: {
|
case ISD::STORE: {
|
||||||
const StoreSDNode *Store= cast<StoreSDNode>(N);
|
const StoreSDNode *Store= cast<StoreSDNode>(N);
|
||||||
|
auto &MFI = FLI->MF->getFrameInfo();
|
||||||
|
if(auto *BaseBase = dyn_cast<FrameIndexSDNode>(Store->getOperand(1)))
|
||||||
|
if(MFI.getStackID(BaseBase->getIndex()) == RISCVStackID::SGPRSpill)
|
||||||
|
return false;
|
||||||
return Store->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS ||
|
return Store->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS ||
|
||||||
Store->getPointerInfo().StackID == RISCVStackID::VGPRSpill;
|
Store->getPointerInfo().StackID == RISCVStackID::VGPRSpill;
|
||||||
}
|
}
|
||||||
|
@ -13494,6 +13525,8 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
||||||
case ISD::INTRINSIC_W_CHAIN:
|
case ISD::INTRINSIC_W_CHAIN:
|
||||||
return RISCVII::isIntrinsicSourceOfDivergence(
|
return RISCVII::isIntrinsicSourceOfDivergence(
|
||||||
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
|
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
|
||||||
|
case Intrinsic::vastart:
|
||||||
|
return true;
|
||||||
/*
|
/*
|
||||||
case AMDGPUISD::ATOMIC_CMP_SWAP:
|
case AMDGPUISD::ATOMIC_CMP_SWAP:
|
||||||
case AMDGPUISD::ATOMIC_INC:
|
case AMDGPUISD::ATOMIC_INC:
|
||||||
|
|
|
@ -646,6 +646,9 @@ private:
|
||||||
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
|
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
||||||
|
SDValue lowerGlobalLocalAddress(GlobalAddressSDNode *Op,
|
||||||
|
SelectionDAG &DAG) const;
|
||||||
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
||||||
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
||||||
|
|
|
@ -95,7 +95,7 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RISCVInstrInfo::isVGPRMemoryAccess(const MachineInstr &MI) const {
|
bool RISCVInstrInfo::isPrivateMemoryAccess(const MachineInstr &MI) const {
|
||||||
switch (MI.getOpcode()) {
|
switch (MI.getOpcode()) {
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
|
@ -107,11 +107,102 @@ bool RISCVInstrInfo::isVGPRMemoryAccess(const MachineInstr &MI) const {
|
||||||
case RISCV::VSW:
|
case RISCV::VSW:
|
||||||
case RISCV::VSH:
|
case RISCV::VSH:
|
||||||
case RISCV::VSB:
|
case RISCV::VSB:
|
||||||
case RISCV::VSWI12:
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RISCVInstrInfo::isUniformMemoryAccess(const MachineInstr &MI) const {
|
||||||
|
switch (MI.getOpcode()) {
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
case RISCV::LW:
|
||||||
|
case RISCV::LB:
|
||||||
|
case RISCV::LBU:
|
||||||
|
case RISCV::LH:
|
||||||
|
case RISCV::LHU:
|
||||||
|
case RISCV::SW:
|
||||||
|
case RISCV::SH:
|
||||||
|
case RISCV::SB:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool RISCVInstrInfo::isLocalMemoryAccess(const MachineInstr &MI) const {
|
||||||
|
switch (MI.getOpcode()) {
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
case RISCV::VLWI12:
|
||||||
|
case RISCV::VLBI12:
|
||||||
|
case RISCV::VLBUI12:
|
||||||
|
case RISCV::VLHI12:
|
||||||
|
case RISCV::VLHUI12:
|
||||||
|
case RISCV::VSWI12:
|
||||||
|
case RISCV::VSHI12:
|
||||||
|
case RISCV::VSBI12:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
unsigned RISCVInstrInfo::getPrivateMemoryOpcode(MachineInstr &MI) const {
|
||||||
|
switch (MI.getOpcode()) {
|
||||||
|
case RISCV::LW:
|
||||||
|
case RISCV::VLWI12:
|
||||||
|
return RISCV::VLW;
|
||||||
|
case RISCV::LB:
|
||||||
|
case RISCV::VLBI12:
|
||||||
|
return RISCV::VLB;
|
||||||
|
case RISCV::LBU:
|
||||||
|
case RISCV::VLBUI12:
|
||||||
|
return RISCV::VLBU;
|
||||||
|
case RISCV::LH:
|
||||||
|
case RISCV::VLHI12:
|
||||||
|
return RISCV::VLH;
|
||||||
|
case RISCV::LHU:
|
||||||
|
case RISCV::VLHUI12:
|
||||||
|
return RISCV::VLHU;
|
||||||
|
case RISCV::SW:
|
||||||
|
case RISCV::VSWI12:
|
||||||
|
return RISCV::VSW;
|
||||||
|
case RISCV::SH:
|
||||||
|
case RISCV::VSHI12:
|
||||||
|
return RISCV::VSH;
|
||||||
|
case RISCV::SB:
|
||||||
|
case RISCV::VSBI12:
|
||||||
|
return RISCV::VSB;
|
||||||
|
default:
|
||||||
|
// MI.dump();
|
||||||
|
assert(0 && "TODO");
|
||||||
|
return RISCV::VLW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned RISCVInstrInfo::getUniformMemoryOpcode(MachineInstr &MI) const {
|
||||||
|
switch (MI.getOpcode()) {
|
||||||
|
case RISCV::VLW:
|
||||||
|
return RISCV::VLWI12;
|
||||||
|
case RISCV::VLB:
|
||||||
|
return RISCV::VLBI12;
|
||||||
|
case RISCV::VLBU:
|
||||||
|
return RISCV::VLBUI12;
|
||||||
|
case RISCV::VLH:
|
||||||
|
return RISCV::VLHI12;
|
||||||
|
case RISCV::VLHU:
|
||||||
|
return RISCV::VLHUI12;
|
||||||
|
case RISCV::VSW:
|
||||||
|
return RISCV::VSWI12;
|
||||||
|
case RISCV::VSH:
|
||||||
|
return RISCV::VSHI12;
|
||||||
|
case RISCV::VSB:
|
||||||
|
return RISCV::VSBI12;
|
||||||
|
default:
|
||||||
|
// MI.dump();
|
||||||
|
assert(0 && "TODO");
|
||||||
|
return RISCV::VLW;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
|
unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
|
||||||
int &FrameIndex) const {
|
int &FrameIndex) const {
|
||||||
switch (MI.getOpcode()) {
|
switch (MI.getOpcode()) {
|
||||||
|
|
|
@ -55,7 +55,18 @@ public:
|
||||||
MCInst getNop() const override;
|
MCInst getNop() const override;
|
||||||
const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
|
const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
|
||||||
|
|
||||||
bool isVGPRMemoryAccess(const MachineInstr &MI) const;
|
/// Check whether the memory access instruction is a private memory access
|
||||||
|
bool isPrivateMemoryAccess(const MachineInstr &MI) const;
|
||||||
|
|
||||||
|
/// Check whether the memory access instruction is a uniform memory access
|
||||||
|
bool isUniformMemoryAccess(const MachineInstr &MI) const;
|
||||||
|
|
||||||
|
/// Check whether the memory access instruction is a local memory access
|
||||||
|
bool isLocalMemoryAccess(const MachineInstr &MI) const;
|
||||||
|
|
||||||
|
unsigned getPrivateMemoryOpcode(MachineInstr &MI) const;
|
||||||
|
|
||||||
|
unsigned getUniformMemoryOpcode(MachineInstr &MI) const;
|
||||||
|
|
||||||
unsigned isLoadFromStackSlot(const MachineInstr &MI,
|
unsigned isLoadFromStackSlot(const MachineInstr &MI,
|
||||||
int &FrameIndex) const override;
|
int &FrameIndex) const override;
|
||||||
|
|
|
@ -87,10 +87,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||||
// Use markSuperRegs to ensure any register aliases are also reserved
|
// Use markSuperRegs to ensure any register aliases are also reserved
|
||||||
markSuperRegs(Reserved, RISCV::X0); // zero
|
markSuperRegs(Reserved, RISCV::X0); // zero
|
||||||
markSuperRegs(Reserved, RISCV::X2); // sp
|
markSuperRegs(Reserved, RISCV::X2); // sp
|
||||||
|
markSuperRegs(Reserved, RISCV::X8); // s0
|
||||||
markSuperRegs(Reserved, RISCV::X3); // gp
|
markSuperRegs(Reserved, RISCV::X3); // gp
|
||||||
markSuperRegs(Reserved, RISCV::X4); // tp
|
markSuperRegs(Reserved, RISCV::X4); // tp
|
||||||
if (TFI->hasFP(MF))
|
|
||||||
markSuperRegs(Reserved, RISCV::X8); // fp
|
|
||||||
// Reserve the base register if we need to realign the stack and allocate
|
// Reserve the base register if we need to realign the stack and allocate
|
||||||
// variable-sized objects at runtime.
|
// variable-sized objects at runtime.
|
||||||
if (TFI->hasBP(MF))
|
if (TFI->hasBP(MF))
|
||||||
|
@ -333,9 +332,11 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
|
assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
|
||||||
|
|
||||||
MachineInstr &MI = *II;
|
MachineInstr &MI = *II;
|
||||||
|
MachineBasicBlock *MBB = MI.getParent();
|
||||||
MachineFunction &MF = *MI.getParent()->getParent();
|
MachineFunction &MF = *MI.getParent()->getParent();
|
||||||
|
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||||
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
|
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
|
||||||
const RISCVRegisterInfo * RI = ST.getRegisterInfo();
|
const RISCVRegisterInfo *RI = ST.getRegisterInfo();
|
||||||
const RISCVInstrInfo *RII = ST.getInstrInfo();
|
const RISCVInstrInfo *RII = ST.getInstrInfo();
|
||||||
DebugLoc DL = MI.getDebugLoc();
|
DebugLoc DL = MI.getDebugLoc();
|
||||||
|
|
||||||
|
@ -343,12 +344,14 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
auto FrameIndexID = MF.getFrameInfo().getStackID(FrameIndex);
|
auto FrameIndexID = MF.getFrameInfo().getStackID(FrameIndex);
|
||||||
|
|
||||||
Register FrameReg;
|
Register FrameReg;
|
||||||
StackOffset Offset = // FIXME: The FrameReg and Offset should be depended on divergency route.
|
StackOffset Offset = // FIXME: The FrameReg and Offset should be depended on
|
||||||
|
// divergency route.
|
||||||
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
|
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
|
||||||
// TODO: finish
|
// TODO: finish
|
||||||
// if(!RII->isVGPRMemoryAccess(MI))
|
// if(!RII->isVGPRMemoryAccess(MI))
|
||||||
// Offset -= StackOffset::getFixed(
|
// Offset -= StackOffset::getFixed(
|
||||||
// MF.getInfo<RISCVMachineFunctionInfo>()->getVarArgsSaveSize() - 4);
|
// MF.getInfo<RISCVMachineFunctionInfo>()->getVarArgsSaveSize() -
|
||||||
|
// 4);
|
||||||
int64_t Lo11 = Offset.getFixed();
|
int64_t Lo11 = Offset.getFixed();
|
||||||
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
|
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
|
||||||
|
|
||||||
|
@ -356,7 +359,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
report_fatal_error(
|
report_fatal_error(
|
||||||
"Frame offsets outside of the signed 32-bit range not supported");
|
"Frame offsets outside of the signed 32-bit range not supported");
|
||||||
}
|
}
|
||||||
|
// FIXME: vsw/vlw has 11 bits immediates
|
||||||
if (MI.getOpcode() == RISCV::ADDI && !isInt<11>(Offset.getFixed())) {
|
if (MI.getOpcode() == RISCV::ADDI && !isInt<11>(Offset.getFixed())) {
|
||||||
// We chose to emit the canonical immediate sequence rather than folding
|
// We chose to emit the canonical immediate sequence rather than folding
|
||||||
// the offset into the using add under the theory that doing so doesn't
|
// the offset into the using add under the theory that doing so doesn't
|
||||||
|
@ -369,38 +372,119 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
// operand of our user instruction. As a result, the remaining
|
// operand of our user instruction. As a result, the remaining
|
||||||
// offset can by construction, at worst, a LUI and a ADD.
|
// offset can by construction, at worst, a LUI and a ADD.
|
||||||
int64_t Val = Offset.getFixed();
|
int64_t Val = Offset.getFixed();
|
||||||
Lo11 = SignExtend64<11>(Val);
|
Lo11 = SignExtend64<12>(Val);
|
||||||
|
|
||||||
|
|
||||||
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo11);
|
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo11);
|
||||||
Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo11,
|
Offset =
|
||||||
Offset.getScalable());
|
StackOffset::get((uint64_t)Val - (uint64_t)Lo11, Offset.getScalable());
|
||||||
|
// adjustReg(*II->getParent(), II, DL, DestReg, FrameReg, Offset,
|
||||||
|
// MachineInstr::NoFlags, std::nullopt);
|
||||||
}
|
}
|
||||||
if(MI.getOpcode() == RISCV::ADDI &&
|
Register DestReg = MI.getOperand(0).getReg();
|
||||||
static_cast<unsigned>(FrameIndexID) == RISCVStackID::VGPRSpill) {
|
if (Offset.getScalable() || Offset.getFixed()) {
|
||||||
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg,
|
|
||||||
/*IsDef*/false,
|
|
||||||
/*IsImp*/false,
|
|
||||||
/*IsKill*/false);
|
|
||||||
|
|
||||||
|
if (MI.getOpcode() == RISCV::ADDI)
|
||||||
|
DestReg = MI.getOperand(0).getReg();
|
||||||
|
else
|
||||||
|
DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||||
|
// !!!Very important for adjust
|
||||||
|
adjustReg(*II->getParent(), II, DL, DestReg, FrameReg, Offset,
|
||||||
|
MachineInstr::NoFlags, std::nullopt);
|
||||||
|
}
|
||||||
|
if (MI.getOpcode() == RISCV::ADDI &&
|
||||||
|
static_cast<unsigned>(FrameIndexID) == RISCVStackID::VGPRSpill) {
|
||||||
|
MI.getOperand(FIOperandNum)
|
||||||
|
.ChangeToRegister(FrameReg,
|
||||||
|
/*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(RII->isVGPRMemoryAccess(MI)) {
|
if (RII->isPrivateMemoryAccess(MI) && FrameReg == RISCV::X4) {
|
||||||
MI.getOperand(FIOperandNum).ChangeToRegister(getPrivateMemoryBaseRegister(MF),
|
MI.getOperand(FIOperandNum)
|
||||||
/*IsDef*/false,
|
.ChangeToRegister(getPrivateMemoryBaseRegister(MF),
|
||||||
/*IsImp*/false,
|
/*IsDef*/ false,
|
||||||
/*IsKill*/false);
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
// simm11 locates in range [-1024, 1023], if offset not in this range, then
|
// simm11 locates in range [-1024, 1023], if offset not in this range, then
|
||||||
// we legalize the offset
|
// we legalize the offset
|
||||||
if(!isInt<11>(Lo11))
|
if (!isInt<12>(Lo11))
|
||||||
adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11,
|
adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11,
|
||||||
getPrivateMemoryBaseRegister(MF), FIOperandNum);
|
getPrivateMemoryBaseRegister(MF), FIOperandNum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (RII->isPrivateMemoryAccess(MI) && FrameReg == RISCV::X2) {
|
||||||
|
MI.getOperand(FIOperandNum)
|
||||||
|
.ChangeToRegister(getPrivateMemoryBaseRegister(MF),
|
||||||
|
/*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
|
// simm11 locates in range [-1024, 1023], if offset not in this range, then
|
||||||
|
// we legalize the offset
|
||||||
|
MI.setDesc(RII->get(RII->getUniformMemoryOpcode(MI)));
|
||||||
|
if (!isInt<12>(Lo11))
|
||||||
|
adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11,
|
||||||
|
getPrivateMemoryBaseRegister(MF), FIOperandNum);
|
||||||
|
}
|
||||||
|
|
||||||
|
// else
|
||||||
|
// MI.getOperand(FIOperandNum)
|
||||||
|
// .ChangeToRegister(FrameReg, /*IsDef*/ false,
|
||||||
|
// /*IsImp*/ false,
|
||||||
|
// /*IsKill*/ false);
|
||||||
|
if (RII->isUniformMemoryAccess(MI) && FrameReg == RISCV::X4) {
|
||||||
|
Register DestReg =
|
||||||
|
MF.getRegInfo().createVirtualRegister(&RISCV::VGPRRegClass);
|
||||||
|
MI.setDesc(RII->get(RII->getPrivateMemoryOpcode(MI)));
|
||||||
|
BuildMI(*MBB, II, DL, RII->get(RISCV::VMV_V_X), DestReg)
|
||||||
|
.addReg(MI.getOperand(FIOperandNum - 1).getReg());
|
||||||
|
MI.getOperand(FIOperandNum)
|
||||||
|
.ChangeToRegister(getPrivateMemoryBaseRegister(MF), /*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
|
MI.getOperand(FIOperandNum - 1)
|
||||||
|
.ChangeToRegister(DestReg, /*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (RII->isLocalMemoryAccess(MI) && FrameReg == RISCV::X4) {
|
||||||
|
Register DestReg =
|
||||||
|
MF.getRegInfo().createVirtualRegister(&RISCV::VGPRRegClass);
|
||||||
|
BuildMI(*MBB, II, DL, RII->get(RISCV::VMV_V_X), DestReg).addReg(FrameReg);
|
||||||
|
MI.getOperand(FIOperandNum)
|
||||||
|
.ChangeToRegister(getFrameRegister(MF), /*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
|
MI.setDesc(RII->get(RII->getPrivateMemoryOpcode(MI)));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (RII->isLocalMemoryAccess(MI) && FrameReg == RISCV::X2) {
|
||||||
|
Register DestReg =
|
||||||
|
MF.getRegInfo().createVirtualRegister(&RISCV::VGPRRegClass);
|
||||||
|
BuildMI(*MBB, II, DL, RII->get(RISCV::VMV_V_X), DestReg).addReg(FrameReg);
|
||||||
|
MI.getOperand(FIOperandNum)
|
||||||
|
.ChangeToRegister(DestReg, /*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (RII->isPrivateMemoryAccess(MI))
|
||||||
|
MI.getOperand(FIOperandNum)
|
||||||
|
.ChangeToRegister(getPrivateMemoryBaseRegister(MF), /*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
else
|
else
|
||||||
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false,
|
MI.getOperand(FIOperandNum)
|
||||||
/*IsImp*/false,
|
.ChangeToRegister(DestReg == MI.getOperand(0).getReg() ? FrameReg
|
||||||
/*IsKill*/false);
|
: DestReg,
|
||||||
|
/*IsDef*/ false,
|
||||||
|
/*IsImp*/ false,
|
||||||
|
/*IsKill*/ false);
|
||||||
|
|
||||||
// If after materializing the adjustment, we have a pointless ADDI, remove it
|
// If after materializing the adjustment, we have a pointless ADDI, remove it
|
||||||
if (MI.getOpcode() == RISCV::ADDI &&
|
if (MI.getOpcode() == RISCV::ADDI &&
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||||
|
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||||
|
|
||||||
|
@compute_sum_with_localmem.tmp_sum = internal addrspace(3) global [10 x i32] undef, align 4
|
||||||
|
|
||||||
|
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
|
||||||
|
; Kernel under test: it stores its three arguments into addrspace(5) allocas
; and then writes the constant 12 into three elements of the addrspace(3)
; global @compute_sum_with_localmem.tmp_sum. The VENTUS check lines below pin
; the assembly expected from llc for this body.
define dso_local ventus_kernel void @compute_sum_with_localmem(ptr addrspace(1) noundef align 4 %a, i32 noundef %n, ptr addrspace(1) noundef align 4 %sum) {
|
||||||
|
; VENTUS-LABEL: compute_sum_with_localmem:
|
||||||
|
; VENTUS: # %bb.0: # %entry
|
||||||
|
; VENTUS-NEXT: li t0, 12
|
||||||
|
; VENTUS-NEXT: sw t0, -16(s0)
|
||||||
|
; VENTUS-NEXT: sw t0, -40(s0)
|
||||||
|
; VENTUS-NEXT: sw t0, -4(s0)
|
||||||
|
; VENTUS-NEXT: addi s0, s0, -40
|
||||||
|
; VENTUS-NEXT: ret
|
||||||
|
entry:
|
||||||
|
; One addrspace(5) stack slot per kernel argument (%a, %n, %sum).
%a.addr = alloca ptr addrspace(1), align 4, addrspace(5)
|
||||||
|
%n.addr = alloca i32, align 4, addrspace(5)
|
||||||
|
%sum.addr = alloca ptr addrspace(1), align 4, addrspace(5)
|
||||||
|
; Save each incoming argument into its slot.
store ptr addrspace(1) %a, ptr addrspace(5) %a.addr, align 4
|
||||||
|
store i32 %n, ptr addrspace(5) %n.addr, align 4
|
||||||
|
store ptr addrspace(1) %sum, ptr addrspace(5) %sum.addr, align 4
|
||||||
|
; Store 12 to elements 6, 0 and 9 of the addrspace(3) array tmp_sum.
store i32 12, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) @compute_sum_with_localmem.tmp_sum, i32 0, i32 6), align 4
|
||||||
|
store i32 12, ptr addrspace(3) @compute_sum_with_localmem.tmp_sum, align 4
|
||||||
|
store i32 12, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) @compute_sum_with_localmem.tmp_sum, i32 0, i32 9), align 4
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue