[VENTUS][fix] Put local variables declared in kernel function into shared memory
This commit is contained in:
parent
a48f51ab76
commit
87fe5f3ce8
|
@ -43,10 +43,14 @@ _start:
|
|||
li t4, 0
|
||||
csrr t1, CSR_WID
|
||||
csrr t2, CSR_LDS
|
||||
li t3, 1024 # 1k size for single warp
|
||||
mul t1, t1, t3 # sp = lds + wid * warp_size
|
||||
li t3, 1024 # 1M size for single warp
|
||||
mul t1, t1, t3 # sp wid * warp_size
|
||||
add sp, t1, t2 # sp points to baseaddr of local memory of each SM
|
||||
li tp, 0 # tp points to baseaddr for lower bound of private memory(1K) of each thread
|
||||
csrr t5, CSR_NUMW
|
||||
li t3, 1024
|
||||
mul t5, t5, t3
|
||||
add s0, t2, t5 # s0 points to local memory base addr in a workgroup
|
||||
|
||||
# clear BSS segment
|
||||
la a0, _edata
|
||||
|
|
|
@ -116,6 +116,7 @@ enum Value {
|
|||
ScalableVector = 2,
|
||||
WasmLocal = 3,
|
||||
VGPRSpill = 4,
|
||||
LocalMemSpill = 5,
|
||||
NoAlloc = 255
|
||||
};
|
||||
}
|
||||
|
|
|
@ -300,7 +300,8 @@ getNonLibcallCSI(const MachineFunction &MF,
|
|||
// TODO: For now, we don't define VGPR callee saved registers, when we later
|
||||
// add VGPR callee saved register, remember to modify here
|
||||
if (FI >= 0 && (MFI.getStackID(FI) == RISCVStackID::Default ||
|
||||
MFI.getStackID(FI) == RISCVStackID::SGPRSpill))
|
||||
MFI.getStackID(FI) == RISCVStackID::SGPRSpill ||
|
||||
MFI.getStackID(FI) == RISCVStackID::VGPRSpill))
|
||||
NonLibcallCSI.push_back(CS);
|
||||
}
|
||||
|
||||
|
@ -374,6 +375,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
|
||||
uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
|
||||
uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
|
||||
uint64_t LocalStackSize = getStackSize(MF, RISCVStackID::LocalMemSpill);
|
||||
|
||||
// FIXME: need to add local data declaration calculation
|
||||
CurrentSubProgramInfo->LDSMemory += SPStackSize;
|
||||
CurrentSubProgramInfo->PDSMemory += TPStackSize;
|
||||
|
@ -400,8 +403,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet, CurrentSubProgramInfo,
|
||||
SPReg);
|
||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
|
||||
StackOffset::getFixed(SPStackSize),
|
||||
MachineInstr::FrameSetup, getStackAlign());
|
||||
StackOffset::getFixed(SPStackSize), MachineInstr::FrameSetup,
|
||||
getStackAlign());
|
||||
|
||||
// Emit ".cfi_def_cfa_offset SPStackSize"
|
||||
unsigned CFIIndex = MF.addFrameInst(
|
||||
|
@ -411,14 +414,21 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
if(TPStackSize) {
|
||||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet, CurrentSubProgramInfo,
|
||||
TPReg);
|
||||
RI->insertRegToSet(MRI, CurrentRegisterAddedSet, CurrentSubProgramInfo,
|
||||
RI->getPrivateMemoryBaseRegister(MF));
|
||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
||||
StackOffset::getFixed(TPStackSize),
|
||||
if (LocalStackSize) {
|
||||
RI->adjustReg(MBB, MBBI, DL, RISCV::X8, RISCV::X8,
|
||||
StackOffset::getFixed(LocalStackSize),
|
||||
MachineInstr::FrameSetup, getStackAlign());
|
||||
// Emit ".cfi_def_cfa_offset Local memory StackSize"
|
||||
unsigned CFIIndex = MF.addFrameInst(
|
||||
MCCFIInstruction::cfiDefCfaOffset(nullptr, SPStackSize));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
if (TPStackSize) {
|
||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
||||
StackOffset::getFixed(TPStackSize), MachineInstr::FrameSetup,
|
||||
getStackAlign());
|
||||
|
||||
// Emit ".cfi_def_cfa_offset TPStackSize"
|
||||
unsigned CFIIndex = MF.addFrameInst(
|
||||
|
@ -500,12 +510,16 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
// Get 2 stack size for TP and SP
|
||||
uint64_t SPStackSize = getStackSize(MF, RISCVStackID::SGPRSpill);
|
||||
uint64_t TPStackSize = getStackSize(MF, RISCVStackID::VGPRSpill);
|
||||
|
||||
uint64_t LocalStackSize = getStackSize(MF, RISCVStackID::LocalMemSpill);
|
||||
// Deallocate stack
|
||||
if(SPStackSize)
|
||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
|
||||
StackOffset::getFixed(-SPStackSize),
|
||||
MachineInstr::FrameDestroy, getStackAlign());
|
||||
if(LocalStackSize)
|
||||
RI->adjustReg(MBB, MBBI, DL, RISCV::X8, RISCV::X8,
|
||||
StackOffset::getFixed(-LocalStackSize),
|
||||
MachineInstr::FrameDestroy, getStackAlign());
|
||||
if(TPStackSize) {
|
||||
RI->adjustReg(MBB, MBBI, DL, TPReg, TPReg,
|
||||
StackOffset::getFixed(-TPStackSize),
|
||||
|
@ -564,7 +578,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
|||
|
||||
assert((StackID == RISCVStackID::Default ||
|
||||
StackID == RISCVStackID::SGPRSpill ||
|
||||
StackID == RISCVStackID::VGPRSpill) &&
|
||||
StackID == RISCVStackID::VGPRSpill ||
|
||||
StackID == RISCVStackID::LocalMemSpill) &&
|
||||
"Unexpected stack ID for the frame object.");
|
||||
|
||||
// Different stacks for sALU and vALU threads.
|
||||
|
@ -785,9 +800,12 @@ void RISCVFrameLowering::determineStackID(MachineFunction &MF) const {
|
|||
// MFI.setStackID(I, RISCVStackID::VGPRSpill);
|
||||
|
||||
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, I);
|
||||
if(MFI.getStackID(I) != RISCVStackID::SGPRSpill &&
|
||||
PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
|
||||
if (MFI.getStackID(I) != RISCVStackID::Default)
|
||||
continue;
|
||||
if (PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
|
||||
MFI.setStackID(I, RISCVStackID::VGPRSpill);
|
||||
else if (PtrInfo.getAddrSpace() == RISCVAS::LOCAL_ADDRESS)
|
||||
MFI.setStackID(I, RISCVStackID::LocalMemSpill);
|
||||
else
|
||||
MFI.setStackID(I, RISCVStackID::SGPRSpill);
|
||||
}
|
||||
|
@ -824,17 +842,17 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
|
|||
Register Reg = CS.getReg();
|
||||
|
||||
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
||||
// TODO: Have we allocated stack for vGPR spilling?
|
||||
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255) {
|
||||
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::SGPRSpill);
|
||||
// FIXME: Right now, no vgpr callee saved register, maybe later needed
|
||||
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
|
||||
RC, TRI);
|
||||
} else {
|
||||
assert(Reg.id() >= RISCV::V32 && Reg.id() <= RISCV::V255 && "TODO");
|
||||
MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::VGPRSpill);
|
||||
TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
|
||||
RC, TRI);
|
||||
}
|
||||
// else {
|
||||
// FIXME: Right now, no callee saved register for VGPR
|
||||
// MF->getFrameInfo().setStackID(CS.getFrameIdx(), RISCVStackID::VGPRSpill);
|
||||
// }
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -862,7 +880,6 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
|
|||
for (auto &CS : NonLibcallCSI) {
|
||||
Register Reg = CS.getReg();
|
||||
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
|
||||
if(Reg.id() < RISCV::V0 || Reg.id() > RISCV::V255 )
|
||||
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
|
||||
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
|
||||
}
|
||||
|
@ -946,6 +963,7 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
|
|||
case RISCVStackID::Default:
|
||||
case RISCVStackID::SGPRSpill:
|
||||
case RISCVStackID::VGPRSpill:
|
||||
case RISCVStackID::LocalMemSpill:
|
||||
return true;
|
||||
case RISCVStackID::ScalableVector:
|
||||
case RISCVStackID::NoAlloc:
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "RISCVSubtarget.h"
|
||||
#include "RISCVTargetMachine.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
|
||||
#include "llvm/Analysis/MemoryLocation.h"
|
||||
|
@ -35,6 +36,7 @@
|
|||
#include "llvm/CodeGen/ValueTypes.h"
|
||||
#include "llvm/IR/DiagnosticInfo.h"
|
||||
#include "llvm/IR/DiagnosticPrinter.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/IntrinsicsRISCV.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
|
@ -4296,9 +4298,35 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
|
|||
SelectionDAG &DAG) const {
|
||||
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
|
||||
assert(N->getOffset() == 0 && "unexpected offset in global node");
|
||||
// FIXME: Only support local address?
|
||||
if (N->getAddressSpace() == RISCVAS::LOCAL_ADDRESS)
|
||||
return lowerGlobalLocalAddress(N, DAG);
|
||||
return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
|
||||
}
|
||||
|
||||
/// For local variables, we need to store variables into local memory,
|
||||
/// rather than put it into '.sbss' section
|
||||
/// TODO: Remove the address allocating in '.sbss' section
|
||||
SDValue RISCVTargetLowering::lowerGlobalLocalAddress(GlobalAddressSDNode *Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
static SmallVector<std::pair<const GlobalVariable *, int>> LoweredVariables;
|
||||
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
const DataLayout &DL = DAG.getDataLayout();
|
||||
auto *GV = cast<GlobalVariable>(Op->getGlobal());
|
||||
for(auto &VA : LoweredVariables) {
|
||||
if(VA.first == GV)
|
||||
return DAG.getFrameIndex(VA.second, MVT::i32);
|
||||
}
|
||||
unsigned AlignValue = DL.getABITypeAlignment(GV->getValueType());
|
||||
int FI = MFI.CreateStackObject(DL.getTypeAllocSize(GV->getValueType())
|
||||
/*Offset need to be modified too*/,
|
||||
Align(AlignValue), false, nullptr, RISCVStackID::LocalMemSpill);
|
||||
LoweredVariables.push_back(std::make_pair(GV, FI));
|
||||
return DAG.getFrameIndex(FI, MVT::i32);
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
|
||||
|
@ -7458,7 +7486,6 @@ SDValue RISCVTargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
|
|||
|
||||
return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
|
||||
SDValue Op,
|
||||
const SDLoc &DL,
|
||||
|
@ -11480,7 +11507,7 @@ static bool CC_Ventus(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
|
|||
|
||||
// Allocate stack for arguments which can not use register
|
||||
unsigned StackOffset =
|
||||
Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
|
||||
Reg ? 0 : -State.AllocateStack(StoreSizeBytes, StackAlign);
|
||||
|
||||
// If we reach this point and PendingLocs is non-empty, we must be at the
|
||||
// end of a split argument that must be passed indirectly.
|
||||
|
@ -13483,6 +13510,10 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
|||
}
|
||||
case ISD::STORE: {
|
||||
const StoreSDNode *Store= cast<StoreSDNode>(N);
|
||||
auto &MFI = FLI->MF->getFrameInfo();
|
||||
if(auto *BaseBase = dyn_cast<FrameIndexSDNode>(Store->getOperand(1)))
|
||||
if(MFI.getStackID(BaseBase->getIndex()) == RISCVStackID::SGPRSpill)
|
||||
return false;
|
||||
return Store->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS ||
|
||||
Store->getPointerInfo().StackID == RISCVStackID::VGPRSpill;
|
||||
}
|
||||
|
@ -13494,6 +13525,8 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
|||
case ISD::INTRINSIC_W_CHAIN:
|
||||
return RISCVII::isIntrinsicSourceOfDivergence(
|
||||
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
|
||||
case Intrinsic::vastart:
|
||||
return true;
|
||||
/*
|
||||
case AMDGPUISD::ATOMIC_CMP_SWAP:
|
||||
case AMDGPUISD::ATOMIC_INC:
|
||||
|
|
|
@ -646,6 +646,9 @@ private:
|
|||
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue lowerGlobalLocalAddress(GlobalAddressSDNode *Op,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -95,7 +95,7 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
|
|||
return 0;
|
||||
}
|
||||
|
||||
bool RISCVInstrInfo::isVGPRMemoryAccess(const MachineInstr &MI) const {
|
||||
bool RISCVInstrInfo::isPrivateMemoryAccess(const MachineInstr &MI) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
|
@ -107,11 +107,102 @@ bool RISCVInstrInfo::isVGPRMemoryAccess(const MachineInstr &MI) const {
|
|||
case RISCV::VSW:
|
||||
case RISCV::VSH:
|
||||
case RISCV::VSB:
|
||||
case RISCV::VSWI12:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool RISCVInstrInfo::isUniformMemoryAccess(const MachineInstr &MI) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case RISCV::LW:
|
||||
case RISCV::LB:
|
||||
case RISCV::LBU:
|
||||
case RISCV::LH:
|
||||
case RISCV::LHU:
|
||||
case RISCV::SW:
|
||||
case RISCV::SH:
|
||||
case RISCV::SB:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool RISCVInstrInfo::isLocalMemoryAccess(const MachineInstr &MI) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case RISCV::VLWI12:
|
||||
case RISCV::VLBI12:
|
||||
case RISCV::VLBUI12:
|
||||
case RISCV::VLHI12:
|
||||
case RISCV::VLHUI12:
|
||||
case RISCV::VSWI12:
|
||||
case RISCV::VSHI12:
|
||||
case RISCV::VSBI12:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Map the opcode of \p MI to the corresponding private-memory opcode.
///
/// Both the plain form (LW, LB, ...) and the *I12 form (VLWI12, VLBI12, ...)
/// of each access map to the same result (VLW, VLB, VLBU, VLH, VLHU, VSW,
/// VSH, VSB). Any other opcode trips the "TODO" assertion; in builds without
/// assertions RISCV::VLW is returned.
unsigned RISCVInstrInfo::getPrivateMemoryOpcode(MachineInstr &MI) const {
  const unsigned Opc = MI.getOpcode();

  // Loads.
  if (Opc == RISCV::LW || Opc == RISCV::VLWI12)
    return RISCV::VLW;
  if (Opc == RISCV::LB || Opc == RISCV::VLBI12)
    return RISCV::VLB;
  if (Opc == RISCV::LBU || Opc == RISCV::VLBUI12)
    return RISCV::VLBU;
  if (Opc == RISCV::LH || Opc == RISCV::VLHI12)
    return RISCV::VLH;
  if (Opc == RISCV::LHU || Opc == RISCV::VLHUI12)
    return RISCV::VLHU;

  // Stores.
  if (Opc == RISCV::SW || Opc == RISCV::VSWI12)
    return RISCV::VSW;
  if (Opc == RISCV::SH || Opc == RISCV::VSHI12)
    return RISCV::VSH;
  if (Opc == RISCV::SB || Opc == RISCV::VSBI12)
    return RISCV::VSB;

  // No mapping is defined for any other opcode yet.
  assert(0 && "TODO");
  return RISCV::VLW;
}
|
||||
|
||||
/// Map the opcode of \p MI (VLW, VLB, VLBU, VLH, VLHU, VSW, VSH, VSB) to its
/// *I12 counterpart (VLWI12, VLBI12, ...).
///
/// Any other opcode trips the "TODO" assertion; in builds without assertions
/// RISCV::VLW is returned.
unsigned RISCVInstrInfo::getUniformMemoryOpcode(MachineInstr &MI) const {
  // Each row is {opcode to translate, resulting *I12 opcode}.
  static const unsigned OpcodeMap[][2] = {
      {RISCV::VLW, RISCV::VLWI12},   {RISCV::VLB, RISCV::VLBI12},
      {RISCV::VLBU, RISCV::VLBUI12}, {RISCV::VLH, RISCV::VLHI12},
      {RISCV::VLHU, RISCV::VLHUI12}, {RISCV::VSW, RISCV::VSWI12},
      {RISCV::VSH, RISCV::VSHI12},   {RISCV::VSB, RISCV::VSBI12},
  };

  const unsigned Opc = MI.getOpcode();
  for (const auto &Row : OpcodeMap)
    if (Row[0] == Opc)
      return Row[1];

  // No mapping is defined for any other opcode yet.
  assert(0 && "TODO");
  return RISCV::VLW;
}
|
||||
|
||||
unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
|
||||
int &FrameIndex) const {
|
||||
switch (MI.getOpcode()) {
|
||||
|
|
|
@ -55,7 +55,18 @@ public:
|
|||
MCInst getNop() const override;
|
||||
const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
|
||||
|
||||
bool isVGPRMemoryAccess(const MachineInstr &MI) const;
|
||||
/// Check the memory access instruction is private memory access
|
||||
bool isPrivateMemoryAccess(const MachineInstr &MI) const;
|
||||
|
||||
/// Check the memory access instruction is uniform memory access
|
||||
bool isUniformMemoryAccess(const MachineInstr &MI) const;
|
||||
|
||||
/// Check whether the memory access instruction is a local memory access
|
||||
bool isLocalMemoryAccess(const MachineInstr &MI) const;
|
||||
|
||||
unsigned getPrivateMemoryOpcode(MachineInstr &MI) const;
|
||||
|
||||
unsigned getUniformMemoryOpcode(MachineInstr &MI) const;
|
||||
|
||||
unsigned isLoadFromStackSlot(const MachineInstr &MI,
|
||||
int &FrameIndex) const override;
|
||||
|
|
|
@ -87,10 +87,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
|||
// Use markSuperRegs to ensure any register aliases are also reserved
|
||||
markSuperRegs(Reserved, RISCV::X0); // zero
|
||||
markSuperRegs(Reserved, RISCV::X2); // sp
|
||||
markSuperRegs(Reserved, RISCV::X8); // s0
|
||||
markSuperRegs(Reserved, RISCV::X3); // gp
|
||||
markSuperRegs(Reserved, RISCV::X4); // tp
|
||||
if (TFI->hasFP(MF))
|
||||
markSuperRegs(Reserved, RISCV::X8); // fp
|
||||
// Reserve the base register if we need to realign the stack and allocate
|
||||
// variable-sized objects at runtime.
|
||||
if (TFI->hasBP(MF))
|
||||
|
@ -333,7 +332,9 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
|
||||
|
||||
MachineInstr &MI = *II;
|
||||
MachineBasicBlock *MBB = MI.getParent();
|
||||
MachineFunction &MF = *MI.getParent()->getParent();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
|
||||
const RISCVRegisterInfo *RI = ST.getRegisterInfo();
|
||||
const RISCVInstrInfo *RII = ST.getInstrInfo();
|
||||
|
@ -343,12 +344,14 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
auto FrameIndexID = MF.getFrameInfo().getStackID(FrameIndex);
|
||||
|
||||
Register FrameReg;
|
||||
StackOffset Offset = // FIXME: The FrameReg and Offset should be depended on divergency route.
|
||||
StackOffset Offset = // FIXME: The FrameReg and Offset should be depended on
|
||||
// divergency route.
|
||||
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
|
||||
// TODO: finish
|
||||
// if(!RII->isVGPRMemoryAccess(MI))
|
||||
// Offset -= StackOffset::getFixed(
|
||||
// MF.getInfo<RISCVMachineFunctionInfo>()->getVarArgsSaveSize() - 4);
|
||||
// MF.getInfo<RISCVMachineFunctionInfo>()->getVarArgsSaveSize() -
|
||||
// 4);
|
||||
int64_t Lo11 = Offset.getFixed();
|
||||
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
|
||||
|
||||
|
@ -356,7 +359,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
report_fatal_error(
|
||||
"Frame offsets outside of the signed 32-bit range not supported");
|
||||
}
|
||||
|
||||
// FIXME: vsw/vlw has 11 bits immediates
|
||||
if (MI.getOpcode() == RISCV::ADDI && !isInt<11>(Offset.getFixed())) {
|
||||
// We chose to emit the canonical immediate sequence rather than folding
|
||||
// the offset into the using add under the theory that doing so doesn't
|
||||
|
@ -369,36 +372,117 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
// operand of our user instruction. As a result, the remaining
|
||||
// offset can by construction, at worst, a LUI and a ADD.
|
||||
int64_t Val = Offset.getFixed();
|
||||
Lo11 = SignExtend64<11>(Val);
|
||||
|
||||
Lo11 = SignExtend64<12>(Val);
|
||||
|
||||
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo11);
|
||||
Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo11,
|
||||
Offset.getScalable());
|
||||
Offset =
|
||||
StackOffset::get((uint64_t)Val - (uint64_t)Lo11, Offset.getScalable());
|
||||
// adjustReg(*II->getParent(), II, DL, DestReg, FrameReg, Offset,
|
||||
// MachineInstr::NoFlags, std::nullopt);
|
||||
}
|
||||
Register DestReg = MI.getOperand(0).getReg();
|
||||
if (Offset.getScalable() || Offset.getFixed()) {
|
||||
|
||||
if (MI.getOpcode() == RISCV::ADDI)
|
||||
DestReg = MI.getOperand(0).getReg();
|
||||
else
|
||||
DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
|
||||
// !!!Very important for adjust
|
||||
adjustReg(*II->getParent(), II, DL, DestReg, FrameReg, Offset,
|
||||
MachineInstr::NoFlags, std::nullopt);
|
||||
}
|
||||
if (MI.getOpcode() == RISCV::ADDI &&
|
||||
static_cast<unsigned>(FrameIndexID) == RISCVStackID::VGPRSpill) {
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg,
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(FrameReg,
|
||||
/*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
|
||||
}
|
||||
|
||||
if(RII->isVGPRMemoryAccess(MI)) {
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(getPrivateMemoryBaseRegister(MF),
|
||||
if (RII->isPrivateMemoryAccess(MI) && FrameReg == RISCV::X4) {
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(getPrivateMemoryBaseRegister(MF),
|
||||
/*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
// simm11 locates in range [-1024, 1023], if offset not in this range, then
|
||||
// we legalize the offset
|
||||
if(!isInt<11>(Lo11))
|
||||
if (!isInt<12>(Lo11))
|
||||
adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11,
|
||||
getPrivateMemoryBaseRegister(MF), FIOperandNum);
|
||||
}
|
||||
|
||||
if (RII->isPrivateMemoryAccess(MI) && FrameReg == RISCV::X2) {
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(getPrivateMemoryBaseRegister(MF),
|
||||
/*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
// simm11 locates in range [-1024, 1023], if offset not in this range, then
|
||||
// we legalize the offset
|
||||
MI.setDesc(RII->get(RII->getUniformMemoryOpcode(MI)));
|
||||
if (!isInt<12>(Lo11))
|
||||
adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11,
|
||||
getPrivateMemoryBaseRegister(MF), FIOperandNum);
|
||||
}
|
||||
|
||||
// else
|
||||
// MI.getOperand(FIOperandNum)
|
||||
// .ChangeToRegister(FrameReg, /*IsDef*/ false,
|
||||
// /*IsImp*/ false,
|
||||
// /*IsKill*/ false);
|
||||
if (RII->isUniformMemoryAccess(MI) && FrameReg == RISCV::X4) {
|
||||
Register DestReg =
|
||||
MF.getRegInfo().createVirtualRegister(&RISCV::VGPRRegClass);
|
||||
MI.setDesc(RII->get(RII->getPrivateMemoryOpcode(MI)));
|
||||
BuildMI(*MBB, II, DL, RII->get(RISCV::VMV_V_X), DestReg)
|
||||
.addReg(MI.getOperand(FIOperandNum - 1).getReg());
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(getPrivateMemoryBaseRegister(MF), /*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
MI.getOperand(FIOperandNum - 1)
|
||||
.ChangeToRegister(DestReg, /*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
if (RII->isLocalMemoryAccess(MI) && FrameReg == RISCV::X4) {
|
||||
Register DestReg =
|
||||
MF.getRegInfo().createVirtualRegister(&RISCV::VGPRRegClass);
|
||||
BuildMI(*MBB, II, DL, RII->get(RISCV::VMV_V_X), DestReg).addReg(FrameReg);
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(getFrameRegister(MF), /*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
MI.setDesc(RII->get(RII->getPrivateMemoryOpcode(MI)));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (RII->isLocalMemoryAccess(MI) && FrameReg == RISCV::X2) {
|
||||
Register DestReg =
|
||||
MF.getRegInfo().createVirtualRegister(&RISCV::VGPRRegClass);
|
||||
BuildMI(*MBB, II, DL, RII->get(RISCV::VMV_V_X), DestReg).addReg(FrameReg);
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(DestReg, /*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (RII->isPrivateMemoryAccess(MI))
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(getPrivateMemoryBaseRegister(MF), /*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
else
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false,
|
||||
MI.getOperand(FIOperandNum)
|
||||
.ChangeToRegister(DestReg == MI.getOperand(0).getReg() ? FrameReg
|
||||
: DestReg,
|
||||
/*IsDef*/ false,
|
||||
/*IsImp*/ false,
|
||||
/*IsKill*/ false);
|
||||
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
@compute_sum_with_localmem.tmp_sum = internal addrspace(3) global [10 x i32] undef, align 4
|
||||
|
||||
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
|
||||
define dso_local ventus_kernel void @compute_sum_with_localmem(ptr addrspace(1) noundef align 4 %a, i32 noundef %n, ptr addrspace(1) noundef align 4 %sum) {
|
||||
; VENTUS-LABEL: compute_sum_with_localmem:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: li t0, 12
|
||||
; VENTUS-NEXT: sw t0, -16(s0)
|
||||
; VENTUS-NEXT: sw t0, -40(s0)
|
||||
; VENTUS-NEXT: sw t0, -4(s0)
|
||||
; VENTUS-NEXT: addi s0, s0, -40
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%a.addr = alloca ptr addrspace(1), align 4, addrspace(5)
|
||||
%n.addr = alloca i32, align 4, addrspace(5)
|
||||
%sum.addr = alloca ptr addrspace(1), align 4, addrspace(5)
|
||||
store ptr addrspace(1) %a, ptr addrspace(5) %a.addr, align 4
|
||||
store i32 %n, ptr addrspace(5) %n.addr, align 4
|
||||
store ptr addrspace(1) %sum, ptr addrspace(5) %sum.addr, align 4
|
||||
store i32 12, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) @compute_sum_with_localmem.tmp_sum, i32 0, i32 6), align 4
|
||||
store i32 12, ptr addrspace(3) @compute_sum_with_localmem.tmp_sum, align 4
|
||||
store i32 12, ptr addrspace(3) getelementptr inbounds ([10 x i32], ptr addrspace(3) @compute_sum_with_localmem.tmp_sum, i32 0, i32 9), align 4
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue