[VENTUS][RISCV][fix] Fix private memory access instructions' codegen errors

We changed the private memory access' encoding in this commit `6da666856b`,
this commit is to fix the codegen bugs by that commit
This commit is contained in:
zhoujing 2023-06-25 10:48:57 +08:00
parent e6b7935c89
commit f494e20d44
8 changed files with 158 additions and 28 deletions

View File

@ -1262,9 +1262,16 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
(1 << 4),
"immediate must be in the range");
}
case Match_InvalidRnumArg: {
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10);
}
case Match_InvalidSImm11: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "simm11 range is[-1024, 1023]");
};
}
llvm_unreachable("Unknown match type detected!");

View File

@ -414,6 +414,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::VMV_S_X),
RI->getPrivateMemoryBaseRegister(MF))
.addReg(RI->getPrivateMemoryBaseRegister(MF))
.addReg(TPReg);
}
const auto &CSI = MFI.getCalleeSavedInfo();
@ -499,6 +503,22 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
emitSCSEpilogue(MF, MBB, MBBI, DL);
}
uint64_t RISCVFrameLowering::getExtractedStackOffset(const MachineFunction &MF,
unsigned FI, RISCVStackID::Value Stack) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t StackSize = 0;
for(int I = FI + 1; I != MFI.getObjectIndexEnd(); I++) {
if(static_cast<unsigned>(MFI.getStackID(I)) != Stack) {
// Need to consider the alignment for different frame index
uint64_t Align = MFI.getObjectAlign(I).value();
uint64_t ActualAlignSize = (Align + 3) >> 2;
uint64_t Size = ActualAlignSize * MFI.getObjectSize(I);
StackSize += Size;
}
}
return StackSize;
}
StackOffset
RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const {
@ -516,9 +536,11 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
StackID == RISCVStackID::SGPRSpill ||
StackID == RISCVStackID::VGPRSpill) &&
"Unexpected stack ID for the frame object.");
uint8_t Stack = MFI.getStackID(FI);
StackOffset Offset =
StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
MFI.getOffsetAdjustment());
StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea()
-getExtractedStackOffset(MF, FI, RISCVStackID::Value(Stack))
+ MFI.getOffsetAdjustment());
if (CSI.size()) {
MinCSFI = CSI[0].getFrameIdx();
@ -692,17 +714,17 @@ uint64_t RISCVFrameLowering::getStackSize(MachineFunction &MF,
}
void RISCVFrameLowering::determineStackID(MachineFunction &MF) const {
llvm::MachineFrameInfo &MFI = MF.getFrameInfo();
MachineFrameInfo &MFI = MF.getFrameInfo();
for(int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) {
// FIXME: There is no sGPR spill stack!
MFI.setStackID(I, RISCVStackID::VGPRSpill);
// MFI.setStackID(I, RISCVStackID::VGPRSpill);
// MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF,I);
// if(MFI.getStackID(I) != RISCVStackID::SGPRSpill &&
// PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
// MFI.setStackID(I, RISCVStackID::VGPRSpill);
// else
// MFI.setStackID(I, RISCVStackID::SGPRSpill);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF,I);
if(MFI.getStackID(I) != RISCVStackID::SGPRSpill &&
PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS)
MFI.setStackID(I, RISCVStackID::VGPRSpill);
else
MFI.setStackID(I, RISCVStackID::SGPRSpill);
}
}

View File

@ -67,6 +67,16 @@ public:
/// Get stack size for different stack ID
uint64_t getStackSize(MachineFunction &MF, RISCVStackID::Value ID) const;
/// Frame Objects:
/// fi#0: id=4 size=48, align=4, at location [SP+8]
/// fi#1: id=1 size=4, align=4, at location [SP+4] \
/// fi#2: id=1 size=4, align=4, at location [SP] \
/// As we can see, if we split the stack, different frame offset calculation
/// need to be modified too, when calculate the TP stack offset, we need to
/// extract the stack offset of 'SP' in machine function frame
uint64_t getExtractedStackOffset(const MachineFunction &MF, unsigned FI,
RISCVStackID::Value Stack) const;
/// Before insert prolog/epilog information, set stack ID for each frame index
void determineStackID(MachineFunction &MF) const;

View File

@ -98,6 +98,22 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return 0;
}
bool RISCVInstrInfo::isPrivateMemoryAccess(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return false;
case RISCV::VLW:
case RISCV::VLB:
case RISCV::VLBU:
case RISCV::VLH:
case RISCV::VLHU:
case RISCV::VSW:
case RISCV::VSH:
case RISCV::VSB:
return true;
}
}
unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {

View File

@ -53,6 +53,9 @@ public:
const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
const MCInstrDesc &getVBrCond(RISCVCC::CondCode CC) const;
bool isPrivateMemoryAccess(const MachineInstr &MI) const;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
unsigned isStoreToStackSlot(const MachineInstr &MI,

View File

@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
@ -99,6 +100,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, RISCV::FRM);
markSuperRegs(Reserved, RISCV::FFLAGS);
markSuperRegs(Reserved, getPrivateMemoryBaseRegister(
const_cast<MachineFunction&>(MF)));
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@ -156,6 +160,14 @@ bool RISCVRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
return RC ? isSGPRClass(RC) : false;
}
const Register RISCVRegisterInfo::getPrivateMemoryBaseRegister(
const MachineFunction &MF) const {
// FIXME: V0-V31 are used for argument registers, so here we use V32 for
// private memory based register, but V32 is beyond the 5 bits ranges, when
// this register are used, one more instruction is used
return RISCV::V32;
}
const TargetRegisterClass *
RISCVRegisterInfo::getPhysRegClass(MCRegister Reg) const {
static const TargetRegisterClass *const BaseClasses[] = {
@ -247,6 +259,32 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
void RISCVRegisterInfo::adjustPriMemRegOffset(MachineFunction &MF,
MachineBasicBlock &MBB, MachineInstr &MI, int64_t Offset,
Register PriMemReg, unsigned FIOperandNum) const{
auto &MRI = MF.getRegInfo();
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
const RISCVInstrInfo *TII = ST.getInstrInfo();
assert(!isSGPRReg(MRI, PriMemReg) && "Private memory base address in VGPR");
bool IsNegative = (Offset < -1024);
(--MI.getIterator());
Register ScratchReg = MRI.createVirtualRegister(&RISCV::VGPRRegClass);
// FIXME: maybe it is better change offset once rather than insert a new
// machine instruction??
BuildMI(MBB, --MI.getIterator(), (--MI.getIterator())->getDebugLoc(),
TII->get(RISCV::VADD_VI))
.addReg(ScratchReg)
.addReg(PriMemReg)
.addImm(IsNegative ? (Offset / -1024) * 1024 : -(Offset / 1024) * 1024);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(IsNegative ?
Offset + (Offset / -1024) * 1024
: Offset - (Offset / 1024) * 1024);
MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg,
/*IsDef*/false,
/*IsImp*/false,
/*IsKill*/true);
}
/// This function is to eliminate frame index for MachineInstruction in
/// StoreRegToSlot/LoadRegFromSlot function
bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
@ -257,13 +295,17 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
const RISCVRegisterInfo * RI = ST.getRegisterInfo();
const RISCVInstrInfo *RII = ST.getInstrInfo();
DebugLoc DL = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
auto FrameIndexID = MF.getFrameInfo().getStackID(FrameIndex);
Register FrameReg;
StackOffset Offset = // FIXME: The FrameReg and Offset should be depended on divergency route.
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
int64_t Lo11 = Offset.getFixed();
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
if (!isInt<32>(Offset.getFixed())) {
@ -271,7 +313,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
"Frame offsets outside of the signed 32-bit range not supported");
}
if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) {
if (MI.getOpcode() == RISCV::ADDI && !isInt<11>(Offset.getFixed())) {
// We chose to emit the canonical immediate sequence rather than folding
// the offset into the using add under the theory that doing so doesn't
// save dynamic instruction count and some target may fuse the canonical
@ -283,15 +325,38 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// operand of our user instruction. As a result, the remaining
// offset can by construction, at worst, a LUI and a ADD.
int64_t Val = Offset.getFixed();
int64_t Lo12 = SignExtend64<12>(Val);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12);
Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12,
Lo11 = SignExtend64<11>(Val);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo11);
Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo11,
Offset.getScalable());
}
if(MI.getOpcode() == RISCV::ADDI &&
static_cast<unsigned>(FrameIndexID) == RISCVStackID::VGPRSpill) {
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg,
/*IsDef*/false,
/*IsImp*/false,
/*IsKill*/false);
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false,
/*IsImp*/false,
/*IsKill*/false);
}
if(RII->isPrivateMemoryAccess(MI)) {
MI.getOperand(FIOperandNum).ChangeToRegister(getPrivateMemoryBaseRegister(MF),
/*IsDef*/false,
/*IsImp*/false,
/*IsKill*/false);
// simm11 locates in range [-1024, 1023], if offset not in this range, then
// we legalize the offset
if(!isInt<11>(Lo11))
adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11,
getPrivateMemoryBaseRegister(MF), FIOperandNum);
}
else
MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false,
/*IsImp*/false,
/*IsKill*/false);
// If after materializing the adjustment, we have a pointless ADDI, remove it
if (MI.getOpcode() == RISCV::ADDI &&

View File

@ -21,11 +21,7 @@
namespace llvm {
// This needs to be kept in sync with the field bits in VentusRegisterClass.
enum RISCVRCFlags {
IsVGPR = 1 << 0,
IsSGPR = 1 << 1
}; // enum RISCVRCFlags
enum RISCVRCFlags { IsVGPR = 1 << 0, IsSGPR = 1 << 1 }; // enum RISCVRCFlags
struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
@ -84,12 +80,23 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
StackOffset Offset, MachineInstr::MIFlag Flag,
MaybeAlign RequiredAlign) const;
/// Adjust private memory offset which is supposed to be simm11, when offset is
/// beyond the range, we need to legalize the offset
void adjustPriMemRegOffset(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr &MI, int64_t offset, Register PriMemReg,
unsigned FIOperandNum) const;
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
/// In Ventus, private memory access are based on TP, but the memory access
/// instructions are based on VGPR, we need to define a VGPR register for
/// private memory access
const Register getPrivateMemoryBaseRegister(const MachineFunction &MF) const;
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
}
@ -118,6 +125,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
};
}
} // namespace llvm
#endif

View File

@ -47,13 +47,13 @@ multiclass PatVFRBin<SDPatternOperator Op, list<RVInst> Insts> {
class DivergentPriLdPat<PatFrag LoadOp, RVInst Inst>
: Pat<(XLenVT (DivergentPrivateLoadFrag<LoadOp>
(PriAddrRegImm TP:$rs1, (simm11:$imm11)))),
(Inst TP:$rs1, simm11:$imm11)>;
(PriAddrRegImm (XLenVT VGPR:$rs1), (simm11:$imm11)))),
(Inst VGPR:$rs1, simm11:$imm11)>;
class DivergentPriStPat<PatFrag StoreOp, RVInst Inst>
: Pat<(DivergentPrivateStoreFrag<StoreOp>
(XLenVT VGPR:$vs3), (PriAddrRegImm TP:$rs1, (simm11:$imm11))),
(Inst VGPR:$vs3, TP:$rs1, simm11:$imm11)>;
(XLenVT VGPR:$vs3), (PriAddrRegImm (XLenVT VGPR:$rs1), (simm11:$imm11))),
(Inst VGPR:$vs3, VGPR:$rs1, simm11:$imm11)>;
class DivergentNonPriLdPat<PatFrag LoadOp, RVInst Inst>
: Pat<(XLenVT (DivergentNonPrivateLoadFrag<LoadOp>