From f494e20d447d71a5327a081b85dbc1526237efbd Mon Sep 17 00:00:00 2001 From: zhoujing Date: Sun, 25 Jun 2023 10:48:57 +0800 Subject: [PATCH] [VENTUS][RISCV][fix] Fix private memory access instructions' codegen errors We changed the private memory access' encoding in this commit `https://github.com/THU-DSP-LAB/llvm-project/commit/6da666856b672cf0022dd5774964e76c9871a78e`, this commit is to fix the codegen bugs by that commit --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 7 ++ llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 42 +++++++--- llvm/lib/Target/RISCV/RISCVFrameLowering.h | 10 +++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 16 ++++ llvm/lib/Target/RISCV/RISCVInstrInfo.h | 3 + llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 81 +++++++++++++++++-- llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 19 +++-- llvm/lib/Target/RISCV/VentusInstrInfoV.td | 8 +- 8 files changed, 158 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index b8a521eedf7d..c3148d704690 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1262,9 +1262,16 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, (1 << 4), "immediate must be in the range"); } + case Match_InvalidRnumArg: { return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10); } + + case Match_InvalidSImm11: { + SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "simm11 range is[-1024, 1023]"); + }; + } llvm_unreachable("Unknown match type detected!"); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 90f61359cd4d..3e86fb0235ea 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -414,6 +414,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::VMV_S_X), + RI->getPrivateMemoryBaseRegister(MF)) + .addReg(RI->getPrivateMemoryBaseRegister(MF)) + .addReg(TPReg); } const auto &CSI = MFI.getCalleeSavedInfo(); @@ -499,6 +503,22 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, emitSCSEpilogue(MF, MBB, MBBI, DL); } +uint64_t RISCVFrameLowering::getExtractedStackOffset(const MachineFunction &MF, + unsigned FI, RISCVStackID::Value Stack) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + uint64_t StackSize = 0; + for(int I = FI + 1; I != MFI.getObjectIndexEnd(); I++) { + if(static_cast(MFI.getStackID(I)) != Stack) { + // Need to consider the alignment for different frame index + uint64_t Align = MFI.getObjectAlign(I).value(); + uint64_t ActualAlignSize = (Align + 3) >> 2; + uint64_t Size = ActualAlignSize * MFI.getObjectSize(I); + StackSize += Size; + } + } + return StackSize; +} + StackOffset RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const { @@ -516,9 +536,11 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, StackID == RISCVStackID::SGPRSpill || StackID == RISCVStackID::VGPRSpill) && "Unexpected stack ID for the frame object."); + uint8_t Stack = MFI.getStackID(FI); StackOffset Offset = - StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + - MFI.getOffsetAdjustment()); + StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + -getExtractedStackOffset(MF, FI, RISCVStackID::Value(Stack)) + + MFI.getOffsetAdjustment()); if (CSI.size()) { MinCSFI = CSI[0].getFrameIdx(); @@ -692,17 +714,17 @@ uint64_t RISCVFrameLowering::getStackSize(MachineFunction &MF, } void RISCVFrameLowering::determineStackID(MachineFunction &MF) const { - llvm::MachineFrameInfo &MFI = MF.getFrameInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); for(int I = MFI.getObjectIndexBegin(); I != MFI.getObjectIndexEnd(); I++) { // FIXME: There is no sGPR spill stack! - MFI.setStackID(I, RISCVStackID::VGPRSpill); + // MFI.setStackID(I, RISCVStackID::VGPRSpill); - // MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF,I); - // if(MFI.getStackID(I) != RISCVStackID::SGPRSpill && - // PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS) - // MFI.setStackID(I, RISCVStackID::VGPRSpill); - // else - // MFI.setStackID(I, RISCVStackID::SGPRSpill); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF,I); + if(MFI.getStackID(I) != RISCVStackID::SGPRSpill && + PtrInfo.getAddrSpace() == RISCVAS::PRIVATE_ADDRESS) + MFI.setStackID(I, RISCVStackID::VGPRSpill); + else + MFI.setStackID(I, RISCVStackID::SGPRSpill); } } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h index e3abcf832dff..8f08b4e16f5e 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h @@ -67,6 +67,16 @@ public: /// Get stack size for different stack ID uint64_t getStackSize(MachineFunction &MF, RISCVStackID::Value ID) const; + /// Frame Objects: + /// fi#0: id=4 size=48, align=4, at location [SP+8] + /// fi#1: id=1 size=4, align=4, at location [SP+4] \ + /// fi#2: id=1 size=4, align=4, at location [SP] \ + /// As we can see, if we split the stack, different frame offset calculation + /// need to be modified too, when calculate the TP stack offset, we need to + /// extract the stack offset of 'SP' in machine function frame + uint64_t getExtractedStackOffset(const MachineFunction &MF, unsigned FI, + RISCVStackID::Value Stack) const; + /// Before insert prolog/epilog information, set stack ID for each frame index void determineStackID(MachineFunction &MF) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 1a69720db7d6..08acba62f06c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -98,6 +98,22 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } +bool RISCVInstrInfo::isPrivateMemoryAccess(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: + return false; + case RISCV::VLW: + case RISCV::VLB: + case RISCV::VLBU: + case RISCV::VLH: + case RISCV::VLHU: + case RISCV::VSW: + case RISCV::VSH: + case RISCV::VSB: + return true; + } +} + unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index f4d29b62bef6..5f241c8b4144 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -53,6 +53,9 @@ public: const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const; const MCInstrDesc &getVBrCond(RISCVCC::CondCode CC) const; + + bool isPrivateMemoryAccess(const MachineInstr &MI) const; + unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override; unsigned isStoreToStackSlot(const MachineInstr &MI, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 25cefab396c6..55706de3c7b4 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -99,6 +100,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { markSuperRegs(Reserved, RISCV::FRM); markSuperRegs(Reserved, RISCV::FFLAGS); + markSuperRegs(Reserved, getPrivateMemoryBaseRegister( + const_cast(MF))); + assert(checkAllSuperRegsMarked(Reserved)); return Reserved; } @@ -156,6 +160,14 @@ bool RISCVRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI, return RC ? isSGPRClass(RC) : false; } +const Register RISCVRegisterInfo::getPrivateMemoryBaseRegister( + const MachineFunction &MF) const { + // FIXME: V0-V31 are used for argument registers, so here we use V32 for + // private memory based register, but V32 is beyond the 5 bits ranges, when + // this register are used, one more instruction is used + return RISCV::V32; +} + const TargetRegisterClass * RISCVRegisterInfo::getPhysRegClass(MCRegister Reg) const { static const TargetRegisterClass *const BaseClasses[] = { @@ -247,6 +259,32 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, .setMIFlag(Flag); } +void RISCVRegisterInfo::adjustPriMemRegOffset(MachineFunction &MF, + MachineBasicBlock &MBB, MachineInstr &MI, int64_t Offset, + Register PriMemReg, unsigned FIOperandNum) const{ + auto &MRI = MF.getRegInfo(); + const RISCVSubtarget &ST = MF.getSubtarget(); + const RISCVInstrInfo *TII = ST.getInstrInfo(); + assert(!isSGPRReg(MRI, PriMemReg) && "Private memory base address in VGPR"); + bool IsNegative = (Offset < -1024); + (--MI.getIterator()); + Register ScratchReg = MRI.createVirtualRegister(&RISCV::VGPRRegClass); + // FIXME: maybe it is better change offset once rather than insert a new + // machine instruction?? + BuildMI(MBB, --MI.getIterator(), (--MI.getIterator())->getDebugLoc(), + TII->get(RISCV::VADD_VI)) + .addReg(ScratchReg) + .addReg(PriMemReg) + .addImm(IsNegative ? (Offset / -1024) * 1024 : -(Offset / 1024) * 1024); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(IsNegative ? + Offset + (Offset / -1024) * 1024 + : Offset - (Offset / 1024) * 1024); + MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, + /*IsDef*/false, + /*IsImp*/false, + /*IsKill*/true); +} + /// This function is to eliminate frame index for MachineInstruction in /// StoreRegToSlot/LoadRegFromSlot function bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, @@ -257,13 +295,17 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); const RISCVSubtarget &ST = MF.getSubtarget(); + const RISCVRegisterInfo * RI = ST.getRegisterInfo(); + const RISCVInstrInfo *RII = ST.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + auto FrameIndexID = MF.getFrameInfo().getStackID(FrameIndex); + Register FrameReg; StackOffset Offset = // FIXME: The FrameReg and Offset should be depended on divergency route. getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg); - + int64_t Lo11 = Offset.getFixed(); Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); if (!isInt<32>(Offset.getFixed())) { @@ -271,7 +313,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, "Frame offsets outside of the signed 32-bit range not supported"); } - if (MI.getOpcode() == RISCV::ADDI && !isInt<12>(Offset.getFixed())) { + if (MI.getOpcode() == RISCV::ADDI && !isInt<11>(Offset.getFixed())) { // We chose to emit the canonical immediate sequence rather than folding // the offset into the using add under the theory that doing so doesn't // save dynamic instruction count and some target may fuse the canonical @@ -283,15 +325,38 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // operand of our user instruction. As a result, the remaining // offset can by construction, at worst, a LUI and a ADD. int64_t Val = Offset.getFixed(); - int64_t Lo12 = SignExtend64<12>(Val); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo12); - Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo12, + Lo11 = SignExtend64<11>(Val); + + + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo11); + Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo11, Offset.getScalable()); } + if(MI.getOpcode() == RISCV::ADDI && + static_cast(FrameIndexID) == RISCVStackID::VGPRSpill) { + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, + /*IsDef*/false, + /*IsImp*/false, + /*IsKill*/false); - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false, - /*IsImp*/false, - /*IsKill*/false); + } + + if(RII->isPrivateMemoryAccess(MI)) { + MI.getOperand(FIOperandNum).ChangeToRegister(getPrivateMemoryBaseRegister(MF), + /*IsDef*/false, + /*IsImp*/false, + /*IsKill*/false); + // simm11 locates in range [-1024, 1023], if offset not in this range, then + // we legalize the offset + if(!isInt<11>(Lo11)) + adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11, + getPrivateMemoryBaseRegister(MF), FIOperandNum); + } + + else + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/false, + /*IsImp*/false, + /*IsKill*/false); // If after materializing the adjustment, we have a pointless ADDI, remove it if (MI.getOpcode() == RISCV::ADDI && diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 607434ef5c87..ea6584f8aaa9 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -21,11 +21,7 @@ namespace llvm { // This needs to be kept in sync with the field bits in VentusRegisterClass. -enum RISCVRCFlags { - IsVGPR = 1 << 0, - IsSGPR = 1 << 1 -}; // enum RISCVRCFlags - +enum RISCVRCFlags { IsVGPR = 1 << 0, IsSGPR = 1 << 1 }; // enum RISCVRCFlags struct RISCVRegisterInfo : public RISCVGenRegisterInfo { @@ -84,12 +80,23 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { StackOffset Offset, MachineInstr::MIFlag Flag, MaybeAlign RequiredAlign) const; + /// Adjust private memory offset which is supposed to be simm11, when offset is + /// beyond the range, we need to legalize the offset + void adjustPriMemRegOffset(MachineFunction &MF, MachineBasicBlock &MBB, + MachineInstr &MI, int64_t offset, Register PriMemReg, + unsigned FIOperandNum) const; + bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; Register getFrameRegister(const MachineFunction &MF) const override; + /// In Ventus, private memory access are based on TP, but the memory access + /// instructions are based on VGPR, we need to define a VGPR register for + /// private memory access + const Register getPrivateMemoryBaseRegister(const MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; } @@ -118,6 +125,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/VentusInstrInfoV.td b/llvm/lib/Target/RISCV/VentusInstrInfoV.td index 284d845e6332..f75b9dbfa758 100644 --- a/llvm/lib/Target/RISCV/VentusInstrInfoV.td +++ b/llvm/lib/Target/RISCV/VentusInstrInfoV.td @@ -47,13 +47,13 @@ multiclass PatVFRBin Insts> { class DivergentPriLdPat : Pat<(XLenVT (DivergentPrivateLoadFrag - (PriAddrRegImm TP:$rs1, (simm11:$imm11)))), - (Inst TP:$rs1, simm11:$imm11)>; + (PriAddrRegImm (XLenVT VGPR:$rs1), (simm11:$imm11)))), + (Inst VGPR:$rs1, simm11:$imm11)>; class DivergentPriStPat : Pat<(DivergentPrivateStoreFrag - (XLenVT VGPR:$vs3), (PriAddrRegImm TP:$rs1, (simm11:$imm11))), - (Inst VGPR:$vs3, TP:$rs1, simm11:$imm11)>; + (XLenVT VGPR:$vs3), (PriAddrRegImm (XLenVT VGPR:$rs1), (simm11:$imm11))), + (Inst VGPR:$vs3, VGPR:$rs1, simm11:$imm11)>; class DivergentNonPriLdPat : Pat<(XLenVT (DivergentNonPrivateLoadFrag