diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 207735fe2f7b..32f91baf0be3 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -912,69 +912,6 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
   return std::make_pair(StackSize, RVVStackAlign);
 }
 
-static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
-  // For RVV spill, scalable stack offsets computing requires up to two scratch
-  // registers
-  static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;
-
-  // For RVV spill, non-scalable stack offsets computing requires up to one
-  // scratch register.
-  static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;
-
-  // ADDI instruction's destination register can be used for computing
-  // offsets. So Scalable stack offsets require up to one scratch register.
-  static constexpr unsigned ScavSlotsADDIScalableObject = 1;
-
-  static constexpr unsigned MaxScavSlotsNumKnown =
-      std::max({ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject,
-                ScavSlotsNumRVVSpillNonScalableObject});
-
-  unsigned MaxScavSlotsNum = 0;
-  if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
-    return false;
-  for (const MachineBasicBlock &MBB : MF)
-    for (const MachineInstr &MI : MBB) {
-      bool IsRVVSpill = RISCV::isRVVSpill(MI);
-      for (auto &MO : MI.operands()) {
-        if (!MO.isFI())
-          continue;
-        bool IsScalableVectorID = MF.getFrameInfo().getStackID(MO.getIndex()) ==
-                                  TargetStackID::ScalableVector;
-        if (IsRVVSpill) {
-          MaxScavSlotsNum = std::max(
-              MaxScavSlotsNum, IsScalableVectorID
-                                   ? ScavSlotsNumRVVSpillScalableObject
-                                   : ScavSlotsNumRVVSpillNonScalableObject);
-        } else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) {
-          MaxScavSlotsNum =
-              std::max(MaxScavSlotsNum, ScavSlotsADDIScalableObject);
-        }
-      }
-      if (MaxScavSlotsNum == MaxScavSlotsNumKnown)
-        return MaxScavSlotsNumKnown;
-    }
-  return MaxScavSlotsNum;
-}
-
-static bool hasRVVFrameObject(const MachineFunction &MF) {
-  // Originally, the function will scan all the stack objects to check whether
-  // if there is any scalable vector object on the stack or not. However, it
-  // causes errors in the register allocator. In issue 53016, it returns false
-  // before RA because there is no RVV stack objects. After RA, it returns true
-  // because there are spilling slots for RVV values during RA. It will not
-  // reserve BP during register allocation and generate BP access in the PEI
-  // pass due to the inconsistent behavior of the function.
-  //
-  // The function is changed to use hasVInstructions() as the return value. It
-  // is not precise, but it can make the register allocation correct.
-  //
-  // FIXME: Find a better way to make the decision or revisit the solution in
-  // D103622.
-  //
-  // Refer to https://github.com/llvm/llvm-project/issues/53016.
-  return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
-}
-
 static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
                                             const RISCVInstrInfo &TII) {
   unsigned FnSize = 0;
@@ -1022,20 +959,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
   const TargetRegisterClass *RC = &RISCV::GPRRegClass;
   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
 
-  int64_t RVVStackSize;
-  Align RVVStackAlign;
-  std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF);
-
-  RVFI->setRVVStackSize(RVVStackSize);
-  RVFI->setRVVStackAlign(RVVStackAlign);
-
-  if (hasRVVFrameObject(MF)) {
-    // Ensure the entire stack is aligned to at least the RVV requirement: some
-    // scalable-vector object alignments are not considered by the
-    // target-independent code.
-    MFI.ensureMaxAlignment(RVVStackAlign);
-  }
-
   unsigned ScavSlotsNum = 0;
 
   // estimateStackSize has been observed to under-estimate the final stack
@@ -1049,11 +972,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
   if (IsLargeFunction)
     ScavSlotsNum = std::max(ScavSlotsNum, 1u);
 
-  // RVV loads & stores have no capacity to hold the immediate address offsets
-  // so we must always reserve an emergency spill slot if the MachineFunction
-  // contains any RVV spills.
-  ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF));
-
   for (unsigned I = 0; I < ScavSlotsNum; I++) {
     int FI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
                                    RegInfo->getSpillAlign(*RC), false);
@@ -1084,8 +1002,7 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
 // by the frame pointer.
 // Let eliminateCallFramePseudoInstr preserve stack space for it.
 bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  return !MF.getFrameInfo().hasVarSizedObjects() &&
-         !(hasFP(MF) && hasRVVFrameObject(MF));
+  return !MF.getFrameInfo().hasVarSizedObjects();
 }
 
 // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a0f96fbd70d2..8bfebfe04f25 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -187,54 +187,29 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   MachineFrameInfo &MFI = MF->getFrameInfo();
 
   unsigned Opcode;
-  bool IsScalableVector = true;
-  bool IsZvlsseg = true;
   if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
     Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
              RISCV::SW : RISCV::SD;
-    IsScalableVector = false;
   } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::FSH;
-    IsScalableVector = false;
   } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::FSW;
-    IsScalableVector = false;
   } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::FSD;
-    IsScalableVector = false;
   } else if (RISCV::VGPRRegClass.hasSubClassEq(RC)) {
-    Opcode = RISCV::VSE32_V;
-    IsZvlsseg = false;
+    Opcode = RISCV::VSUXEI32;
   } else
     llvm_unreachable("Can't store this register to stack slot");
 
-  if (IsScalableVector) {
-    MachineMemOperand *MMO = MF->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
-        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
 
-    MFI.setStackID(FI, TargetStackID::ScalableVector);
-    auto MIB = BuildMI(MBB, I, DL, get(Opcode))
-                   .addReg(SrcReg, getKillRegState(IsKill))
-                   .addFrameIndex(FI)
-                   .addMemOperand(MMO);
-    if (IsZvlsseg) {
-      // For spilling/reloading Zvlsseg registers, append the dummy field for
-      // the scaled vector length. The argument will be used when expanding
-      // these pseudo instructions.
-      MIB.addReg(RISCV::X0);
-    }
-  } else {
-    MachineMemOperand *MMO = MF->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
-        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
-
-    BuildMI(MBB, I, DL, get(Opcode))
-        .addReg(SrcReg, getKillRegState(IsKill))
-        .addFrameIndex(FI)
-        .addImm(0)
-        .addMemOperand(MMO);
-  }
+  BuildMI(MBB, I, DL, get(Opcode))
+      .addReg(SrcReg, getKillRegState(IsKill))
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
 }
 
 void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -250,52 +225,28 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   MachineFrameInfo &MFI = MF->getFrameInfo();
 
   unsigned Opcode;
-  bool IsScalableVector = true;
-  bool IsZvlsseg = true;
   if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
     Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
              RISCV::LW : RISCV::LD;
-    IsScalableVector = false;
   } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::FLH;
-    IsScalableVector = false;
   } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::FLW;
-    IsScalableVector = false;
   } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::FLD;
-    IsScalableVector = false;
   } else if (RISCV::VGPRRegClass.hasSubClassEq(RC)) {
-    Opcode = RISCV::VLE32_V;
-    IsZvlsseg = false;
+    Opcode = RISCV::VLUXEI32;
   } else
     llvm_unreachable("Can't load this register from stack slot");
 
-  if (IsScalableVector) {
-    MachineMemOperand *MMO = MF->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
-        MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
+  MachineMemOperand *MMO = MF->getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
 
-    MFI.setStackID(FI, TargetStackID::ScalableVector);
-    auto MIB = BuildMI(MBB, I, DL, get(Opcode), DstReg)
-                   .addFrameIndex(FI)
-                   .addMemOperand(MMO);
-    if (IsZvlsseg) {
-      // For spilling/reloading Zvlsseg registers, append the dummy field for
-      // the scaled vector length. The argument will be used when expanding
-      // these pseudo instructions.
-      MIB.addReg(RISCV::X0);
-    }
-  } else {
-    MachineMemOperand *MMO = MF->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
-        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
-
-    BuildMI(MBB, I, DL, get(Opcode), DstReg)
-        .addFrameIndex(FI)
-        .addImm(0)
-        .addMemOperand(MMO);
-  }
+  BuildMI(MBB, I, DL, get(Opcode), DstReg)
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
 }
 
 MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
@@ -1453,19 +1404,6 @@ bool RISCV::isZEXT_B(const MachineInstr &MI) {
          MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
 }
 
-bool RISCV::isRVVSpill(const MachineInstr &MI) {
-  switch (MI.getOpcode()) {
-  default:
-    return false;
-  case RISCV::VS1R_V:
-  case RISCV::VL1RE8_V:
-  case RISCV::VL1RE16_V:
-  case RISCV::VL1RE32_V:
-  case RISCV::VL1RE64_V:
-    return true;
-  }
-}
-
 bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
   int16_t MI1FrmOpIdx =
       RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 1e56108a61c1..a0c537c51089 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -205,10 +205,6 @@ bool isSEXT_W(const MachineInstr &MI);
 bool isZEXT_W(const MachineInstr &MI);
 bool isZEXT_B(const MachineInstr &MI);
 
-// Returns true if the given MI is an RVV instruction opcode for which we may
-// expect to see a FrameIndex operand.
-bool isRVVSpill(const MachineInstr &MI);
-
 // Implemented in RISCVGenInstrInfo.inc
 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 54e1343c5b92..c81a71662817 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -34,6 +34,7 @@
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/IPO.h"
 #include <optional>
 using namespace llvm;
@@ -200,6 +201,11 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
 }
 
 void RISCVPassConfig::addIRPasses() {
+  if (getOptLevel() != CodeGenOpt::None) {
+    addPass(createSROAPass());
+    addPass(createInferAddressSpacesPass());
+  }
+
   addPass(createAtomicExpandPass());
 
   if (getOptLevel() != CodeGenOpt::None)
diff --git a/llvm/lib/Target/RISCV/VentusInstrInfo.td b/llvm/lib/Target/RISCV/VentusInstrInfo.td
index c40fcf767153..946a8e7dca0b 100644
--- a/llvm/lib/Target/RISCV/VentusInstrInfo.td
+++ b/llvm/lib/Target/RISCV/VentusInstrInfo.td
@@ -1572,7 +1572,6 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs
 } // Predicates = [IsRV64], ...
 
 /// Loads
-
 multiclass UniformLdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> {
   def : Pat<(vt (UniformUnaryFrag<LoadOp> (AddrRegImm GPR:$rs1, simm12:$imm12))),
             (Inst GPR:$rs1, simm12:$imm12)>;
@@ -1587,7 +1586,6 @@ defm : UniformLdPat;
 defm : UniformLdPat;
 
 /// Stores
-
 multiclass UniformStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy, ValueType vt = XLenVT> {
   def : Pat<(UniformBinFrag<StoreOp> (vt StTy:$rs2), (AddrRegImm GPR:$rs1, simm12:$imm12)),
diff --git a/llvm/lib/Target/RISCV/VentusInstrInfoV.td b/llvm/lib/Target/RISCV/VentusInstrInfoV.td
index 39d63cbfc0b8..1d99b91fc6cc 100644
--- a/llvm/lib/Target/RISCV/VentusInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/VentusInstrInfoV.td
@@ -30,13 +30,13 @@ multiclass PatVBin<SDPatternOperator Op, list<RVInst> Insts> {
 }
 
 // RVV VX, VI instruction pattern class for binary operations
-multiclass PatXIBin<SDPatternOperator Op, list<RVInst> Insts,
+multiclass PatXIBin<SDPatternOperator Op, list<RVInst> Insts,
                     int IsReverseOperation = 0> {
   if !eq(IsReverseOperation, 1) then {
     def : Pat<(Op GPR:$rs1, VGPR:$rs2), (Insts[0] VGPR:$rs2, GPR:$rs1)>;
     def : Pat<(XLenVT (Op uimm5:$imm, (XLenVT VGPR:$rs1))),
               (Insts[1] VGPR:$rs1, uimm5:$imm)>;
-  }
+  } else {
     def : Pat<(Op VGPR:$rs1, GPR:$rs2), (Insts[0] VGPR:$rs1, GPR:$rs2)>;
     def : Pat<(XLenVT (Op (XLenVT VGPR:$rs1), uimm5:$imm)),
@@ -44,6 +44,16 @@ multiclass PatXIBin<SDPatternOperator Op, list<RVInst> Insts,
   }
 }
 
+multiclass DivergentLdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> {
+  def : Pat<(vt (DivergentUnaryFrag<LoadOp> (AddrRegImm GPR:$rs1, VGPR:$vs2))),
+            (Inst GPR:$rs1, VGPR:$vs2)>;
+}
+
+multiclass DivergentStPat<PatFrag StoreOp, RVInst Inst, ValueType vt = XLenVT> {
+  def : Pat<(DivergentBinFrag<StoreOp> (vt VGPR:$vs3), (AddrRegImm GPR:$rs1, VGPR:$vs2)),
+            (Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
@@ -112,9 +122,6 @@ def simm5_plus1_nonzero : ImmLeaf<XLenVT, [{
-class VMVRSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVMov" # n # "V"),
-                                 !cast<SchedReadWrite>("ReadVMov" # n # "V")]>;
-
 class VLESched : Sched <[WriteVLDE, ReadVLDX]>;
 
 class VSESched : Sched <[WriteVSTE, ReadVSTEV, ReadVSTX]>;
@@ -141,38 +148,24 @@ class VLFSched : Sched <[ReadVLDX]>;
 // Instruction class templates
 //===----------------------------------------------------------------------===//
 
+//
+// FIXME: The encoding for vluxei/vsuxei is not correct!!
+//
+
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
-// unit-stride load vd, (rs1), vm
-class VUnitStrideLoad<RISCVWidth width, string opcodestr>
+// vluxei vd, (rs1), vs2 which vs2 is used as offset, not indexing
+class VectorLoad<RISCVWidth width, string opcodestr>
     : RVInstVLU<0b000, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
                 (outs VGPR:$vd),
-                (ins GPRMem:$rs1), opcodestr, "$vd, (${rs1})">;
-
-let RVVConstraint = NoConstraint in {
-// unit-stride whole register load vlr.v vd, (rs1)
-class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass VRC> : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg, width.Value{2-0}, (outs VRC:$vd), (ins GPRMem:$rs1), opcodestr, "$vd, (${rs1})"> {
-  let Uses = [];
-}
-} // RVVConstraint = NoConstraint
+                (ins GPRMem:$rs1, VGPR:$vs2), opcodestr, "$vd, (${rs1}), $vs2">;
 } // hasSideEffects = 0, mayLoad = 1, mayStore = 0
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
-// unit-stride store vd, vs3, (rs1), vm
-class VUnitStrideStore<RISCVWidth width, string opcodestr>
+// vsuxei vs3, (rs1), vs2 where vs2 is used as offset, not indexing
+class VectorStore<RISCVWidth width, string opcodestr>
     : RVInstVSU<0b000, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
-                (outs), (ins VGPR:$vs3, GPRMem:$rs1), opcodestr,
-                "$vs3, (${rs1})">;
-
-// vsr.v vd, (rs1)
-class VWholeStore<bits<3> nf, string opcodestr, RegisterClass VRC>
-    : RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg, 0b000, (outs), (ins VRC:$vs3, GPRMem:$rs1), opcodestr, "$vs3, (${rs1})"> {
-  let Uses = [];
-}
+                (outs), (ins VGPR:$vs3, GPRMem:$rs1, VGPR:$vs2), opcodestr,
+                "$vs3, (${rs1}), $vs2">;
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 1
 
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
@@ -628,20 +621,6 @@ multiclass VNCLP_IV_V_X_I<bits<6> funct6, Operand optype = sim
   Sched<[WriteVNClipI_UpperBound, ReadVNClipV_UpperBound]>;
 }
 
-multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
-  foreach l = [8, 16, 32] in {
-    defvar w = !cast<RISCVWidth>("LSWidth" # l);
-    defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
-
-    def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
-                     Sched<[s, ReadVLDX]>;
-  }
-}
-multiclass VWholeLoadEEW64<bits<3> nf, string opcodestr, RegisterClass VRC, SchedReadWrite schedrw> {
-  def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>,
-              Sched<[schedrw, ReadVLDX]>;
-}
-
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isBranch = 1,
     isTerminator = 1 in
 class BranchCC_vvi<bits<3> funct3, string opcodestr>
@@ -660,35 +639,17 @@ def VBLTU : BranchCC_vvi<0b110, "vbltu">;
 def VBGEU : BranchCC_vvi<0b111, "vbgeu">;
 def JOIN : BranchCC_vvi<0b011, "join">;
 
-foreach eew = [8, 16, 32] in {
-  defvar w = !cast<RISCVWidth>("LSWidth" # eew);
+def VLUXEI8 : VectorLoad;
+def VLUXEI16 : VectorLoad;
+def VLUXEI32 : VectorLoad;
 
-  // Ventus use Vector Unit-Stride Instructions as basic load/store like sALU's
-  // lb/lh/lw.
-  // TODO: We may add load multiple byte/short/word version of it.
-  def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched;
-  def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched;
-}
+// TODO: Encoding for these 2 instructions are not correct.
+//def VLUXEI8U : VectorLoad;
+//def VLUXEI16U : VectorLoad;
 
-let Predicates = [HasVInstructions] in {
-defm VL1R : VWholeLoadN<0, "vl1r", VGPR>;
-
-def VS1R_V : VWholeStore<0, "vs1r.v", VGPR>,
-             Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
-
-def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VGPR:$vd, GPR:$rs1)>;
-} // Predicates = [HasVInstructions]
-
-let Predicates = [HasVInstructionsI64] in {
-// Vector Unit-Stride Instructions
-def VLE64_V : VUnitStrideLoad<LSWidth64, "vle64.v">,
-              VLESched;
-
-def VSE64_V : VUnitStrideStore<LSWidth64, "vse64.v">,
-              VSESched;
-
-defm VL1R: VWholeLoadEEW64<0, "vl1r", VGPR, WriteVLD1R>;
-} // Predicates = [HasVInstructionsI64]
+def VSUXEI8 : VectorStore;
+def VSUXEI16 : VectorStore;
+def VSUXEI32 : VectorStore;
 
 let Predicates = [HasVInstructions] in {
 // Vector Single-Width Integer Add and Subtract
@@ -1098,6 +1059,16 @@ defm : PatVBin<udiv, [VDIVU_VV, VDIVU_VX]>;
 defm : PatVBin<srem, [VREM_VV, VREM_VX]>;
 defm : PatVBin<urem, [VREMU_VV, VREMU_VX]>;
 
+defm : DivergentLdPat;
+defm : DivergentLdPat;
+defm : DivergentLdPat;
+defm : DivergentLdPat;
+defm : DivergentLdPat;
+//defm : DivergentLdPat;
+//defm : DivergentLdPat;
+defm : DivergentStPat;
+defm : DivergentStPat;
+defm : DivergentStPat;
 
 // Match `riscv_brcc` and lower to the appropriate RISC-V branch instruction.
 class DivergentBccPat