Initially add vector load/store instructions and related codegen

Aries 2022-12-21 16:26:36 +08:00
parent e171152174
commit 8c531048c2
6 changed files with 66 additions and 240 deletions
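
The new opcodes take a scalar base register plus a vector offset register (the $vs2 operand in the formats below). As a rough illustration with made-up registers, and assuming $vs2 supplies a per-lane byte offset as the comments in the .td changes suggest, the added syntax looks like:

    vluxei32.v  v1, (a0), v2    # load:  v1 <- mem[a0 + v2], v2 used as an offset, not an index
    vsuxei32.v  v1, (a0), v2    # store: mem[a0 + v2] <- v1

VGPR spills and reloads in RISCVInstrInfo now go through these VLUXEI32/VSUXEI32 opcodes in place of the old vle32.v/vse32.v unit-stride pair.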

View File

@@ -912,69 +912,6 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
return std::make_pair(StackSize, RVVStackAlign);
}
static unsigned getScavSlotsNumForRVV(MachineFunction &MF) {
// For RVV spills, computing scalable stack offsets requires up to two scratch
// registers.
static constexpr unsigned ScavSlotsNumRVVSpillScalableObject = 2;
// For RVV spills, computing non-scalable stack offsets requires up to one
// scratch register.
static constexpr unsigned ScavSlotsNumRVVSpillNonScalableObject = 1;
// The ADDI instruction's destination register can be used for computing
// offsets, so scalable stack offsets require up to one scratch register.
static constexpr unsigned ScavSlotsADDIScalableObject = 1;
static constexpr unsigned MaxScavSlotsNumKnown =
std::max({ScavSlotsADDIScalableObject, ScavSlotsNumRVVSpillScalableObject,
ScavSlotsNumRVVSpillNonScalableObject});
unsigned MaxScavSlotsNum = 0;
if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
return false;
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB) {
bool IsRVVSpill = RISCV::isRVVSpill(MI);
for (auto &MO : MI.operands()) {
if (!MO.isFI())
continue;
bool IsScalableVectorID = MF.getFrameInfo().getStackID(MO.getIndex()) ==
TargetStackID::ScalableVector;
if (IsRVVSpill) {
MaxScavSlotsNum = std::max(
MaxScavSlotsNum, IsScalableVectorID
? ScavSlotsNumRVVSpillScalableObject
: ScavSlotsNumRVVSpillNonScalableObject);
} else if (MI.getOpcode() == RISCV::ADDI && IsScalableVectorID) {
MaxScavSlotsNum =
std::max(MaxScavSlotsNum, ScavSlotsADDIScalableObject);
}
}
if (MaxScavSlotsNum == MaxScavSlotsNumKnown)
return MaxScavSlotsNumKnown;
}
return MaxScavSlotsNum;
}
static bool hasRVVFrameObject(const MachineFunction &MF) {
// Originally, the function scanned all the stack objects to check whether
// there was any scalable vector object on the stack. However, that caused
// errors in the register allocator: in issue 53016, it returned false before
// RA because there were no RVV stack objects, but returned true after RA
// because spill slots for RVV values were created during RA. Because of this
// inconsistent behavior, BP was not reserved during register allocation, yet
// BP accesses were generated in the PEI pass.
//
// The function is changed to use hasVInstructions() as the return value. It
// is not precise, but it can make the register allocation correct.
//
// FIXME: Find a better way to make the decision or revisit the solution in
// D103622.
//
// Refer to https://github.com/llvm/llvm-project/issues/53016.
return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
}
static unsigned estimateFunctionSizeInBytes(const MachineFunction &MF,
const RISCVInstrInfo &TII) {
unsigned FnSize = 0;
@@ -1022,20 +959,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
int64_t RVVStackSize;
Align RVVStackAlign;
std::tie(RVVStackSize, RVVStackAlign) = assignRVVStackObjectOffsets(MF);
RVFI->setRVVStackSize(RVVStackSize);
RVFI->setRVVStackAlign(RVVStackAlign);
if (hasRVVFrameObject(MF)) {
// Ensure the entire stack is aligned to at least the RVV requirement: some
// scalable-vector object alignments are not considered by the
// target-independent code.
MFI.ensureMaxAlignment(RVVStackAlign);
}
unsigned ScavSlotsNum = 0;
// estimateStackSize has been observed to under-estimate the final stack
@@ -1049,11 +972,6 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
if (IsLargeFunction)
ScavSlotsNum = std::max(ScavSlotsNum, 1u);
// RVV loads & stores have no capacity to hold the immediate address offsets
// so we must always reserve an emergency spill slot if the MachineFunction
// contains any RVV spills.
ScavSlotsNum = std::max(ScavSlotsNum, getScavSlotsNumForRVV(MF));
for (unsigned I = 0; I < ScavSlotsNum; I++) {
int FI = MFI.CreateStackObject(RegInfo->getSpillSize(*RC),
RegInfo->getSpillAlign(*RC), false);
@@ -1084,8 +1002,7 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
// by the frame pointer.
// Let eliminateCallFramePseudoInstr preserve stack space for it.
bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo().hasVarSizedObjects() &&
!(hasFP(MF) && hasRVVFrameObject(MF));
return !MF.getFrameInfo().hasVarSizedObjects();
}
// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions.

View File

@@ -187,54 +187,29 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
bool IsZvlsseg = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSD;
IsScalableVector = false;
} else if (RISCV::VGPRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VSE32_V;
IsZvlsseg = false;
Opcode = RISCV::VSUXEI32;
} else
llvm_unreachable("Can't store this register to stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
auto MIB = BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addMemOperand(MMO);
if (IsZvlsseg) {
// For spilling/reloading Zvlsseg registers, append the dummy field for
// the scaled vector length. The argument will be used when expanding
// these pseudo instructions.
MIB.addReg(RISCV::X0);
}
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
BuildMI(MBB, I, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -250,52 +225,28 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
bool IsScalableVector = true;
bool IsZvlsseg = true;
if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLH;
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLW;
IsScalableVector = false;
} else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLD;
IsScalableVector = false;
} else if (RISCV::VGPRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::VLE32_V;
IsZvlsseg = false;
Opcode = RISCV::VLUXEI32;
} else
llvm_unreachable("Can't load this register from stack slot");
if (IsScalableVector) {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
auto MIB = BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addMemOperand(MMO);
if (IsZvlsseg) {
// For spilling/reloading Zvlsseg registers, append the dummy field for
// the scaled vector length. The argument will be used when expanding
// these pseudo instructions.
MIB.addReg(RISCV::X0);
}
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
BuildMI(MBB, I, DL, get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
@@ -1453,19 +1404,6 @@ bool RISCV::isZEXT_B(const MachineInstr &MI) {
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
}
bool RISCV::isRVVSpill(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case RISCV::VS1R_V:
case RISCV::VL1RE8_V:
case RISCV::VL1RE16_V:
case RISCV::VL1RE32_V:
case RISCV::VL1RE64_V:
return true;
}
}
bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
int16_t MI1FrmOpIdx =
RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);

View File

@@ -205,10 +205,6 @@ bool isSEXT_W(const MachineInstr &MI);
bool isZEXT_W(const MachineInstr &MI);
bool isZEXT_B(const MachineInstr &MI);
// Returns true if the given MI is an RVV instruction opcode for which we may
// expect to see a FrameIndex operand.
bool isRVVSpill(const MachineInstr &MI);
// Implemented in RISCVGenInstrInfo.inc
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);

View File

@@ -34,6 +34,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
#include <optional>
using namespace llvm;
@@ -200,6 +201,11 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void RISCVPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None) {
addPass(createSROAPass());
addPass(createInferAddressSpacesPass());
}
addPass(createAtomicExpandPass());
if (getOptLevel() != CodeGenOpt::None)

View File

@@ -1572,7 +1572,6 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs
} // Predicates = [IsRV64], ...
/// Loads
multiclass UniformLdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> {
def : Pat<(vt (UniformUnaryFrag<LoadOp> (AddrRegImm GPR:$rs1, simm12:$imm12))),
(Inst GPR:$rs1, simm12:$imm12)>;
@@ -1587,7 +1586,6 @@ defm : UniformLdPat<zextloadi8, LBU>;
defm : UniformLdPat<zextloadi16, LHU>;
/// Stores
multiclass UniformStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
ValueType vt> {
def : Pat<(UniformBinFrag<StoreOp> (vt StTy:$rs2), (AddrRegImm GPR:$rs1, simm12:$imm12)),

View File

@@ -30,13 +30,13 @@ multiclass PatVBin<SDPatternOperator Op, list<RVInst> Insts> {
}
// RVV VX, VI instruction pattern class for binary operations
multiclass PatXIBin<SDPatternOperator Op, list<RVInst> Insts,
int IsReverseOperation = 0> {
if !eq(IsReverseOperation, 1) then {
def : Pat<(Op GPR:$rs1, VGPR:$rs2), (Insts[0] VGPR:$rs2, GPR:$rs1)>;
def : Pat<(XLenVT (Op uimm5:$imm, (XLenVT VGPR:$rs1))),
(Insts[1] VGPR:$rs1, uimm5:$imm)>;
}
else {
def : Pat<(Op VGPR:$rs1, GPR:$rs2), (Insts[0] VGPR:$rs1, GPR:$rs2)>;
def : Pat<(XLenVT (Op (XLenVT VGPR:$rs1), uimm5:$imm)),
@@ -44,6 +44,16 @@ multiclass PatXIBin<SDPatternOperator Op, list<RVInst> Insts,
}
}
multiclass DivergentLdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> {
def : Pat<(vt (DivergentUnaryFrag<LoadOp> (AddrRegImm GPR:$rs1, VGPR:$vs2))),
(Inst GPR:$rs1, VGPR:$vs2)>;
}
multiclass DivergentStPat<PatFrag StoreOp, RVInst Inst, ValueType vt = XLenVT> {
def : Pat<(DivergentBinFrag<StoreOp> (vt VGPR:$vs3), (AddrRegImm GPR:$rs1, VGPR:$vs2)),
(Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
}
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -112,9 +122,6 @@ def simm5_plus1_nonzero : ImmLeaf<XLenVT,
// Scheduling definitions.
//===----------------------------------------------------------------------===//
class VMVRSched<int n>: Sched <[!cast<SchedReadWrite>("WriteVMov" # n # "V"),
!cast<SchedReadWrite>("ReadVMov" # n # "V")]>;
class VLESched : Sched <[WriteVLDE, ReadVLDX]>;
class VSESched : Sched <[WriteVSTE, ReadVSTEV, ReadVSTX]>;
@@ -141,38 +148,24 @@ class VLFSched : Sched <[ReadVLDX]>;
// Instruction class templates
//===----------------------------------------------------------------------===//
//
// FIXME: The encoding for vluxei/vsuxei is not correct!!
//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
// unit-stride load vd, (rs1), vm
class VUnitStrideLoad<RISCVWidth width, string opcodestr>
// vluxei vd, (rs1), vs2, where vs2 is used as an offset, not an index
class VectorLoad<RISCVWidth width, string opcodestr>
: RVInstVLU<0b000, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
(outs VGPR:$vd),
(ins GPRMem:$rs1), opcodestr, "$vd, (${rs1})">;
let RVVConstraint = NoConstraint in {
// unit-stride whole register load vl<nf>r.v vd, (rs1)
class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass VRC>
: RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg,
width.Value{2-0}, (outs VRC:$vd), (ins GPRMem:$rs1),
opcodestr, "$vd, (${rs1})"> {
let Uses = [];
}
} // RVVConstraint = NoConstraint
(ins GPRMem:$rs1, VGPR:$vs2), opcodestr, "$vd, (${rs1}), $vs2">;
} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
// unit-stride store vd, vs3, (rs1), vm
class VUnitStrideStore<RISCVWidth width, string opcodestr>
// vsuxei vs3, (rs1), vs2, where vs2 is used as an offset, not an index
class VectorStore<RISCVWidth width, string opcodestr>
: RVInstVSU<0b000, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
(outs), (ins VGPR:$vs3, GPRMem:$rs1), opcodestr,
"$vs3, (${rs1})">;
// vs<nf>r.v vd, (rs1)
class VWholeStore<bits<3> nf, string opcodestr, RegisterClass VRC>
: RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg,
0b000, (outs), (ins VRC:$vs3, GPRMem:$rs1),
opcodestr, "$vs3, (${rs1})"> {
let Uses = [];
}
(outs), (ins VGPR:$vs3, GPRMem:$rs1, VGPR:$vs2), opcodestr,
"$vs3, (${rs1}), $vs2">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
@@ -628,20 +621,6 @@ multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = sim
Sched<[WriteVNClipI_UpperBound, ReadVNClipV_UpperBound]>;
}
multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
foreach l = [8, 16, 32] in {
defvar w = !cast<RISCVWidth>("LSWidth" # l);
defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
Sched<[s, ReadVLDX]>;
}
}
multiclass VWholeLoadEEW64<bits<3> nf, string opcodestr, RegisterClass VRC, SchedReadWrite schedrw> {
def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>,
Sched<[schedrw, ReadVLDX]>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
isBranch = 1, isTerminator = 1 in
class BranchCC_vvi<bits<3> funct3, string opcodestr>
@@ -660,35 +639,17 @@ def VBLTU : BranchCC_vvi<0b110, "vbltu">;
def VBGEU : BranchCC_vvi<0b111, "vbgeu">;
def JOIN : BranchCC_vvi<0b011, "join">;
foreach eew = [8, 16, 32] in {
defvar w = !cast<RISCVWidth>("LSWidth" # eew);
def VLUXEI8 : VectorLoad<LSWidth8, "vluxei8.v">;
def VLUXEI16 : VectorLoad<LSWidth16, "vluxei16.v">;
def VLUXEI32 : VectorLoad<LSWidth32, "vluxei32.v">;
// Ventus uses Vector Unit-Stride Instructions as the basic loads/stores, like
// the sALU's lb/lh/lw.
// TODO: We may add load-multiple byte/short/word versions of these.
def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched;
def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched;
}
// TODO: The encodings for these two instructions are not correct.
//def VLUXEI8U : VectorLoad<LSWidth8, "vluxei8u.v">;
//def VLUXEI16U : VectorLoad<LSWidth16, "vluxei16u.v">;
let Predicates = [HasVInstructions] in {
defm VL1R : VWholeLoadN<0, "vl1r", VGPR>;
def VS1R_V : VWholeStore<0, "vs1r.v", VGPR>,
Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VGPR:$vd, GPR:$rs1)>;
} // Predicates = [HasVInstructions]
let Predicates = [HasVInstructionsI64] in {
// Vector Unit-Stride Instructions
def VLE64_V : VUnitStrideLoad<LSWidth64, "vle64.v">,
VLESched;
def VSE64_V : VUnitStrideStore<LSWidth64, "vse64.v">,
VSESched;
defm VL1R: VWholeLoadEEW64<0, "vl1r", VGPR, WriteVLD1R>;
} // Predicates = [HasVInstructionsI64]
def VSUXEI8 : VectorStore<LSWidth8, "vsuxei8.v">;
def VSUXEI16 : VectorStore<LSWidth16, "vsuxei16.v">;
def VSUXEI32 : VectorStore<LSWidth32, "vsuxei32.v">;
let Predicates = [HasVInstructions] in {
// Vector Single-Width Integer Add and Subtract
@@ -1098,6 +1059,16 @@ defm : PatVBin<DivergentBinFrag<udiv>, [VDIVU_VV, VDIVU_VX]>;
defm : PatVBin<DivergentBinFrag<srem>, [VREM_VV, VREM_VX]>;
defm : PatVBin<DivergentBinFrag<urem>, [VREMU_VV, VREMU_VX]>;
defm : DivergentLdPat<sextloadi8, VLUXEI8>;
defm : DivergentLdPat<extloadi8, VLUXEI8>;
defm : DivergentLdPat<sextloadi16, VLUXEI16>;
defm : DivergentLdPat<extloadi16, VLUXEI16>;
defm : DivergentLdPat<load, VLUXEI32>;
//defm : DivergentLdPat<zextloadi8, VLUXEI8U>;
//defm : DivergentLdPat<zextloadi16, VLUXEI16U>;
defm : DivergentStPat<truncstorei8, VSUXEI8>;
defm : DivergentStPat<truncstorei16, VSUXEI16>;
defm : DivergentStPat<store, VSUXEI32>;
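Read together with the instruction definitions above, these patterns pick one indexed opcode per access width; a sketch with hypothetical registers (a0 as the uniform base, v2 as the per-lane offset, v1 as the data register):

    vluxei8.v   v1, (a0), v2    # sextloadi8 / extloadi8
    vluxei16.v  v1, (a0), v2    # sextloadi16 / extloadi16
    vluxei32.v  v1, (a0), v2    # word load
    vsuxei8.v   v1, (a0), v2    # truncstorei8
    vsuxei16.v  v1, (a0), v2    # truncstorei16
    vsuxei32.v  v1, (a0), v2    # word store

The zero-extending byte/halfword loads stay commented out until the vluxei8u/vluxei16u encodings are settled, per the TODO above.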
// Match `riscv_brcc` and lower to the appropriate RISC-V branch instruction.
class DivergentBccPat<CondCode Cond, RVInstVB Inst>