Drafting divergence-related code; not working yet.

Aries 2022-12-19 18:11:34 +08:00
parent 878fba7d40
commit dee3135130
25 changed files with 523 additions and 503 deletions


@ -15,6 +15,30 @@
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif
//===----------------------------------------------------------------------===//
// Ventus OpenCL builtins.
//===----------------------------------------------------------------------===//
BUILTIN(__builtin_riscv_workgroup_id_x, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_id_y, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_id_z, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_x, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_y, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_z, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_size_x, "Us", "nc")
BUILTIN(__builtin_riscv_workgroup_size_y, "Us", "nc")
BUILTIN(__builtin_riscv_workgroup_size_z, "Us", "nc")
BUILTIN(__builtin_riscv_grid_size_x, "Ui", "nc")
BUILTIN(__builtin_riscv_grid_size_y, "Ui", "nc")
BUILTIN(__builtin_riscv_grid_size_z, "Ui", "nc")
//===----------------------------------------------------------------------===//
// Standard RISCV instruction builtins.
//===----------------------------------------------------------------------===//
// Zbb extension
TARGET_BUILTIN(__builtin_riscv_orc_b_32, "ZiZi", "nc", "zbb")
TARGET_BUILTIN(__builtin_riscv_orc_b_64, "WiWi", "nc", "zbb,64bit")
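In Clang's Builtins.def encoding, "Ui" declares an unsigned int return with no arguments, "Us" an unsigned short, and the "nc" attribute string marks the builtin nothrow and const, so repeated calls can be folded. A minimal sketch of exercising the new builtins together (illustrative only; the wrapper name is made up, and the builtins exist only when compiling with this patched Clang for the Ventus target):

// Hedged illustration, not part of the commit.
extern "C" unsigned int ventus_flat_global_id_x(void) {
  // "nc" (nothrow, const) lets the optimizer CSE repeated reads of these IDs.
  unsigned int lid = __builtin_riscv_workitem_id_x();
  unsigned int gid = __builtin_riscv_workgroup_id_x();
  return gid * __builtin_riscv_workgroup_size_x() + lid;
}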


@ -116,6 +116,10 @@ public:
IntPtrType = SignedInt;
PtrDiffType = SignedInt;
SizeType = UnsignedInt;
//HasLegalHalfType = true;
//HasFloat16 = true;
//resetDataLayout("e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
// "-v256:256-v512:512-v1024:1024-n32:64-S128");
resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128");
}
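The new layout string reads: little-endian ("e"), ELF-style mangling ("m:e"), 32-bit pointers with 32-bit alignment ("p:32:32"), i64 aligned to 64 bits, 32-bit native integer width ("n32"), and 128-bit stack alignment ("S128"). A small unit-test-style sketch of checking it through LLVM's DataLayout API (illustrative, not part of the commit):

#include "llvm/IR/DataLayout.h"
#include <cassert>

// Parses the Ventus layout string and sanity-checks two of its fields.
void checkVentusDataLayout() {
  llvm::DataLayout DL("e-m:e-p:32:32-i64:64-n32-S128");
  assert(DL.isLittleEndian());
  assert(DL.getPointerSizeInBits() == 32);
}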
@ -133,6 +137,25 @@ public:
if (ISAInfo->hasExtension("a"))
MaxAtomicInlineWidth = 32;
}
void setSupportedOpenCLOpts() override {
auto &Opts = getSupportedOpenCLOpts();
Opts["cl_khr_fp16"] = true;
Opts["cl_clang_storage_class_specifiers"] = true;
Opts["__cl_clang_variadic_functions"] = true;
Opts["__opencl_c_images"] = true;
Opts["__opencl_c_3d_image_writes"] = true;
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
switch (CC) {
default:
return CCCR_Warning;
case CC_C:
case CC_OpenCLKernel:
return CCCR_OK;
}
}
};
class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
public:


@ -19460,6 +19460,33 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
switch (BuiltinID) {
default: llvm_unreachable("unexpected builtin ID");
// Ventus GPGPU workitem
case RISCV::BI__builtin_riscv_workitem_id_x:
return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_x, 0, 1024);
case RISCV::BI__builtin_riscv_workitem_id_y:
return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_y, 0, 1024);
case RISCV::BI__builtin_riscv_workitem_id_z:
return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_z, 0, 1024);
// FIXME: Use AMDGPU function here temporarily
// Ventus GPGPU workgroup size
case RISCV::BI__builtin_riscv_workgroup_size_x:
return EmitAMDGPUWorkGroupSize(*this, 0);
case RISCV::BI__builtin_riscv_workgroup_size_y:
return EmitAMDGPUWorkGroupSize(*this, 1);
case RISCV::BI__builtin_riscv_workgroup_size_z:
return EmitAMDGPUWorkGroupSize(*this, 2);
// Ventus GPGPU grid size
case RISCV::BI__builtin_riscv_grid_size_x:
return EmitAMDGPUGridSize(*this, 0);
case RISCV::BI__builtin_riscv_grid_size_y:
return EmitAMDGPUGridSize(*this, 1);
case RISCV::BI__builtin_riscv_grid_size_z:
return EmitAMDGPUGridSize(*this, 2);
case RISCV::BI__builtin_riscv_orc_b_32:
case RISCV::BI__builtin_riscv_orc_b_64:
case RISCV::BI__builtin_riscv_clz_32:
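emitRangedBuiltin here is presumably the same CGBuiltin.cpp helper used for the AMDGPU work-item builtins: it emits a call to the intrinsic and attaches !range metadata so later passes know the id lies in [low, high). A rough sketch of that shape (assumed, not copied from the file; it would live inside clang/lib/CodeGen where CodeGenFunction.h is visible):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/MDBuilder.h"

// Sketch: emit the intrinsic call and bound its result with !range metadata.
static llvm::Value *emitRangedBuiltinSketch(clang::CodeGen::CodeGenFunction &CGF,
                                            unsigned IntrinsicID,
                                            int Low, int High) {
  llvm::MDBuilder MDB(CGF.getLLVMContext());
  llvm::MDNode *Range =
      MDB.createRange(llvm::APInt(32, Low), llvm::APInt(32, High));
  llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
  llvm::CallInst *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, Range);
  return Call;
}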

libclc/riscv/lib/SOURCES

@ -0,0 +1,7 @@
workitem/get_global_offset.cl
workitem/get_group_id.cl
workitem/get_global_size.cl
workitem/get_local_id.cl
workitem/get_local_size.cl
workitem/get_num_groups.cl
workitem/get_work_dim.cl


@ -0,0 +1,19 @@
#include <clc/clc.h>
#if __clang_major__ >= 8
#define CONST_AS __constant
#elif __clang_major__ >= 7
#define CONST_AS __attribute__((address_space(4)))
#else
#define CONST_AS __attribute__((address_space(2)))
#endif
_CLC_DEF _CLC_OVERLOAD size_t get_global_offset(uint dim) {
assert(0 && "TODO");
/*
CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
if (dim < 3)
return ptr[dim + 1];
*/
return 0;
}
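get_global_offset is meant to return the per-dimension offset the host passes as global_work_offset; the stub above only returns 0 for now. A host-side sketch of where that value originates (standard OpenCL host API; the function and its arguments are placeholders):

#include <CL/cl.h>

// With these arguments, get_global_offset(0) inside the kernel should
// eventually report 16 (and 0 for dimensions 1 and 2).
cl_int launchWithOffset(cl_command_queue queue, cl_kernel kernel) {
  const size_t offset[3] = {16, 0, 0};
  const size_t global[3] = {256, 1, 1};
  return clEnqueueNDRangeKernel(queue, kernel, /*work_dim=*/3, offset, global,
                                /*local_work_size=*/nullptr, 0, nullptr,
                                nullptr);
}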


@ -0,0 +1,18 @@
#include <clc/clc.h>
uint __clc_riscv_get_global_size_x(void) __asm("llvm.riscv.read.global.size.x");
uint __clc_riscv_get_global_size_y(void) __asm("llvm.riscv.read.global.size.y");
uint __clc_riscv_get_global_size_z(void) __asm("llvm.riscv.read.global.size.z");
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
switch (dim) {
case 0:
return __clc_riscv_get_global_size_x();
case 1:
return __clc_riscv_get_global_size_y();
case 2:
return __clc_riscv_get_global_size_z();
default:
return 1;
}
}


@ -0,0 +1,14 @@
#include <clc/clc.h>
_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
switch (dim) {
case 0:
return __builtin_riscv_workgroup_id_x();
case 1:
return __builtin_riscv_workgroup_id_y();
case 2:
return __builtin_riscv_workgroup_id_z();
default:
return 1;
}
}


@ -0,0 +1,14 @@
#include <clc/clc.h>
_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
switch (dim) {
case 0:
return __builtin_riscv_workitem_id_x();
case 1:
return __builtin_riscv_workitem_id_y();
case 2:
return __builtin_riscv_workitem_id_z();
default:
return 1;
}
}


@ -0,0 +1,18 @@
#include <clc/clc.h>
uint __clc_riscv_get_local_size_x(void) __asm("llvm.riscv.read.local.size.x");
uint __clc_riscv_get_local_size_y(void) __asm("llvm.riscv.read.local.size.y");
uint __clc_riscv_get_local_size_z(void) __asm("llvm.riscv.read.local.size.z");
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
switch (dim) {
case 0:
return __clc_riscv_get_local_size_x();
case 1:
return __clc_riscv_get_local_size_y();
case 2:
return __clc_riscv_get_local_size_z();
default:
return 1;
}
}


@ -0,0 +1,18 @@
#include <clc/clc.h>
uint __clc_riscv_get_num_groups_x(void) __asm("llvm.riscv.read.ngroups.x");
uint __clc_riscv_get_num_groups_y(void) __asm("llvm.riscv.read.ngroups.y");
uint __clc_riscv_get_num_groups_z(void) __asm("llvm.riscv.read.ngroups.z");
_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
switch (dim) {
case 0:
return __clc_riscv_get_num_groups_x();
case 1:
return __clc_riscv_get_num_groups_y();
case 2:
return __clc_riscv_get_num_groups_z();
default:
return 1;
}
}
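Taken together, these per-dimension queries are expected to satisfy the usual OpenCL identities: global size = num_groups * local size, and global id = group id * local size + local id + global offset. A plain C++ sketch of those relations, for reference only:

#include <cassert>
#include <cstddef>

// Illustrative check of the relations the libclc functions above implement
// per dimension; nothing Ventus-specific is assumed.
void checkWorkItemIdentities(std::size_t numGroups, std::size_t localSize,
                             std::size_t groupId, std::size_t localId,
                             std::size_t globalOffset) {
  assert(groupId < numGroups && localId < localSize);
  std::size_t globalSize = numGroups * localSize;                  // get_global_size
  std::size_t globalId =
      groupId * localSize + localId + globalOffset;                // get_global_id
  assert(globalId - globalOffset < globalSize);
}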


@ -0,0 +1,15 @@
#include <clc/clc.h>
#if __clang_major__ >= 8
#define CONST_AS __constant
#elif __clang_major__ >= 7
#define CONST_AS __attribute__((address_space(4)))
#else
#define CONST_AS __attribute__((address_space(2)))
#endif
_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
assert(0 && "TODO");
// CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
// return ptr[0];
}


@ -10,6 +10,40 @@
//
//===----------------------------------------------------------------------===//
class RISCVReadPreloadRegisterIntrinsic
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
class RISCVReadPreloadRegisterIntrinsicNamed<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>,
ClangBuiltin<name>;
// Used to tag image and resource intrinsics with information used to generate
// mem operands.
class RISCVRsrcIntrinsic<int rsrcarg, bit isimage = false> {
int RsrcArg = rsrcarg;
bit IsImage = isimage;
}
multiclass RISCVReadPreloadRegisterIntrinsic_xyz {
def _x : RISCVReadPreloadRegisterIntrinsic;
def _y : RISCVReadPreloadRegisterIntrinsic;
def _z : RISCVReadPreloadRegisterIntrinsic;
}
multiclass RISCVReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
def _x : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
def _y : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
def _z : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}
//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
defm int_riscv_workitem_id : RISCVReadPreloadRegisterIntrinsic_xyz;
defm int_riscv_workgroup_id : RISCVReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_riscv_workgroup_id">;
//===----------------------------------------------------------------------===//
// Atomics
@ -570,7 +604,7 @@ let TargetPrefix = "riscv" in {
class RISCVClassifyMasked
: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
[LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic {
let VLOperand = 3;


@ -133,7 +133,8 @@ parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) {
unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
bool TailAgnostic, bool MaskAgnostic) {
assert(isValidSEW(SEW) && "Invalid SEW");
unsigned VLMULBits = static_cast<unsigned>(VLMUL);
// LMUL1 encoding is 000
unsigned VLMULBits = 0;
unsigned VSEWBits = encodeSEW(SEW);
unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7);
if (TailAgnostic)
@ -144,57 +145,23 @@ unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
return VTypeI;
}
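With LMUL pinned to m1 (the low three bits become 000), the encoding reduces to the SEW field in bits 5:3 plus the ta/ma bits at 0x40/0x80. A worked example, assuming the usual encodeSEW(SEW) = log2(SEW) - 3:

#include "MCTargetDesc/RISCVBaseInfo.h"
#include <cassert>
using namespace llvm;

// e32, m1, ta, ma:
//   VSEWBits = log2(32) - 3 = 2  ->  (2 << 3) | 0b000 = 0x10
//   set 0x40 (tail agnostic) and 0x80 (mask agnostic) -> 0xD0
void checkEncodeVtypeE32M1() {
  unsigned VType = RISCVVType::encodeVTYPE(RISCVII::VLMUL::LMUL_1, /*SEW=*/32,
                                           /*TailAgnostic=*/true,
                                           /*MaskAgnostic=*/true);
  assert(VType == 0xD0);
}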
std::pair<unsigned, bool> RISCVVType::decodeVLMUL(RISCVII::VLMUL VLMUL) {
switch (VLMUL) {
default:
llvm_unreachable("Unexpected LMUL value!");
case RISCVII::VLMUL::LMUL_1:
case RISCVII::VLMUL::LMUL_2:
case RISCVII::VLMUL::LMUL_4:
case RISCVII::VLMUL::LMUL_8:
return std::make_pair(1 << static_cast<unsigned>(VLMUL), false);
case RISCVII::VLMUL::LMUL_F2:
case RISCVII::VLMUL::LMUL_F4:
case RISCVII::VLMUL::LMUL_F8:
return std::make_pair(1 << (8 - static_cast<unsigned>(VLMUL)), true);
}
}
void RISCVVType::printVType(unsigned VType, raw_ostream &OS) {
unsigned Sew = getSEW(VType);
OS << "e" << Sew;
unsigned LMul;
bool Fractional;
std::tie(LMul, Fractional) = decodeVLMUL(getVLMUL(VType));
if (Fractional)
OS << ", mf";
else
OS << ", m";
OS << LMul;
if (isTailAgnostic(VType))
OS << ", ta";
else
OS << ", tu";
if (isMaskAgnostic(VType))
OS << ", ma";
else
OS << ", mu";
}
unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
unsigned LMul;
bool Fractional;
std::tie(LMul, Fractional) = decodeVLMUL(VLMul);
// Convert LMul to a fixed point value with 3 fractional bits.
LMul = Fractional ? (8 / LMul) : (LMul * 8);
assert(SEW >= 8 && "Unexpected SEW value");
return (SEW * 8) / LMul;
OS << "e" << Sew << ", m1, ta, ma";
}
} // namespace llvm
namespace {
struct SourceOfDivergence {
unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
#define GET_SourcesOfDivergence_IMPL
#include "RISCVGenSearchableTables.inc"
} // end anonymous namespace
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
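GET_SourcesOfDivergence_IMPL is emitted by the SearchableTables backend from the RISCVSearchableTables.td added later in this commit; it expands to a sorted table plus a lookup roughly of this shape (a sketch of the generated code under that assumption, not the literal output):

#include "llvm/IR/IntrinsicsRISCV.h"
#include <algorithm>
#include <iterator>

// Sorted array of divergent intrinsic IDs; a null lookup result means the
// intrinsic is treated as uniform.
struct SourceOfDivergenceSketch { unsigned Intr; };

static const SourceOfDivergenceSketch SourcesOfDivergenceSketch[] = {
    {llvm::Intrinsic::riscv_workitem_id_x},
    {llvm::Intrinsic::riscv_workitem_id_y},
    {llvm::Intrinsic::riscv_workitem_id_z},
};

static const SourceOfDivergenceSketch *lookupSourceOfDivergenceSketch(unsigned Intr) {
  auto *I = std::lower_bound(std::begin(SourcesOfDivergenceSketch),
                             std::end(SourcesOfDivergenceSketch), Intr,
                             [](const SourceOfDivergenceSketch &E, unsigned Key) {
                               return E.Intr < Key;
                             });
  return (I != std::end(SourcesOfDivergenceSketch) && I->Intr == Intr) ? I
                                                                       : nullptr;
}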


@ -116,10 +116,6 @@ enum VLMUL : uint8_t {
LMUL_F2
};
enum {
TAIL_AGNOSTIC = 1,
MASK_AGNOSTIC = 2,
};
// Helper functions to read TSFlags.
/// \returns the format of the instruction.
@ -131,10 +127,7 @@ static inline VConstraintType getConstraint(uint64_t TSFlags) {
return static_cast<VConstraintType>((TSFlags & ConstraintMask) >>
ConstraintShift);
}
/// \returns the LMUL for the instruction.
static inline VLMUL getLMul(uint64_t TSFlags) {
return static_cast<VLMUL>((TSFlags & VLMulMask) >> VLMulShift);
}
/// \returns true if there is a dummy mask operand for the instruction.
static inline bool hasDummyMaskOp(uint64_t TSFlags) {
return TSFlags & HasDummyMaskOpMask;
@ -143,59 +136,17 @@ static inline bool hasDummyMaskOp(uint64_t TSFlags) {
static inline bool doesForceTailAgnostic(uint64_t TSFlags) {
return TSFlags & ForceTailAgnosticMask;
}
/// \returns true if there is a merge operand for the instruction.
static inline bool hasMergeOp(uint64_t TSFlags) {
return TSFlags & HasMergeOpMask;
}
/// \returns true if there is a SEW operand for the instruction.
static inline bool hasSEWOp(uint64_t TSFlags) {
return TSFlags & HasSEWOpMask;
}
/// \returns true if there is a VL operand for the instruction.
static inline bool hasVLOp(uint64_t TSFlags) {
return TSFlags & HasVLOpMask;
}
/// \returns true if there is a vector policy operand for this instruction.
static inline bool hasVecPolicyOp(uint64_t TSFlags) {
return TSFlags & HasVecPolicyOpMask;
}
/// \returns true if it is a vector widening reduction instruction.
static inline bool isRVVWideningReduction(uint64_t TSFlags) {
return TSFlags & IsRVVWideningReductionMask;
}
/// \returns true if mask policy is valid for the instruction.
static inline bool usesMaskPolicy(uint64_t TSFlags) {
return TSFlags & UsesMaskPolicyMask;
}
static inline unsigned getMergeOpNum(const MCInstrDesc &Desc) {
assert(hasMergeOp(Desc.TSFlags));
assert(!Desc.isVariadic());
return Desc.getNumDefs();
}
static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
// This method is only called if we expect to have a VL operand, and all
// instructions with VL also have SEW.
assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
unsigned Offset = 2;
if (hasVecPolicyOp(TSFlags))
Offset = 3;
return Desc.getNumOperands() - Offset;
}
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
assert(hasSEWOp(TSFlags));
unsigned Offset = 1;
if (hasVecPolicyOp(TSFlags))
Offset = 2;
return Desc.getNumOperands() - Offset;
}
static inline unsigned getVecPolicyOpNum(const MCInstrDesc &Desc) {
assert(hasVecPolicyOp(Desc.TSFlags));
return Desc.getNumOperands() - 1;
}
@ -449,13 +400,12 @@ inline static unsigned getSEW(unsigned VType) {
return decodeVSEW(VSEW);
}
inline static bool isTailAgnostic(unsigned VType) { return VType & 0x40; }
inline static bool isTailAgnostic(unsigned VType) { return 1; }
inline static bool isMaskAgnostic(unsigned VType) { return VType & 0x80; }
inline static bool isMaskAgnostic(unsigned VType) { return 1; }
void printVType(unsigned VType, raw_ostream &OS);
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);
} // namespace RISCVVType


@ -542,6 +542,7 @@ include "VentusInstrInfo.td"
include "GISel/RISCVRegisterBanks.td"
include "RISCVSchedRocket.td"
include "RISCVSchedSiFive7.td"
include "RISCVSearchableTables.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.


@ -28,15 +28,6 @@ using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
namespace llvm::RISCV {
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV
static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
assert(Node->getNumOperands() > 0 && "Node with no operands");
unsigned LastOpIdx = Node->getNumOperands() - 1;
@ -47,104 +38,6 @@ static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
return LastOpIdx;
}
static unsigned getVecPolicyOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
assert(RISCVII::hasVecPolicyOp(MCID.TSFlags));
(void)MCID;
return getLastNonGlueOrChainOpIdx(Node);
}
void RISCVDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
if (N->use_empty())
continue;
SDValue Result;
switch (N->getOpcode()) {
case ISD::SPLAT_VECTOR: {
// Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
// SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
MVT VT = N->getSimpleValueType(0);
unsigned Opc =
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
SDLoc DL(N);
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
N->getOperand(0), VL);
break;
}
case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
// load. Done after lowering and combining so that we have a chance to
// optimize this to VMV_V_X_VL when the upper bits aren't needed.
assert(N->getNumOperands() == 4 && "Unexpected number of operands");
MVT VT = N->getSimpleValueType(0);
SDValue Passthru = N->getOperand(0);
SDValue Lo = N->getOperand(1);
SDValue Hi = N->getOperand(2);
SDValue VL = N->getOperand(3);
assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
"Unexpected VTs!");
MachineFunction &MF = CurDAG->getMachineFunction();
RISCVMachineFunctionInfo *FuncInfo =
MF.getInfo<RISCVMachineFunctionInfo>();
SDLoc DL(N);
// We use the same frame index we use for moving two i32s into 64-bit FPR.
// This is an analogous operation.
int FI = FuncInfo->getMoveF64FrameIndex(MF);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
SDValue StackSlot =
CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
SDValue Chain = CurDAG->getEntryNode();
Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
SDValue OffsetSlot =
CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
Align(8));
Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
SDValue IntID =
CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
SDValue Ops[] = {Chain,
IntID,
Passthru,
StackSlot,
CurDAG->getRegister(RISCV::X0, MVT::i64),
VL};
Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
MVT::i64, MPI, Align(8),
MachineMemOperand::MOLoad);
break;
}
}
if (Result) {
LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\nNew: ");
LLVM_DEBUG(Result->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
MadeChange = true;
}
}
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
HandleSDNode Dummy(CurDAG->getRoot());
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();


@ -37,7 +37,6 @@ public:
return SelectionDAGISel::runOnMachineFunction(MF);
}
void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
void Select(SDNode *Node) override;
@ -108,50 +107,6 @@ private:
bool doPeepholeSExtW(SDNode *Node);
};
namespace RISCV {
struct VLEPseudo {
uint16_t Masked : 1;
uint16_t IsTU : 1;
uint16_t Strided : 1;
uint16_t FF : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
uint16_t Pseudo;
};
struct VSEPseudo {
uint16_t Masked :1;
uint16_t Strided : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
uint16_t Pseudo;
};
struct VLX_VSXPseudo {
uint16_t Masked : 1;
uint16_t IsTU : 1;
uint16_t Ordered : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
uint16_t IndexLMUL : 3;
uint16_t Pseudo;
};
struct RISCVMaskedPseudoInfo {
uint16_t MaskedPseudo;
uint16_t UnmaskedPseudo;
uint16_t UnmaskedTUPseudo;
uint8_t MaskOpIdx;
};
#define GET_RISCVVLETable_DECL
#define GET_RISCVVSETable_DECL
#define GET_RISCVVLXTable_DECL
#define GET_RISCVVSXTable_DECL
#define GET_RISCVMaskedPseudosTable_DECL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm
#endif


@ -21,7 +21,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@ -958,114 +960,6 @@ bool RISCVTargetLowering::
return !XC;
}
bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
switch (Opcode) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::ICmp:
case Instruction::FCmp:
return true;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
return Operand == 1;
default:
return false;
}
}
bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
return false;
if (canSplatOperand(I->getOpcode(), Operand))
return true;
auto *II = dyn_cast<IntrinsicInst>(I);
if (!II)
return false;
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
case Intrinsic::vp_fma:
return Operand == 0 || Operand == 1;
case Intrinsic::vp_shl:
case Intrinsic::vp_lshr:
case Intrinsic::vp_ashr:
case Intrinsic::vp_udiv:
case Intrinsic::vp_sdiv:
case Intrinsic::vp_urem:
case Intrinsic::vp_srem:
return Operand == 1;
// These intrinsics are commutative.
case Intrinsic::vp_add:
case Intrinsic::vp_mul:
case Intrinsic::vp_and:
case Intrinsic::vp_or:
case Intrinsic::vp_xor:
case Intrinsic::vp_fadd:
case Intrinsic::vp_fmul:
// These intrinsics have 'vr' versions.
case Intrinsic::vp_sub:
case Intrinsic::vp_fsub:
case Intrinsic::vp_fdiv:
return Operand == 0 || Operand == 1;
default:
return false;
}
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
using namespace llvm::PatternMatch;
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
return false;
for (auto OpIdx : enumerate(I->operands())) {
if (!canSplatOperand(I, OpIdx.index()))
continue;
Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
// Make sure we are not already sinking this operand
if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
// We are looking for a splat that can be sunk.
if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
m_Undef(), m_ZeroMask())))
continue;
// All uses of the shuffle should be sunk to avoid duplicating it across gpr
// and vector registers
for (Use &U : Op->uses()) {
Instruction *Insn = cast<Instruction>(U.getUser());
if (!canSplatOperand(Insn, U.getOperandNo()))
return false;
}
Ops.push_back(&Op->getOperandUse(0));
Ops.push_back(&OpIdx.value());
}
return true;
}
bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
unsigned Opc = VecOp.getOpcode();
@ -1878,7 +1772,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
// Return the value of the return address register, marking it an implicit
// live-in.
Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT, false));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
@ -5635,7 +5529,7 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
SDValue Val;
const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT(), true);
Register VReg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
return DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
@ -7261,6 +7155,88 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
return OptSize && !VT.isVector();
}
bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
const SDNode *N, FunctionLoweringInfo *FLI,
LegacyDivergenceAnalysis *KDA) const {
switch (N->getOpcode()) {
case ISD::CopyFromReg: {
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
Register Reg = R->getReg();
// FIXME: Why does this need to consider isLiveIn?
if (Reg.isPhysical() || MRI.isLiveIn(Reg))
return !TRI->isSGPRReg(MRI, Reg);
if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
return KDA->isDivergent(V);
return !TRI->isSGPRReg(MRI, Reg);
}
case ISD::LOAD: {
const LoadSDNode *L = cast<LoadSDNode>(N);
unsigned AS = L->getAddressSpace();
// A flat load may access private memory.
// return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
return true;
}
case ISD::CALLSEQ_END:
return true;
case ISD::INTRINSIC_WO_CHAIN:
return RISCVII::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
case ISD::INTRINSIC_W_CHAIN:
return RISCVII::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
/*
case AMDGPUISD::ATOMIC_CMP_SWAP:
case AMDGPUISD::ATOMIC_INC:
case AMDGPUISD::ATOMIC_DEC:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
case AMDGPUISD::ATOMIC_LOAD_FMAX:
case AMDGPUISD::BUFFER_ATOMIC_SWAP:
case AMDGPUISD::BUFFER_ATOMIC_ADD:
case AMDGPUISD::BUFFER_ATOMIC_SUB:
case AMDGPUISD::BUFFER_ATOMIC_SMIN:
case AMDGPUISD::BUFFER_ATOMIC_UMIN:
case AMDGPUISD::BUFFER_ATOMIC_SMAX:
case AMDGPUISD::BUFFER_ATOMIC_UMAX:
case AMDGPUISD::BUFFER_ATOMIC_AND:
case AMDGPUISD::BUFFER_ATOMIC_OR:
case AMDGPUISD::BUFFER_ATOMIC_XOR:
case AMDGPUISD::BUFFER_ATOMIC_INC:
case AMDGPUISD::BUFFER_ATOMIC_DEC:
case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP:
case AMDGPUISD::BUFFER_ATOMIC_CSUB:
case AMDGPUISD::BUFFER_ATOMIC_FADD:
case AMDGPUISD::BUFFER_ATOMIC_FMIN:
case AMDGPUISD::BUFFER_ATOMIC_FMAX:
// Target-specific read-modify-write atomics are sources of divergence.
return true;
*/
default:
if (auto *A = dyn_cast<AtomicSDNode>(N)) {
// Generic read-modify-write atomics are sources of divergence.
return A->readMem() && A->writeMem();
}
return false;
}
}
// TODO: Support child registers
const TargetRegisterClass *
RISCVTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
if (!TRI->isSGPRClass(RC) && !isDivergent)
return &RISCV::GPRRegClass;
else if (TRI->isSGPRClass(RC) && isDivergent)
return &RISCV::VGPRRegClass;
return RC;
}
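The effect of getRegClassFor above: a value that divergence analysis proves uniform is steered into the scalar GPR class even if its default class is VGPR, and a divergent value is forced into VGPR. A usage sketch of how a virtual register would typically be created against this hook (the helper name is made up; the pattern mirrors what FunctionLoweringInfo does):

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"

// Divergent values land in VGPR, uniform values in GPR.
llvm::Register createVRegFor(llvm::MachineRegisterInfo &MRI,
                             const llvm::TargetLowering &TLI,
                             llvm::MVT VT, bool IsDivergent) {
  const llvm::TargetRegisterClass *RC = TLI.getRegClassFor(VT, IsDivergent);
  return MRI.createVirtualRegister(RC);
}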
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"


@ -363,20 +363,14 @@ public:
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
/// Return true if the (vector) instruction I will be lowered to an instruction
/// with a scalar splat operand for the given Operand number.
bool canSplatOperand(Instruction *I, int Operand) const;
/// Return true if a vector instruction will lower to a target instruction
/// able to splat the given operand.
bool canSplatOperand(unsigned Opcode, int Operand) const;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
bool shouldScalarizeBinop(SDValue VecOp) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
@ -495,6 +489,9 @@ public:
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
const TargetRegisterClass *getRegClassFor(MVT VT,
bool isDivergent) const override;
// Lower incoming arguments, copy physregs into vregs
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,


@ -1140,32 +1140,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
}
const uint64_t TSFlags = Desc.TSFlags;
if (RISCVII::hasMergeOp(TSFlags)) {
unsigned OpIdx = RISCVII::getMergeOpNum(Desc);
if (MI.findTiedOperandIdx(0) != OpIdx) {
ErrInfo = "Merge op improperly tied";
return false;
}
}
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
if (!Op.isImm() && !Op.isReg()) {
ErrInfo = "Invalid operand type for VL operand";
return false;
}
if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
ErrInfo = "Invalid register class for VL operand";
return false;
}
}
if (!RISCVII::hasSEWOp(TSFlags)) {
ErrInfo = "VL operand w/o SEW operand?";
return false;
}
}
if (RISCVII::hasSEWOp(TSFlags)) {
unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
@ -1179,27 +1153,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
}
if (RISCVII::hasVecPolicyOp(TSFlags)) {
unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
uint64_t Policy = MI.getOperand(OpIdx).getImm();
if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
ErrInfo = "Invalid Policy Value";
return false;
}
if (!RISCVII::hasVLOp(TSFlags)) {
ErrInfo = "policy operand w/o VL operand?";
return false;
}
// VecPolicy operands can only exist on instructions with passthru/merge
// arguments. Note that not all arguments with passthru have vec policy
// operands- some instructions have implicit policies.
unsigned UseOpIdx;
if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
ErrInfo = "policy operand w/o tied operand?";
return false;
}
}
return true;
}
@ -1478,17 +1431,6 @@ std::string RISCVInstrInfo::createMIROperandComment(
uint64_t TSFlags = MI.getDesc().TSFlags;
// Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
// operand of vector codegen pseudos.
if (RISCVII::hasVecPolicyOp(TSFlags) &&
OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
unsigned Policy = MI.getOperand(OpIdx).getImm();
assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
"Invalid Policy Value");
OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
<< (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
}
OS.flush();
return Comment;
}


@ -103,13 +103,6 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasBP(MF))
markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp
// V registers for code generation. We handle them manually.
markSuperRegs(Reserved, RISCV::VL);
markSuperRegs(Reserved, RISCV::VTYPE);
markSuperRegs(Reserved, RISCV::VXSAT);
markSuperRegs(Reserved, RISCV::VXRM);
markSuperRegs(Reserved, RISCV::VLENB); // vlenb (constant)
// Floating point environment registers.
markSuperRegs(Reserved, RISCV::FRM);
markSuperRegs(Reserved, RISCV::FFLAGS);
@ -161,6 +154,39 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return true;
}
bool RISCVRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
Register Reg) const {
const TargetRegisterClass *RC;
if (Reg.isVirtual())
RC = MRI.getRegClass(Reg);
else
RC = getPhysRegClass(Reg);
return RC ? isSGPRClass(RC) : false;
}
const TargetRegisterClass *
RISCVRegisterInfo::getPhysRegClass(MCRegister Reg) const {
static const TargetRegisterClass *const BaseClasses[] = {
/*
&RISCV::VGPR_LO16RegClass,
&RISCV::VGPR_HI16RegClass,
&RISCV::SReg_LO16RegClass,
&RISCV::SReg_HI16RegClass,
&RISCV::SReg_32RegClass,
*/
&RISCV::VGPRRegClass,
&RISCV::GPRRegClass,
};
for (const TargetRegisterClass *BaseClass : BaseClasses) {
if (BaseClass->contains(Reg)) {
return BaseClass;
}
}
assert(0 && "TODO: Add sub/super registers");
return nullptr;
}
void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
const DebugLoc &DL, Register DestReg,


@ -20,10 +20,43 @@
namespace llvm {
// This needs to be kept in sync with the field bits in VentusRegisterClass.
enum RISCVRCFlags {
IsVGPR = 1 << 0,
IsSGPR = 1 << 1
}; // enum RISCVRCFlags
struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
RISCVRegisterInfo(unsigned HwMode);
/// \returns true if this class contains VGPR registers.
static bool hasVGPRs(const TargetRegisterClass *RC) {
return RC->TSFlags & RISCVRCFlags::IsVGPR;
}
/// \returns true if this class contains SGPR registers.
static bool hasSGPRs(const TargetRegisterClass *RC) {
return RC->TSFlags & RISCVRCFlags::IsSGPR;
}
/// Return the 'base' register class for this register.
/// e.g. X5 => GPR, V3 => VGPR, etc.
const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;
/// \returns true if this class contains only SGPR registers
static bool isSGPRClass(const TargetRegisterClass *RC) {
return hasSGPRs(RC) && !hasVGPRs(RC);
}
/// \returns true if this class ID contains only SGPR registers
bool isSGPRClassID(unsigned RCID) const {
return isSGPRClass(getRegClass(RCID));
}
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
@ -33,6 +66,10 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
bool isAsmClobberable(const MachineFunction &MF,
MCRegister PhysReg) const override;
bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
return !isSGPRClass(RC);
}
const uint32_t *getNoPreservedMask() const override;
bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,


@ -0,0 +1,75 @@
//===-- RISCVSearchableTables.td ---------------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Resource intrinsics table.
//===----------------------------------------------------------------------===//
class RsrcIntrinsic<RISCVRsrcIntrinsic intr> {
Intrinsic Intr = !cast<Intrinsic>(intr);
bits<8> RsrcArg = intr.RsrcArg;
bit IsImage = intr.IsImage;
}
/*
def RsrcIntrinsics : GenericTable {
let FilterClass = "RsrcIntrinsic";
let Fields = ["Intr", "RsrcArg", "IsImage"];
let PrimaryKey = ["Intr"];
let PrimaryKeyName = "lookupRsrcIntrinsic";
}
foreach intr = !listconcat(RISCVBufferIntrinsics,
RISCVImageDimIntrinsics,
RISCVImageDimAtomicIntrinsics) in {
def : RsrcIntrinsic<!cast<RISCVRsrcIntrinsic>(intr)>;
}
*/
class VentusBufferFormatBase<bits<8> f, bits<8> bpc, bits<8> numc,
bits<8> nfmt, bits<8> dfmt> {
bits<8> Format = f;
bits<8> BitsPerComp = bpc;
bits<8> NumComponents = numc;
bits<8> NumFormat = nfmt;
bits<8> DataFormat = dfmt;
}
class VentusBufferFormatTable : GenericTable {
let CppTypeName = "GcnBufferFormatInfo";
let Fields = ["Format", "BitsPerComp", "NumComponents", "NumFormat", "DataFormat"];
let PrimaryKey = ["BitsPerComp", "NumComponents", "NumFormat"];
}
class SourceOfDivergence<Intrinsic intr> {
Intrinsic Intr = intr;
}
def SourcesOfDivergence : GenericTable {
let FilterClass = "SourceOfDivergence";
let Fields = ["Intr"];
let PrimaryKey = ["Intr"];
let PrimaryKeyName = "lookupSourceOfDivergence";
}
def : SourceOfDivergence<int_riscv_workitem_id_x>;
def : SourceOfDivergence<int_riscv_workitem_id_y>;
def : SourceOfDivergence<int_riscv_workitem_id_z>;
//def : SourceOfDivergence<int_riscv_interp_mov>;
//def : SourceOfDivergence<int_riscv_interp_p1>;
// The dummy boolean output is divergent from the IR's perspective,
// but the mask results are uniform. These produce a divergent and
// uniform result, so the returned struct is collectively divergent.
// isAlwaysUniform can override the extract of the uniform component.
//def : SourceOfDivergence<int_riscv_if>;
//def : SourceOfDivergence<int_riscv_else>;
//def : SourceOfDivergence<int_riscv_loop>;
//foreach intr = RISCVImageDimAtomicIntrinsics in
//def : SourceOfDivergence<intr>;


@ -258,58 +258,8 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
// TODO: Handle scalar type.
if (!LT.second.isVector())
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
auto getConstantMatCost =
[&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
// Two sub-cases:
// * Has a 5 bit immediate operand which can be splatted.
// * Has a larger immediate which must be materialized in scalar register
// We return 0 for both as we currently ignore the cost of materializing
// scalar constants in GPRs.
return 0;
// Add a cost of address generation + the cost of the vector load. The
// address is expected to be a PC relative offset to a constant pool entry
// using auipc/addi.
return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
/*AddressSpace=*/0, CostKind);
};
// Add the cost of materializing any constant vectors required.
InstructionCost ConstantMatCost = 0;
if (Op1Info.isConstant())
ConstantMatCost += getConstantMatCost(0, Op1Info);
if (Op2Info.isConstant())
ConstantMatCost += getConstantMatCost(1, Op2Info);
switch (TLI->InstructionOpcodeToISD(Opcode)) {
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FNEG: {
return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
}
default:
return ConstantMatCost +
BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
}
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
}
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,


@ -27,6 +27,16 @@ let Namespace = "RISCV" in {
// vALUop vd, vs0, vs1
// where the first instruction tells the HW to use v[0-31] for vd and vs1,
// and v[32-63] for vs0.
class RVRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
: RegisterClass <n, rTypes, Align, rList> {
// vALU and sALU registers
field bit IsVGPR = 0;
field bit IsSGPR = 0;
let TSFlags{0} = IsVGPR;
let TSFlags{1} = IsSGPR;
}
class RISCVReg<bits<8> Enc, string n, list<string> alt = []> : Register<n> {
let HWEncoding{7-0} = Enc;
let AltNames = alt;
@ -235,7 +245,7 @@ def XLenRI : RegInfoByHwMode<
// The order of registers represents the preferred allocation sequence.
// Registers are listed in the order caller-save, callee-save, specials.
def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPR : RVRegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 10, 17),
(sequence "X%u", 5, 7),
(sequence "X%u", 28, 31),
@ -244,49 +254,57 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 0, 4)
)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
def GPRX0 : RVRegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
def GPRNoX0 : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
def GPRNoX0X2 : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
// Don't use X1 or X5 for JALR since that is a hint to pop the return address
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
// X3, and X4 as it reduces the number of register classes that get synthesized
// by tablegen.
def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
def GPRJALR : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPRC : RVRegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 10, 15),
(sequence "X%u", 8, 9)
)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
// For indirect tail calls, we can't use callee-saved registers, as they are
// restored to the saved value before the tail call, which would clobber a call
// address. We shouldn't use x5 since that is a hint to pop the return
// address stack on some microarchitectures.
def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPRTC : RVRegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 6, 7),
(sequence "X%u", 10, 17),
(sequence "X%u", 28, 31)
)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
def SP : RVRegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
// Floating point registers
@ -466,11 +484,13 @@ def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16]>;
// TODO: Add half and register pair support!!
// FIXME: BranchCC_vvi doesn't support f32 VGPR in pattern(riscv_brcc).
// VGPR 32-bit registers class
def VGPR : RegisterClass<"RISCV",
def VGPR : RVRegisterClass<"RISCV",
// !listconcat(Reg32Types.types, Reg16Types.types),
[i32/*, f32*/],
32,
(add (sequence "V%u", 0, 255))>;
(add (sequence "V%u", 0, 255))> {
let IsVGPR = 1;
}
// VGPR 64-bit registers class
def VGPR_64 : VentusRegisterTuples<getSubRegs<2>.ret, VGPR, 255, 1, 2, "v">;