Drafting divergent related code, not working yet.
parent 878fba7d40
commit dee3135130
@@ -15,6 +15,30 @@
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

//===----------------------------------------------------------------------===//
// Ventus OpenCL builtins.
//===----------------------------------------------------------------------===//

BUILTIN(__builtin_riscv_workgroup_id_x, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_id_y, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_id_z, "Ui", "nc")

BUILTIN(__builtin_riscv_workitem_id_x, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_y, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_z, "Ui", "nc")

BUILTIN(__builtin_riscv_workgroup_size_x, "Us", "nc")
BUILTIN(__builtin_riscv_workgroup_size_y, "Us", "nc")
BUILTIN(__builtin_riscv_workgroup_size_z, "Us", "nc")

BUILTIN(__builtin_riscv_grid_size_x, "Ui", "nc")
BUILTIN(__builtin_riscv_grid_size_y, "Ui", "nc")
BUILTIN(__builtin_riscv_grid_size_z, "Ui", "nc")

//===----------------------------------------------------------------------===//
// Standard RISCV instruction builtins.
//===----------------------------------------------------------------------===//

// Zbb extension
TARGET_BUILTIN(__builtin_riscv_orc_b_32, "ZiZi", "nc", "zbb")
TARGET_BUILTIN(__builtin_riscv_orc_b_64, "WiWi", "nc", "zbb,64bit")
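For context, a minimal OpenCL C sketch (not part of this commit) of how these builtins are meant to be used; the libclc wrappers added later in this commit build get_group_id()/get_local_id() and friends on top of them. The kernel below is hypothetical and only assumes the prototypes declared above ("Ui" = unsigned int, "Us" = unsigned short).

// Hypothetical kernel: computes a linear global index directly from the
// work-group id, work-group size, and work-item id builtins.
__kernel void vadd(__global const int *a, __global const int *b,
                   __global int *c) {
  uint grp = __builtin_riscv_workgroup_id_x();   // "Ui" -> unsigned int
  uint lid = __builtin_riscv_workitem_id_x();    // "Ui" -> unsigned int
  uint lsz = __builtin_riscv_workgroup_size_x(); // "Us" -> unsigned short
  size_t i = (size_t)grp * lsz + lid;
  c[i] = a[i] + b[i];
}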
@@ -116,6 +116,10 @@ public:
    IntPtrType = SignedInt;
    PtrDiffType = SignedInt;
    SizeType = UnsignedInt;
    //HasLegalHalfType = true;
    //HasFloat16 = true;
    //resetDataLayout("e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    //                "-v256:256-v512:512-v1024:1024-n32:64-S128");
    resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128");
  }

@@ -133,6 +137,25 @@ public:
    if (ISAInfo->hasExtension("a"))
      MaxAtomicInlineWidth = 32;
  }

  void setSupportedOpenCLOpts() override {
    auto &Opts = getSupportedOpenCLOpts();
    Opts["cl_khr_fp16"] = true;
    Opts["cl_clang_storage_class_specifiers"] = true;
    Opts["__cl_clang_variadic_functions"] = true;
    Opts["__opencl_c_images"] = true;
    Opts["__opencl_c_3d_image_writes"] = true;
  }

  CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
    switch (CC) {
    default:
      return CCCR_Warning;
    case CC_C:
    case CC_OpenCLKernel:
      return CCCR_OK;
    }
  }
};

class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
public:
@@ -19460,6 +19460,33 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
  llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
  switch (BuiltinID) {
  default: llvm_unreachable("unexpected builtin ID");

  // Ventus GPGPU workitem
  case RISCV::BI__builtin_riscv_workitem_id_x:
    return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_x, 0, 1024);
  case RISCV::BI__builtin_riscv_workitem_id_y:
    return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_y, 0, 1024);
  case RISCV::BI__builtin_riscv_workitem_id_z:
    return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_z, 0, 1024);

  // FIXME: Use AMDGPU function here temporarily

  // Ventus GPGPU workgroup size
  case RISCV::BI__builtin_riscv_workgroup_size_x:
    return EmitAMDGPUWorkGroupSize(*this, 0);
  case RISCV::BI__builtin_riscv_workgroup_size_y:
    return EmitAMDGPUWorkGroupSize(*this, 1);
  case RISCV::BI__builtin_riscv_workgroup_size_z:
    return EmitAMDGPUWorkGroupSize(*this, 2);

  // Ventus GPGPU grid size
  case RISCV::BI__builtin_riscv_grid_size_x:
    return EmitAMDGPUGridSize(*this, 0);
  case RISCV::BI__builtin_riscv_grid_size_y:
    return EmitAMDGPUGridSize(*this, 1);
  case RISCV::BI__builtin_riscv_grid_size_z:
    return EmitAMDGPUGridSize(*this, 2);

  case RISCV::BI__builtin_riscv_orc_b_32:
  case RISCV::BI__builtin_riscv_orc_b_64:
  case RISCV::BI__builtin_riscv_clz_32:
@@ -0,0 +1,7 @@
workitem/get_global_offset.cl
workitem/get_group_id.cl
workitem/get_global_size.cl
workitem/get_local_id.cl
workitem/get_local_size.cl
workitem/get_num_groups.cl
workitem/get_work_dim.cl
@@ -0,0 +1,19 @@
#include <clc/clc.h>

#if __clang_major__ >= 8
#define CONST_AS __constant
#elif __clang_major__ >= 7
#define CONST_AS __attribute__((address_space(4)))
#else
#define CONST_AS __attribute__((address_space(2)))
#endif

_CLC_DEF _CLC_OVERLOAD size_t get_global_offset(uint dim) {
  assert(0 && "TODO");
  /*
  CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
  if (dim < 3)
    return ptr[dim + 1];
  */
  return 0;
}
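get_global_offset() is still a stub here (it asserts and returns 0). For reference, once it is implemented, get_global_id() is conventionally composed from the pieces this commit does provide; a sketch under that assumption, not part of the commit:

// Hypothetical composition per the OpenCL spec:
//   global_id(d) = group_id(d) * local_size(d) + local_id(d) + global_offset(d)
_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) {
  return get_group_id(dim) * get_local_size(dim) + get_local_id(dim) +
         get_global_offset(dim);
}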
@@ -0,0 +1,18 @@
#include <clc/clc.h>

uint __clc_riscv_get_global_size_x(void) __asm("llvm.riscv.read.global.size.x");
uint __clc_riscv_get_global_size_y(void) __asm("llvm.riscv.read.global.size.y");
uint __clc_riscv_get_global_size_z(void) __asm("llvm.riscv.read.global.size.z");

_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
  switch (dim) {
  case 0:
    return __clc_riscv_get_global_size_x();
  case 1:
    return __clc_riscv_get_global_size_y();
  case 2:
    return __clc_riscv_get_global_size_z();
  default:
    return 1;
  }
}
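A small usage sketch, not part of the commit, showing how get_global_size() typically combines with the other wrappers added here. For uniform work-groups, get_global_size(d) == get_num_groups(d) * get_local_size(d); the kernel below uses only functions introduced in this commit and ignores get_global_offset(), which is still a TODO.

// Hypothetical grid-stride loop: each work-item starts at its linear global
// index and strides by the total number of work-items.
__kernel void scale(__global float *buf, float k, uint n) {
  size_t gid = get_group_id(0) * get_local_size(0) + get_local_id(0);
  for (size_t i = gid; i < n; i += get_global_size(0))
    buf[i] = buf[i] * k;
}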
@@ -0,0 +1,14 @@
#include <clc/clc.h>

_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
  switch (dim) {
  case 0:
    return __builtin_riscv_workgroup_id_x();
  case 1:
    return __builtin_riscv_workgroup_id_y();
  case 2:
    return __builtin_riscv_workgroup_id_z();
  default:
    return 1;
  }
}
@@ -0,0 +1,14 @@
#include <clc/clc.h>

_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
  switch (dim) {
  case 0:
    return __builtin_riscv_workitem_id_x();
  case 1:
    return __builtin_riscv_workitem_id_y();
  case 2:
    return __builtin_riscv_workitem_id_z();
  default:
    return 1;
  }
}
@@ -0,0 +1,18 @@
#include <clc/clc.h>

uint __clc_riscv_get_local_size_x(void) __asm("llvm.riscv.read.local.size.x");
uint __clc_riscv_get_local_size_y(void) __asm("llvm.riscv.read.local.size.y");
uint __clc_riscv_get_local_size_z(void) __asm("llvm.riscv.read.local.size.z");

_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
  switch (dim) {
  case 0:
    return __clc_riscv_get_local_size_x();
  case 1:
    return __clc_riscv_get_local_size_y();
  case 2:
    return __clc_riscv_get_local_size_z();
  default:
    return 1;
  }
}
@@ -0,0 +1,18 @@
#include <clc/clc.h>

uint __clc_riscv_get_num_groups_x(void) __asm("llvm.riscv.read.ngroups.x");
uint __clc_riscv_get_num_groups_y(void) __asm("llvm.riscv.read.ngroups.y");
uint __clc_riscv_get_num_groups_z(void) __asm("llvm.riscv.read.ngroups.z");

_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
  switch (dim) {
  case 0:
    return __clc_riscv_get_num_groups_x();
  case 1:
    return __clc_riscv_get_num_groups_y();
  case 2:
    return __clc_riscv_get_num_groups_z();
  default:
    return 1;
  }
}
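As a usage illustration only (not part of this commit), the wrappers above are enough for a standard work-group reduction, assuming barrier() and CLK_LOCAL_MEM_FENCE from clc are functional on this target:

// Hypothetical work-group sum reduction using the new work-item functions.
// Assumes a power-of-two work-group size.
__kernel void wg_sum(__global const int *in, __global int *out,
                     __local int *scratch) {
  size_t lid = get_local_id(0);
  size_t lsz = get_local_size(0);
  size_t gid = get_group_id(0) * lsz + lid;

  scratch[lid] = in[gid];
  barrier(CLK_LOCAL_MEM_FENCE);

  for (size_t stride = lsz / 2; stride > 0; stride /= 2) {
    if (lid < stride)
      scratch[lid] += scratch[lid + stride];
    barrier(CLK_LOCAL_MEM_FENCE);
  }
  if (lid == 0)
    out[get_group_id(0)] = scratch[0];
}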
@@ -0,0 +1,15 @@
#include <clc/clc.h>

#if __clang_major__ >= 8
#define CONST_AS __constant
#elif __clang_major__ >= 7
#define CONST_AS __attribute__((address_space(4)))
#else
#define CONST_AS __attribute__((address_space(2)))
#endif

_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
  assert(0 && "TODO");
  // CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
  // return ptr[0];
}
@@ -10,6 +10,40 @@
//
//===----------------------------------------------------------------------===//

class RISCVReadPreloadRegisterIntrinsic
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;

class RISCVReadPreloadRegisterIntrinsicNamed<string name>
  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>,
    ClangBuiltin<name>;

// Used to tag image and resource intrinsics with information used to generate
// mem operands.
class RISCVRsrcIntrinsic<int rsrcarg, bit isimage = false> {
  int RsrcArg = rsrcarg;
  bit IsImage = isimage;
}

multiclass RISCVReadPreloadRegisterIntrinsic_xyz {
  def _x : RISCVReadPreloadRegisterIntrinsic;
  def _y : RISCVReadPreloadRegisterIntrinsic;
  def _z : RISCVReadPreloadRegisterIntrinsic;
}

multiclass RISCVReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
  def _x : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
  def _y : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
  def _z : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}

//===----------------------------------------------------------------------===//
// ABI Special Intrinsics

defm int_riscv_workitem_id : RISCVReadPreloadRegisterIntrinsic_xyz;
defm int_riscv_workgroup_id : RISCVReadPreloadRegisterIntrinsic_xyz_named
                                <"__builtin_riscv_workgroup_id">;

//===----------------------------------------------------------------------===//
// Atomics

@@ -570,7 +604,7 @@ let TargetPrefix = "riscv" in {
class RISCVClassifyMasked
  : Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
              [LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty,
               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
               LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
               llvm_anyint_ty, LLVMMatchType<1>],
              [IntrNoMem, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic {
    let VLOperand = 3;
@@ -133,7 +133,8 @@ parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) {
unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
                                 bool TailAgnostic, bool MaskAgnostic) {
  assert(isValidSEW(SEW) && "Invalid SEW");
  unsigned VLMULBits = static_cast<unsigned>(VLMUL);
  // LMUL1 encoding is 000
  unsigned VLMULBits = 0;
  unsigned VSEWBits = encodeSEW(SEW);
  unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7);
  if (TailAgnostic)

@@ -144,57 +145,23 @@ unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
  return VTypeI;
}

std::pair<unsigned, bool> RISCVVType::decodeVLMUL(RISCVII::VLMUL VLMUL) {
  switch (VLMUL) {
  default:
    llvm_unreachable("Unexpected LMUL value!");
  case RISCVII::VLMUL::LMUL_1:
  case RISCVII::VLMUL::LMUL_2:
  case RISCVII::VLMUL::LMUL_4:
  case RISCVII::VLMUL::LMUL_8:
    return std::make_pair(1 << static_cast<unsigned>(VLMUL), false);
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F8:
    return std::make_pair(1 << (8 - static_cast<unsigned>(VLMUL)), true);
  }
}

void RISCVVType::printVType(unsigned VType, raw_ostream &OS) {
  unsigned Sew = getSEW(VType);
  OS << "e" << Sew;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = decodeVLMUL(getVLMUL(VType));

  if (Fractional)
    OS << ", mf";
  else
    OS << ", m";
  OS << LMul;

  if (isTailAgnostic(VType))
    OS << ", ta";
  else
    OS << ", tu";

  if (isMaskAgnostic(VType))
    OS << ", ma";
  else
    OS << ", mu";
}

unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = decodeVLMUL(VLMul);

  // Convert LMul to a fixed point value with 3 fractional bits.
  LMul = Fractional ? (8 / LMul) : (LMul * 8);

  assert(SEW >= 8 && "Unexpected SEW value");
  return (SEW * 8) / LMul;
  OS << "e" << Sew << ", m1, ta, ma";
}

} // namespace llvm

namespace {
struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#include "RISCVGenSearchableTables.inc"
} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}
@@ -116,10 +116,6 @@ enum VLMUL : uint8_t {
  LMUL_F2
};

enum {
  TAIL_AGNOSTIC = 1,
  MASK_AGNOSTIC = 2,
};

// Helper functions to read TSFlags.
/// \returns the format of the instruction.

@@ -131,10 +127,7 @@ static inline VConstraintType getConstraint(uint64_t TSFlags) {
  return static_cast<VConstraintType>((TSFlags & ConstraintMask) >>
                                      ConstraintShift);
}
/// \returns the LMUL for the instruction.
static inline VLMUL getLMul(uint64_t TSFlags) {
  return static_cast<VLMUL>((TSFlags & VLMulMask) >> VLMulShift);
}

/// \returns true if there is a dummy mask operand for the instruction.
static inline bool hasDummyMaskOp(uint64_t TSFlags) {
  return TSFlags & HasDummyMaskOpMask;

@@ -143,59 +136,17 @@ static inline bool hasDummyMaskOp(uint64_t TSFlags) {
static inline bool doesForceTailAgnostic(uint64_t TSFlags) {
  return TSFlags & ForceTailAgnosticMask;
}
/// \returns true if there is a merge operand for the instruction.
static inline bool hasMergeOp(uint64_t TSFlags) {
  return TSFlags & HasMergeOpMask;
}
/// \returns true if there is a SEW operand for the instruction.
static inline bool hasSEWOp(uint64_t TSFlags) {
  return TSFlags & HasSEWOpMask;
}
/// \returns true if there is a VL operand for the instruction.
static inline bool hasVLOp(uint64_t TSFlags) {
  return TSFlags & HasVLOpMask;
}
/// \returns true if there is a vector policy operand for this instruction.
static inline bool hasVecPolicyOp(uint64_t TSFlags) {
  return TSFlags & HasVecPolicyOpMask;
}
/// \returns true if it is a vector widening reduction instruction.
static inline bool isRVVWideningReduction(uint64_t TSFlags) {
  return TSFlags & IsRVVWideningReductionMask;
}
/// \returns true if mask policy is valid for the instruction.
static inline bool usesMaskPolicy(uint64_t TSFlags) {
  return TSFlags & UsesMaskPolicyMask;
}

static inline unsigned getMergeOpNum(const MCInstrDesc &Desc) {
  assert(hasMergeOp(Desc.TSFlags));
  assert(!Desc.isVariadic());
  return Desc.getNumDefs();
}

static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
  const uint64_t TSFlags = Desc.TSFlags;
  // This method is only called if we expect to have a VL operand, and all
  // instructions with VL also have SEW.
  assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
  unsigned Offset = 2;
  if (hasVecPolicyOp(TSFlags))
    Offset = 3;
  return Desc.getNumOperands() - Offset;
}
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
  const uint64_t TSFlags = Desc.TSFlags;
  assert(hasSEWOp(TSFlags));
  unsigned Offset = 1;
  if (hasVecPolicyOp(TSFlags))
    Offset = 2;
  return Desc.getNumOperands() - Offset;
}

static inline unsigned getVecPolicyOpNum(const MCInstrDesc &Desc) {
  assert(hasVecPolicyOp(Desc.TSFlags));
  return Desc.getNumOperands() - 1;
}

@@ -449,13 +400,12 @@ inline static unsigned getSEW(unsigned VType) {
  return decodeVSEW(VSEW);
}

inline static bool isTailAgnostic(unsigned VType) { return VType & 0x40; }
inline static bool isTailAgnostic(unsigned VType) { return 1; }

inline static bool isMaskAgnostic(unsigned VType) { return VType & 0x80; }
inline static bool isMaskAgnostic(unsigned VType) { return 1; }

void printVType(unsigned VType, raw_ostream &OS);

unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);

} // namespace RISCVVType
@@ -542,6 +542,7 @@ include "VentusInstrInfo.td"
include "GISel/RISCVRegisterBanks.td"
include "RISCVSchedRocket.td"
include "RISCVSchedSiFive7.td"
include "RISCVSearchableTables.td"

//===----------------------------------------------------------------------===//
// RISC-V processors supported.
@@ -28,15 +28,6 @@ using namespace llvm;

#define DEBUG_TYPE "riscv-isel"

namespace llvm::RISCV {
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV

static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
  assert(Node->getNumOperands() > 0 && "Node with no operands");
  unsigned LastOpIdx = Node->getNumOperands() - 1;

@@ -47,104 +38,6 @@ static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
  return LastOpIdx;
}

static unsigned getVecPolicyOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
  assert(RISCVII::hasVecPolicyOp(MCID.TSFlags));
  (void)MCID;
  return getLastNonGlueOrChainOpIdx(Node);
}

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
                               N->getOperand(0), VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      RISCVMachineFunctionInfo *FuncInfo =
          MF.getInfo<RISCVMachineFunctionInfo>();
      SDLoc DL(N);

      // We use the same frame index we use for moving two i32s into 64-bit FPR.
      // This is an analogous operation.
      int FI = FuncInfo->getMoveF64FrameIndex(MF);
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
      const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
      SDValue StackSlot =
          CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
@@ -37,7 +37,6 @@ public:
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  void Select(SDNode *Node) override;

@@ -108,50 +107,6 @@ private:
  bool doPeepholeSExtW(SDNode *Node);
};

namespace RISCV {
struct VLEPseudo {
  uint16_t Masked : 1;
  uint16_t IsTU : 1;
  uint16_t Strided : 1;
  uint16_t FF : 1;
  uint16_t Log2SEW : 3;
  uint16_t LMUL : 3;
  uint16_t Pseudo;
};

struct VSEPseudo {
  uint16_t Masked :1;
  uint16_t Strided : 1;
  uint16_t Log2SEW : 3;
  uint16_t LMUL : 3;
  uint16_t Pseudo;
};

struct VLX_VSXPseudo {
  uint16_t Masked : 1;
  uint16_t IsTU : 1;
  uint16_t Ordered : 1;
  uint16_t Log2SEW : 3;
  uint16_t LMUL : 3;
  uint16_t IndexLMUL : 3;
  uint16_t Pseudo;
};

struct RISCVMaskedPseudoInfo {
  uint16_t MaskedPseudo;
  uint16_t UnmaskedPseudo;
  uint16_t UnmaskedTUPseudo;
  uint8_t MaskOpIdx;
};

#define GET_RISCVVLETable_DECL
#define GET_RISCVVSETable_DECL
#define GET_RISCVVLXTable_DECL
#define GET_RISCVVSXTable_DECL
#define GET_RISCVMaskedPseudosTable_DECL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV

} // namespace llvm

#endif
@@ -21,7 +21,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

@@ -958,114 +960,6 @@ bool RISCVTargetLowering::
  return !XC;
}

bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
  switch (Opcode) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Operand == 1;
  default:
    return false;
  }
}

bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  if (canSplatOperand(I->getOpcode(), Operand))
    return true;

  auto *II = dyn_cast<IntrinsicInst>(I);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
    return Operand == 1;
  // These intrinsics are commutative.
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  // These intrinsics have 'vr' versions.
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  default:
    return false;
  }
}

/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
    return false;

  for (auto OpIdx : enumerate(I->operands())) {
    if (!canSplatOperand(I, OpIdx.index()))
      continue;

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    // Make sure we are not already sinking this operand
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
      continue;

    // We are looking for a splat that can be sunk.
    if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
                             m_Undef(), m_ZeroMask())))
      continue;

    // All uses of the shuffle should be sunk to avoid duplicating it across gpr
    // and vector registers
    for (Use &U : Op->uses()) {
      Instruction *Insn = cast<Instruction>(U.getUser());
      if (!canSplatOperand(Insn, U.getOperandNo()))
        return false;
    }

    Ops.push_back(&Op->getOperandUse(0));
    Ops.push_back(&OpIdx.value());
  }
  return true;
}

bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

@@ -1878,7 +1772,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT, false));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

@@ -5635,7 +5529,7 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT(), true);
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  return DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
@@ -7261,6 +7155,88 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  return OptSize && !VT.isVector();
}

bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
    const SDNode *N, FunctionLoweringInfo *FLI,
    LegacyDivergenceAnalysis *KDA) const {
  switch (N->getOpcode()) {
  case ISD::CopyFromReg: {
    const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
    const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
    const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
    Register Reg = R->getReg();

    // FIXME: Why does this need to consider isLiveIn?
    if (Reg.isPhysical() || MRI.isLiveIn(Reg))
      return !TRI->isSGPRReg(MRI, Reg);

    if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
      return KDA->isDivergent(V);

    return !TRI->isSGPRReg(MRI, Reg);
  }
  case ISD::LOAD: {
    const LoadSDNode *L = cast<LoadSDNode>(N);
    unsigned AS = L->getAddressSpace();
    // A flat load may access private memory.
    // return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
    return true;
  }
  case ISD::CALLSEQ_END:
    return true;
  case ISD::INTRINSIC_WO_CHAIN:
    return RISCVII::isIntrinsicSourceOfDivergence(
        cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
  case ISD::INTRINSIC_W_CHAIN:
    return RISCVII::isIntrinsicSourceOfDivergence(
        cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
  /*
  case AMDGPUISD::ATOMIC_CMP_SWAP:
  case AMDGPUISD::ATOMIC_INC:
  case AMDGPUISD::ATOMIC_DEC:
  case AMDGPUISD::ATOMIC_LOAD_FMIN:
  case AMDGPUISD::ATOMIC_LOAD_FMAX:
  case AMDGPUISD::BUFFER_ATOMIC_SWAP:
  case AMDGPUISD::BUFFER_ATOMIC_ADD:
  case AMDGPUISD::BUFFER_ATOMIC_SUB:
  case AMDGPUISD::BUFFER_ATOMIC_SMIN:
  case AMDGPUISD::BUFFER_ATOMIC_UMIN:
  case AMDGPUISD::BUFFER_ATOMIC_SMAX:
  case AMDGPUISD::BUFFER_ATOMIC_UMAX:
  case AMDGPUISD::BUFFER_ATOMIC_AND:
  case AMDGPUISD::BUFFER_ATOMIC_OR:
  case AMDGPUISD::BUFFER_ATOMIC_XOR:
  case AMDGPUISD::BUFFER_ATOMIC_INC:
  case AMDGPUISD::BUFFER_ATOMIC_DEC:
  case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP:
  case AMDGPUISD::BUFFER_ATOMIC_CSUB:
  case AMDGPUISD::BUFFER_ATOMIC_FADD:
  case AMDGPUISD::BUFFER_ATOMIC_FMIN:
  case AMDGPUISD::BUFFER_ATOMIC_FMAX:
    // Target-specific read-modify-write atomics are sources of divergence.
    return true;
  */
  default:
    if (auto *A = dyn_cast<AtomicSDNode>(N)) {
      // Generic read-modify-write atomics are sources of divergence.
      return A->readMem() && A->writeMem();
    }
    return false;
  }
}

// TODO: Support child registers
const TargetRegisterClass *
RISCVTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
  const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
  if (!TRI->isSGPRClass(RC) && !isDivergent)
    return &RISCV::GPRRegClass;
  else if (TRI->isSGPRClass(RC) && isDivergent)
    return &RISCV::VGPRRegClass;

  return RC;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
|
|
@ -363,20 +363,14 @@ public:
|
|||
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
|
||||
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
|
||||
SelectionDAG &DAG) const override;
|
||||
/// Return true if the (vector) instruction I will be lowered to an instruction
|
||||
/// with a scalar splat operand for the given Operand number.
|
||||
bool canSplatOperand(Instruction *I, int Operand) const;
|
||||
/// Return true if a vector instruction will lower to a target instruction
|
||||
/// able to splat the given operand.
|
||||
bool canSplatOperand(unsigned Opcode, int Operand) const;
|
||||
bool shouldSinkOperands(Instruction *I,
|
||||
SmallVectorImpl<Use *> &Ops) const override;
|
||||
bool shouldScalarizeBinop(SDValue VecOp) const override;
|
||||
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
|
||||
bool isFPImmLegal(const APFloat &Imm, EVT VT,
|
||||
bool ForCodeSize) const override;
|
||||
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
|
||||
unsigned Index) const override;
|
||||
bool isSDNodeSourceOfDivergence(const SDNode *N,
|
||||
FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
|
||||
|
||||
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
|
||||
|
||||
|
@ -495,6 +489,9 @@ public:
|
|||
Register getRegisterByName(const char *RegName, LLT VT,
|
||||
const MachineFunction &MF) const override;
|
||||
|
||||
const TargetRegisterClass *getRegClassFor(MVT VT,
|
||||
bool isDivergent) const override;
|
||||
|
||||
// Lower incoming arguments, copy physregs into vregs
|
||||
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool IsVarArg,
|
||||
|
|
|
@@ -1140,32 +1140,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
  }

  const uint64_t TSFlags = Desc.TSFlags;
  if (RISCVII::hasMergeOp(TSFlags)) {
    unsigned OpIdx = RISCVII::getMergeOpNum(Desc);
    if (MI.findTiedOperandIdx(0) != OpIdx) {
      ErrInfo = "Merge op improperly tied";
      return false;
    }
  }
  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
    if (!Op.isImm() && !Op.isReg()) {
      ErrInfo = "Invalid operand type for VL operand";
      return false;
    }
    if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
      const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
      auto *RC = MRI.getRegClass(Op.getReg());
      if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
        ErrInfo = "Invalid register class for VL operand";
        return false;
      }
    }
    if (!RISCVII::hasSEWOp(TSFlags)) {
      ErrInfo = "VL operand w/o SEW operand?";
      return false;
    }
  }
  if (RISCVII::hasSEWOp(TSFlags)) {
    unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
    uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();

@@ -1179,27 +1153,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
      return false;
    }
  }
  if (RISCVII::hasVecPolicyOp(TSFlags)) {
    unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
    uint64_t Policy = MI.getOperand(OpIdx).getImm();
    if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
      ErrInfo = "Invalid Policy Value";
      return false;
    }
    if (!RISCVII::hasVLOp(TSFlags)) {
      ErrInfo = "policy operand w/o VL operand?";
      return false;
    }

    // VecPolicy operands can only exist on instructions with passthru/merge
    // arguments. Note that not all arguments with passthru have vec policy
    // operands- some instructions have implicit policies.
    unsigned UseOpIdx;
    if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
      ErrInfo = "policy operand w/o tied operand?";
      return false;
    }
  }

  return true;
}

@@ -1478,17 +1431,6 @@ std::string RISCVInstrInfo::createMIROperandComment(

  uint64_t TSFlags = MI.getDesc().TSFlags;

  // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
  // operand of vector codegen pseudos.
  if (RISCVII::hasVecPolicyOp(TSFlags) &&
      OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
    unsigned Policy = MI.getOperand(OpIdx).getImm();
    assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
           "Invalid Policy Value");
    OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
       << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
  }

  OS.flush();
  return Comment;
}
@@ -103,13 +103,6 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  if (TFI->hasBP(MF))
    markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp

  // V registers for code generation. We handle them manually.
  markSuperRegs(Reserved, RISCV::VL);
  markSuperRegs(Reserved, RISCV::VTYPE);
  markSuperRegs(Reserved, RISCV::VXSAT);
  markSuperRegs(Reserved, RISCV::VXRM);
  markSuperRegs(Reserved, RISCV::VLENB); // vlenb (constant)

  // Floating point environment registers.
  markSuperRegs(Reserved, RISCV::FRM);
  markSuperRegs(Reserved, RISCV::FFLAGS);

@@ -161,6 +154,39 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
  return true;
}

bool RISCVRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
                                  Register Reg) const {
  const TargetRegisterClass *RC;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);
  else
    RC = getPhysRegClass(Reg);
  return RC ? isSGPRClass(RC) : false;
}

const TargetRegisterClass *
RISCVRegisterInfo::getPhysRegClass(MCRegister Reg) const {
  static const TargetRegisterClass *const BaseClasses[] = {
    /*
    &RISCV::VGPR_LO16RegClass,
    &RISCV::VGPR_HI16RegClass,
    &RISCV::SReg_LO16RegClass,
    &RISCV::SReg_HI16RegClass,
    &RISCV::SReg_32RegClass,
    */
    &RISCV::VGPRRegClass,
    &RISCV::GPRRegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  assert(0 && "TODO: Add sub/super registers");
  return nullptr;
}

void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator II,
                                  const DebugLoc &DL, Register DestReg,
@@ -20,10 +20,43 @@

namespace llvm {

// This needs to be kept in sync with the field bits in VentusRegisterClass.
enum RISCVRCFlags {
  IsVGPR = 1 << 0,
  IsSGPR = 1 << 1
}; // enum RISCVRCFlags

struct RISCVRegisterInfo : public RISCVGenRegisterInfo {

  RISCVRegisterInfo(unsigned HwMode);

  /// \returns true if this class contains VGPR registers.
  static bool hasVGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & RISCVRCFlags::IsVGPR;
  }

  /// \returns true if this class contains SGPR registers.
  static bool hasSGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & RISCVRCFlags::IsSGPR;
  }

  /// Return the 'base' register class for this register.
  /// e.g. X5 => SReg_32, V3 => VGPR_32, X5_X6 -> SReg_32, etc.
  const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;

  /// \returns true if this class contains only SGPR registers
  static bool isSGPRClass(const TargetRegisterClass *RC) {
    return hasSGPRs(RC) && !hasVGPRs(RC);
  }

  /// \returns true if this class ID contains only SGPR registers
  bool isSGPRClassID(unsigned RCID) const {
    return isSGPRClass(getRegClass(RCID));
  }

  bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;

  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID) const override;

@@ -33,6 +66,10 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
  bool isAsmClobberable(const MachineFunction &MF,
                        MCRegister PhysReg) const override;

  bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
    return !isSGPRClass(RC);
  }

  const uint32_t *getNoPreservedMask() const override;

  bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,
@@ -0,0 +1,75 @@
//===-- RISCVSearchableTables.td ---------------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Resource intrinsics table.
//===----------------------------------------------------------------------===//

class RsrcIntrinsic<RISCVRsrcIntrinsic intr> {
  Intrinsic Intr = !cast<Intrinsic>(intr);
  bits<8> RsrcArg = intr.RsrcArg;
  bit IsImage = intr.IsImage;
}
/*
def RsrcIntrinsics : GenericTable {
  let FilterClass = "RsrcIntrinsic";
  let Fields = ["Intr", "RsrcArg", "IsImage"];

  let PrimaryKey = ["Intr"];
  let PrimaryKeyName = "lookupRsrcIntrinsic";
}

foreach intr = !listconcat(RISCVBufferIntrinsics,
                           RISCVImageDimIntrinsics,
                           RISCVImageDimAtomicIntrinsics) in {
  def : RsrcIntrinsic<!cast<RISCVRsrcIntrinsic>(intr)>;
}
*/
class VentusBufferFormatBase<bits<8> f, bits<8> bpc, bits<8> numc,
                             bits<8> nfmt, bits<8> dfmt> {
  bits<8> Format = f;
  bits<8> BitsPerComp = bpc;
  bits<8> NumComponents = numc;
  bits<8> NumFormat = nfmt;
  bits<8> DataFormat = dfmt;
}

class VentusBufferFormatTable : GenericTable {
  let CppTypeName = "GcnBufferFormatInfo";
  let Fields = ["Format", "BitsPerComp", "NumComponents", "NumFormat", "DataFormat"];
  let PrimaryKey = ["BitsPerComp", "NumComponents", "NumFormat"];
}

class SourceOfDivergence<Intrinsic intr> {
  Intrinsic Intr = intr;
}

def SourcesOfDivergence : GenericTable {
  let FilterClass = "SourceOfDivergence";
  let Fields = ["Intr"];

  let PrimaryKey = ["Intr"];
  let PrimaryKeyName = "lookupSourceOfDivergence";
}

def : SourceOfDivergence<int_riscv_workitem_id_x>;
def : SourceOfDivergence<int_riscv_workitem_id_y>;
def : SourceOfDivergence<int_riscv_workitem_id_z>;
//def : SourceOfDivergence<int_riscv_interp_mov>;
//def : SourceOfDivergence<int_riscv_interp_p1>;

// The dummy boolean output is divergent from the IR's perspective,
// but the mask results are uniform. These produce a divergent and
// uniform result, so the returned struct is collectively divergent.
// isAlwaysUniform can override the extract of the uniform component.
//def : SourceOfDivergence<int_riscv_if>;
//def : SourceOfDivergence<int_riscv_else>;
//def : SourceOfDivergence<int_riscv_loop>;

//foreach intr = RISCVImageDimAtomicIntrinsics in
//def : SourceOfDivergence<intr>;
@@ -258,58 +258,8 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  // TODO: Handle scalar type.
  if (!LT.second.isVector())
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                         Args, CxtI);

  auto getConstantMatCost =
      [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
    if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
      // Two sub-cases:
      // * Has a 5 bit immediate operand which can be splatted.
      // * Has a larger immediate which must be materialized in scalar register
      // We return 0 for both as we currently ignore the cost of materializing
      // scalar constants in GPRs.
      return 0;

    // Add a cost of address generation + the cost of the vector load. The
    // address is expected to be a PC relative offset to a constant pool entry
    // using auipc/addi.
    return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
                               /*AddressSpace=*/0, CostKind);
  };

  // Add the cost of materializing any constant vectors required.
  InstructionCost ConstantMatCost = 0;
  if (Op1Info.isConstant())
    ConstantMatCost += getConstantMatCost(0, Op1Info);
  if (Op2Info.isConstant())
    ConstantMatCost += getConstantMatCost(1, Op2Info);

  switch (TLI->InstructionOpcodeToISD(Opcode)) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FNEG: {
    return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
  }
  default:
    return ConstantMatCost +
           BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                         Args, CxtI);
  }
  return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
                                       Args, CxtI);
}

void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
@@ -27,6 +27,16 @@ let Namespace = "RISCV" in {
// vALUop vd, vs0, vs1
// where the first instruction tells HW uses v[0-31] for vd, vs1,
// uses v[32-63] for vs0.
class RVRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
  : RegisterClass <n, rTypes, Align, rList> {
  // vALU and sALU registers
  field bit IsVGPR = 0;
  field bit IsSGPR = 0;

  let TSFlags{0} = IsVGPR;
  let TSFlags{1} = IsSGPR;
}

class RISCVReg<bits<8> Enc, string n, list<string> alt = []> : Register<n> {
  let HWEncoding{7-0} = Enc;
  let AltNames = alt;

@@ -235,7 +245,7 @@ def XLenRI : RegInfoByHwMode<

// The order of registers represents the preferred allocation sequence.
// Registers are listed in the order caller-save, callee-save, specials.
def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPR : RVRegisterClass<"RISCV", [XLenVT], 32, (add
    (sequence "X%u", 10, 17),
    (sequence "X%u", 5, 7),
    (sequence "X%u", 28, 31),

@@ -244,49 +254,57 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
    (sequence "X%u", 0, 4)
  )> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
def GPRX0 : RVRegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
def GPRNoX0 : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
def GPRNoX0X2 : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

// Don't use X1 or X5 for JALR since that is a hint to pop the return address
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
// X3, and X4 as it reduces the number of register classes that get synthesized
// by tablegen.
def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
def GPRJALR : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPRC : RVRegisterClass<"RISCV", [XLenVT], 32, (add
    (sequence "X%u", 10, 15),
    (sequence "X%u", 8, 9)
  )> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

// For indirect tail calls, we can't use callee-saved registers, as they are
// restored to the saved value before the tail call, which would clobber a call
// address. We shouldn't use x5 since that is a hint for to pop the return
// address stack on some microarchitectures.
def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPRTC : RVRegisterClass<"RISCV", [XLenVT], 32, (add
    (sequence "X%u", 6, 7),
    (sequence "X%u", 10, 17),
    (sequence "X%u", 28, 31)
  )> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
def SP : RVRegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
  let RegInfos = XLenRI;
  let IsSGPR = 1;
}

// Floating point registers

@@ -466,11 +484,13 @@ def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16]>;
// TODO: Add half and register pair support!!
// FIMXE: BranchCC_vvi doesn't support f32 VGPR in pattern(riscv_brcc).
// VGPR 32-bit registers class
def VGPR : RegisterClass<"RISCV",
def VGPR : RVRegisterClass<"RISCV",
  // !listconcat(Reg32Types.types, Reg16Types.types),
  [i32/*, f32*/],
  32,
  (add (sequence "V%u", 0, 255))>;
  (add (sequence "V%u", 0, 255))> {
  let IsVGPR = 1;
}

// VGPR 64-bit registers class
def VGPR_64 : VentusRegisterTuples<getSubRegs<2>.ret, VGPR, 255, 1, 2, "v">;