Drafting divergence-related code; not working yet.

Aries 2022-12-19 18:11:34 +08:00
parent 878fba7d40
commit dee3135130
25 changed files with 523 additions and 503 deletions


@ -15,6 +15,30 @@
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif
//===----------------------------------------------------------------------===//
// Ventus OpenCL builtins.
//===----------------------------------------------------------------------===//
BUILTIN(__builtin_riscv_workgroup_id_x, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_id_y, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_id_z, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_x, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_y, "Ui", "nc")
BUILTIN(__builtin_riscv_workitem_id_z, "Ui", "nc")
BUILTIN(__builtin_riscv_workgroup_size_x, "Us", "nc")
BUILTIN(__builtin_riscv_workgroup_size_y, "Us", "nc")
BUILTIN(__builtin_riscv_workgroup_size_z, "Us", "nc")
BUILTIN(__builtin_riscv_grid_size_x, "Ui", "nc")
BUILTIN(__builtin_riscv_grid_size_y, "Ui", "nc")
BUILTIN(__builtin_riscv_grid_size_z, "Ui", "nc")
//===----------------------------------------------------------------------===//
// Standard RISCV instruction builtins.
//===----------------------------------------------------------------------===//
// Zbb extension
TARGET_BUILTIN(__builtin_riscv_orc_b_32, "ZiZi", "nc", "zbb")
TARGET_BUILTIN(__builtin_riscv_orc_b_64, "WiWi", "nc", "zbb,64bit")
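In Clang's Builtins.def encoding, "Ui" declares an unsigned int return with no arguments, "Us" an unsigned short, and the "nc" attribute string marks the builtin nothrow and const, so repeated calls can be folded. A minimal sketch of exercising the new builtins together (illustrative only; the wrapper name is made up, and the builtins exist only when compiling with this patched Clang for the Ventus target):

// Hedged illustration, not part of the commit.
extern "C" unsigned int ventus_flat_global_id_x(void) {
  // "nc" (nothrow, const) lets the optimizer CSE repeated reads of these IDs.
  unsigned int lid = __builtin_riscv_workitem_id_x();
  unsigned int gid = __builtin_riscv_workgroup_id_x();
  return gid * __builtin_riscv_workgroup_size_x() + lid;
}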


@ -116,6 +116,10 @@ public:
IntPtrType = SignedInt;
PtrDiffType = SignedInt;
SizeType = UnsignedInt;
//HasLegalHalfType = true;
//HasFloat16 = true;
//resetDataLayout("e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
// "-v256:256-v512:512-v1024:1024-n32:64-S128");
resetDataLayout("e-m:e-p:32:32-i64:64-n32-S128");
}
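The new layout string reads: little-endian ("e"), ELF-style mangling ("m:e"), 32-bit pointers with 32-bit alignment ("p:32:32"), i64 aligned to 64 bits, 32-bit native integer width ("n32"), and 128-bit stack alignment ("S128"). A small unit-test-style sketch of checking it through LLVM's DataLayout API (illustrative, not part of the commit):

#include "llvm/IR/DataLayout.h"
#include <cassert>

// Parses the Ventus layout string and sanity-checks two of its fields.
void checkVentusDataLayout() {
  llvm::DataLayout DL("e-m:e-p:32:32-i64:64-n32-S128");
  assert(DL.isLittleEndian());
  assert(DL.getPointerSizeInBits() == 32);
}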
@ -133,6 +137,25 @@ public:
if (ISAInfo->hasExtension("a"))
MaxAtomicInlineWidth = 32;
}
void setSupportedOpenCLOpts() override {
auto &Opts = getSupportedOpenCLOpts();
Opts["cl_khr_fp16"] = true;
Opts["cl_clang_storage_class_specifiers"] = true;
Opts["__cl_clang_variadic_functions"] = true;
Opts["__opencl_c_images"] = true;
Opts["__opencl_c_3d_image_writes"] = true;
}
CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
switch (CC) {
default:
return CCCR_Warning;
case CC_C:
case CC_OpenCLKernel:
return CCCR_OK;
}
}
};
class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
public:


@ -19460,6 +19460,33 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;
switch (BuiltinID) {
default: llvm_unreachable("unexpected builtin ID");
// Ventus GPGPU workitem
case RISCV::BI__builtin_riscv_workitem_id_x:
return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_x, 0, 1024);
case RISCV::BI__builtin_riscv_workitem_id_y:
return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_y, 0, 1024);
case RISCV::BI__builtin_riscv_workitem_id_z:
return emitRangedBuiltin(*this, Intrinsic::riscv_workitem_id_z, 0, 1024);
// FIXME: Use AMDGPU function here temporarily
// Ventus GPGPU workgroup size
case RISCV::BI__builtin_riscv_workgroup_size_x:
return EmitAMDGPUWorkGroupSize(*this, 0);
case RISCV::BI__builtin_riscv_workgroup_size_y:
return EmitAMDGPUWorkGroupSize(*this, 1);
case RISCV::BI__builtin_riscv_workgroup_size_z:
return EmitAMDGPUWorkGroupSize(*this, 2);
// Ventus GPGPU grid size
case RISCV::BI__builtin_riscv_grid_size_x:
return EmitAMDGPUGridSize(*this, 0);
case RISCV::BI__builtin_riscv_grid_size_y:
return EmitAMDGPUGridSize(*this, 1);
case RISCV::BI__builtin_riscv_grid_size_z:
return EmitAMDGPUGridSize(*this, 2);
case RISCV::BI__builtin_riscv_orc_b_32:
case RISCV::BI__builtin_riscv_orc_b_64:
case RISCV::BI__builtin_riscv_clz_32:
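emitRangedBuiltin here is presumably the same CGBuiltin.cpp helper used for the AMDGPU work-item builtins: it emits a call to the intrinsic and attaches !range metadata so later passes know the id lies in [low, high). A rough sketch of that shape (assumed, not copied from the file; it would live inside clang/lib/CodeGen where CodeGenFunction.h is visible):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/MDBuilder.h"

// Sketch: emit the intrinsic call and bound its result with !range metadata.
static llvm::Value *emitRangedBuiltinSketch(clang::CodeGen::CodeGenFunction &CGF,
                                            unsigned IntrinsicID,
                                            int Low, int High) {
  llvm::MDBuilder MDB(CGF.getLLVMContext());
  llvm::MDNode *Range =
      MDB.createRange(llvm::APInt(32, Low), llvm::APInt(32, High));
  llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
  llvm::CallInst *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, Range);
  return Call;
}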

libclc/riscv/lib/SOURCES

@ -0,0 +1,7 @@
workitem/get_global_offset.cl
workitem/get_group_id.cl
workitem/get_global_size.cl
workitem/get_local_id.cl
workitem/get_local_size.cl
workitem/get_num_groups.cl
workitem/get_work_dim.cl


@ -0,0 +1,19 @@
#include <clc/clc.h>
#if __clang_major__ >= 8
#define CONST_AS __constant
#elif __clang_major__ >= 7
#define CONST_AS __attribute__((address_space(4)))
#else
#define CONST_AS __attribute__((address_space(2)))
#endif
_CLC_DEF _CLC_OVERLOAD size_t get_global_offset(uint dim) {
assert(0 && "TODO");
/*
CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
if (dim < 3)
return ptr[dim + 1];
*/
return 0;
}
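get_global_offset is meant to return the per-dimension offset the host passes as global_work_offset; the stub above only returns 0 for now. A host-side sketch of where that value originates (standard OpenCL host API; the function and its arguments are placeholders):

#include <CL/cl.h>

// With these arguments, get_global_offset(0) inside the kernel should
// eventually report 16 (and 0 for dimensions 1 and 2).
cl_int launchWithOffset(cl_command_queue queue, cl_kernel kernel) {
  const size_t offset[3] = {16, 0, 0};
  const size_t global[3] = {256, 1, 1};
  return clEnqueueNDRangeKernel(queue, kernel, /*work_dim=*/3, offset, global,
                                /*local_work_size=*/nullptr, 0, nullptr,
                                nullptr);
}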


@ -0,0 +1,18 @@
#include <clc/clc.h>
uint __clc_riscv_get_global_size_x(void) __asm("llvm.riscv.read.global.size.x");
uint __clc_riscv_get_global_size_y(void) __asm("llvm.riscv.read.global.size.y");
uint __clc_riscv_get_global_size_z(void) __asm("llvm.riscv.read.global.size.z");
_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
switch (dim) {
case 0:
return __clc_riscv_get_global_size_x();
case 1:
return __clc_riscv_get_global_size_y();
case 2:
return __clc_riscv_get_global_size_z();
default:
return 1;
}
}


@ -0,0 +1,14 @@
#include <clc/clc.h>
_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
switch (dim) {
case 0:
return __builtin_riscv_workgroup_id_x();
case 1:
return __builtin_riscv_workgroup_id_y();
case 2:
return __builtin_riscv_workgroup_id_z();
default:
return 1;
}
}


@ -0,0 +1,14 @@
#include <clc/clc.h>
_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
switch (dim) {
case 0:
return __builtin_riscv_workitem_id_x();
case 1:
return __builtin_riscv_workitem_id_y();
case 2:
return __builtin_riscv_workitem_id_z();
default:
return 1;
}
}


@ -0,0 +1,18 @@
#include <clc/clc.h>
uint __clc_riscv_get_local_size_x(void) __asm("llvm.riscv.read.local.size.x");
uint __clc_riscv_get_local_size_y(void) __asm("llvm.riscv.read.local.size.y");
uint __clc_riscv_get_local_size_z(void) __asm("llvm.riscv.read.local.size.z");
_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
switch (dim) {
case 0:
return __clc_riscv_get_local_size_x();
case 1:
return __clc_riscv_get_local_size_y();
case 2:
return __clc_riscv_get_local_size_z();
default:
return 1;
}
}


@ -0,0 +1,18 @@
#include <clc/clc.h>
uint __clc_riscv_get_num_groups_x(void) __asm("llvm.riscv.read.ngroups.x");
uint __clc_riscv_get_num_groups_y(void) __asm("llvm.riscv.read.ngroups.y");
uint __clc_riscv_get_num_groups_z(void) __asm("llvm.riscv.read.ngroups.z");
_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
switch (dim) {
case 0:
return __clc_riscv_get_num_groups_x();
case 1:
return __clc_riscv_get_num_groups_y();
case 2:
return __clc_riscv_get_num_groups_z();
default:
return 1;
}
}
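Taken together, these per-dimension queries are expected to satisfy the usual OpenCL identities: global size = num_groups * local size, and global id = group id * local size + local id + global offset. A plain C++ sketch of those relations, for reference only:

#include <cassert>
#include <cstddef>

// Illustrative check of the relations the libclc functions above implement
// per dimension; nothing Ventus-specific is assumed.
void checkWorkItemIdentities(std::size_t numGroups, std::size_t localSize,
                             std::size_t groupId, std::size_t localId,
                             std::size_t globalOffset) {
  assert(groupId < numGroups && localId < localSize);
  std::size_t globalSize = numGroups * localSize;                  // get_global_size
  std::size_t globalId =
      groupId * localSize + localId + globalOffset;                // get_global_id
  assert(globalId - globalOffset < globalSize);
}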


@ -0,0 +1,15 @@
#include <clc/clc.h>
#if __clang_major__ >= 8
#define CONST_AS __constant
#elif __clang_major__ >= 7
#define CONST_AS __attribute__((address_space(4)))
#else
#define CONST_AS __attribute__((address_space(2)))
#endif
_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
assert(0 && "TODO");
// CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
// return ptr[0];
}


@ -10,6 +10,40 @@
//
//===----------------------------------------------------------------------===//
class RISCVReadPreloadRegisterIntrinsic
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
class RISCVReadPreloadRegisterIntrinsicNamed<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>,
ClangBuiltin<name>;
// Used to tag image and resource intrinsics with information used to generate
// mem operands.
class RISCVRsrcIntrinsic<int rsrcarg, bit isimage = false> {
int RsrcArg = rsrcarg;
bit IsImage = isimage;
}
multiclass RISCVReadPreloadRegisterIntrinsic_xyz {
def _x : RISCVReadPreloadRegisterIntrinsic;
def _y : RISCVReadPreloadRegisterIntrinsic;
def _z : RISCVReadPreloadRegisterIntrinsic;
}
multiclass RISCVReadPreloadRegisterIntrinsic_xyz_named<string prefix> {
def _x : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_x")>;
def _y : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_y")>;
def _z : RISCVReadPreloadRegisterIntrinsicNamed<!strconcat(prefix, "_z")>;
}
//===----------------------------------------------------------------------===//
// ABI Special Intrinsics
defm int_riscv_workitem_id : RISCVReadPreloadRegisterIntrinsic_xyz;
defm int_riscv_workgroup_id : RISCVReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_riscv_workgroup_id">;
//===----------------------------------------------------------------------===//
// Atomics
@ -570,7 +604,7 @@ let TargetPrefix = "riscv" in {
class RISCVClassifyMasked
: Intrinsic<[LLVMVectorOfBitcastsToInt<0>],
[LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic {
let VLOperand = 3;


@ -133,7 +133,8 @@ parseFeatureBits(bool IsRV64, const FeatureBitset &FeatureBits) {
unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
bool TailAgnostic, bool MaskAgnostic) {
assert(isValidSEW(SEW) && "Invalid SEW");
unsigned VLMULBits = static_cast<unsigned>(VLMUL);
// LMUL1 encoding is 000
unsigned VLMULBits = 0;
unsigned VSEWBits = encodeSEW(SEW);
unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7);
if (TailAgnostic)
@ -144,57 +145,23 @@ unsigned RISCVVType::encodeVTYPE(RISCVII::VLMUL VLMUL, unsigned SEW,
return VTypeI;
}
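With LMUL pinned to m1 (the low three bits become 000), the encoding reduces to the SEW field in bits 5:3 plus the ta/ma bits at 0x40/0x80. A worked example, assuming the usual encodeSEW(SEW) = log2(SEW) - 3:

#include "MCTargetDesc/RISCVBaseInfo.h"
#include <cassert>
using namespace llvm;

// e32, m1, ta, ma:
//   VSEWBits = log2(32) - 3 = 2  ->  (2 << 3) | 0b000 = 0x10
//   set 0x40 (tail agnostic) and 0x80 (mask agnostic) -> 0xD0
void checkEncodeVtypeE32M1() {
  unsigned VType = RISCVVType::encodeVTYPE(RISCVII::VLMUL::LMUL_1, /*SEW=*/32,
                                           /*TailAgnostic=*/true,
                                           /*MaskAgnostic=*/true);
  assert(VType == 0xD0);
}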
std::pair<unsigned, bool> RISCVVType::decodeVLMUL(RISCVII::VLMUL VLMUL) {
switch (VLMUL) {
default:
llvm_unreachable("Unexpected LMUL value!");
case RISCVII::VLMUL::LMUL_1:
case RISCVII::VLMUL::LMUL_2:
case RISCVII::VLMUL::LMUL_4:
case RISCVII::VLMUL::LMUL_8:
return std::make_pair(1 << static_cast<unsigned>(VLMUL), false);
case RISCVII::VLMUL::LMUL_F2:
case RISCVII::VLMUL::LMUL_F4:
case RISCVII::VLMUL::LMUL_F8:
return std::make_pair(1 << (8 - static_cast<unsigned>(VLMUL)), true);
}
}
void RISCVVType::printVType(unsigned VType, raw_ostream &OS) {
unsigned Sew = getSEW(VType);
OS << "e" << Sew;
unsigned LMul;
bool Fractional;
std::tie(LMul, Fractional) = decodeVLMUL(getVLMUL(VType));
if (Fractional)
OS << ", mf";
else
OS << ", m";
OS << LMul;
if (isTailAgnostic(VType))
OS << ", ta";
else
OS << ", tu";
if (isMaskAgnostic(VType))
OS << ", ma";
else
OS << ", mu";
}
unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
unsigned LMul;
bool Fractional;
std::tie(LMul, Fractional) = decodeVLMUL(VLMul);
// Convert LMul to a fixed point value with 3 fractional bits.
LMul = Fractional ? (8 / LMul) : (LMul * 8);
assert(SEW >= 8 && "Unexpected SEW value");
return (SEW * 8) / LMul;
OS << "e" << Sew << ", m1, ta, ma";
}
} // namespace llvm
namespace {
struct SourceOfDivergence {
unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
#define GET_SourcesOfDivergence_IMPL
#include "RISCVGenSearchableTables.inc"
} // end anonymous namespace
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
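GET_SourcesOfDivergence_IMPL is emitted by the SearchableTables backend from the RISCVSearchableTables.td added later in this commit; it expands to a sorted table plus a lookup roughly of this shape (a sketch of the generated code under that assumption, not the literal output):

#include "llvm/IR/IntrinsicsRISCV.h"
#include <algorithm>
#include <iterator>

// Sorted array of divergent intrinsic IDs; a null lookup result means the
// intrinsic is treated as uniform.
struct SourceOfDivergenceSketch { unsigned Intr; };

static const SourceOfDivergenceSketch SourcesOfDivergenceSketch[] = {
    {llvm::Intrinsic::riscv_workitem_id_x},
    {llvm::Intrinsic::riscv_workitem_id_y},
    {llvm::Intrinsic::riscv_workitem_id_z},
};

static const SourceOfDivergenceSketch *lookupSourceOfDivergenceSketch(unsigned Intr) {
  auto *I = std::lower_bound(std::begin(SourcesOfDivergenceSketch),
                             std::end(SourcesOfDivergenceSketch), Intr,
                             [](const SourceOfDivergenceSketch &E, unsigned Key) {
                               return E.Intr < Key;
                             });
  return (I != std::end(SourcesOfDivergenceSketch) && I->Intr == Intr) ? I
                                                                       : nullptr;
}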


@ -116,10 +116,6 @@ enum VLMUL : uint8_t {
LMUL_F2
};
enum {
TAIL_AGNOSTIC = 1,
MASK_AGNOSTIC = 2,
};
// Helper functions to read TSFlags.
/// \returns the format of the instruction.
@ -131,10 +127,7 @@ static inline VConstraintType getConstraint(uint64_t TSFlags) {
return static_cast<VConstraintType>((TSFlags & ConstraintMask) >>
ConstraintShift);
}
/// \returns the LMUL for the instruction.
static inline VLMUL getLMul(uint64_t TSFlags) {
return static_cast<VLMUL>((TSFlags & VLMulMask) >> VLMulShift);
}
/// \returns true if there is a dummy mask operand for the instruction.
static inline bool hasDummyMaskOp(uint64_t TSFlags) {
return TSFlags & HasDummyMaskOpMask;
@ -143,59 +136,17 @@ static inline bool hasDummyMaskOp(uint64_t TSFlags) {
static inline bool doesForceTailAgnostic(uint64_t TSFlags) {
return TSFlags & ForceTailAgnosticMask;
}
/// \returns true if there is a merge operand for the instruction.
static inline bool hasMergeOp(uint64_t TSFlags) {
return TSFlags & HasMergeOpMask;
}
/// \returns true if there is a SEW operand for the instruction.
static inline bool hasSEWOp(uint64_t TSFlags) {
return TSFlags & HasSEWOpMask;
}
/// \returns true if there is a VL operand for the instruction.
static inline bool hasVLOp(uint64_t TSFlags) {
return TSFlags & HasVLOpMask;
}
/// \returns true if there is a vector policy operand for this instruction.
static inline bool hasVecPolicyOp(uint64_t TSFlags) {
return TSFlags & HasVecPolicyOpMask;
}
/// \returns true if it is a vector widening reduction instruction.
static inline bool isRVVWideningReduction(uint64_t TSFlags) {
return TSFlags & IsRVVWideningReductionMask;
}
/// \returns true if mask policy is valid for the instruction.
static inline bool usesMaskPolicy(uint64_t TSFlags) {
return TSFlags & UsesMaskPolicyMask;
}
static inline unsigned getMergeOpNum(const MCInstrDesc &Desc) {
assert(hasMergeOp(Desc.TSFlags));
assert(!Desc.isVariadic());
return Desc.getNumDefs();
}
static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
// This method is only called if we expect to have a VL operand, and all
// instructions with VL also have SEW.
assert(hasSEWOp(TSFlags) && hasVLOp(TSFlags));
unsigned Offset = 2;
if (hasVecPolicyOp(TSFlags))
Offset = 3;
return Desc.getNumOperands() - Offset;
}
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
assert(hasSEWOp(TSFlags));
unsigned Offset = 1;
if (hasVecPolicyOp(TSFlags))
Offset = 2;
return Desc.getNumOperands() - Offset;
}
static inline unsigned getVecPolicyOpNum(const MCInstrDesc &Desc) {
assert(hasVecPolicyOp(Desc.TSFlags));
return Desc.getNumOperands() - 1;
}
@ -449,13 +400,12 @@ inline static unsigned getSEW(unsigned VType) {
return decodeVSEW(VSEW);
}
inline static bool isTailAgnostic(unsigned VType) { return VType & 0x40; }
inline static bool isTailAgnostic(unsigned VType) { return 1; }
inline static bool isMaskAgnostic(unsigned VType) { return VType & 0x80; }
inline static bool isMaskAgnostic(unsigned VType) { return 1; }
void printVType(unsigned VType, raw_ostream &OS);
unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);
} // namespace RISCVVType


@ -542,6 +542,7 @@ include "VentusInstrInfo.td"
include "GISel/RISCVRegisterBanks.td"
include "RISCVSchedRocket.td"
include "RISCVSchedSiFive7.td"
include "RISCVSearchableTables.td"
//===----------------------------------------------------------------------===//
// RISC-V processors supported.


@ -28,15 +28,6 @@ using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
namespace llvm::RISCV {
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV
static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
assert(Node->getNumOperands() > 0 && "Node with no operands");
unsigned LastOpIdx = Node->getNumOperands() - 1;
@ -47,104 +38,6 @@ static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
return LastOpIdx;
}
static unsigned getVecPolicyOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
assert(RISCVII::hasVecPolicyOp(MCID.TSFlags));
(void)MCID;
return getLastNonGlueOrChainOpIdx(Node);
}
void RISCVDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
SDNode *N = &*--Position;
if (N->use_empty())
continue;
SDValue Result;
switch (N->getOpcode()) {
case ISD::SPLAT_VECTOR: {
// Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
// SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
MVT VT = N->getSimpleValueType(0);
unsigned Opc =
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
SDLoc DL(N);
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
N->getOperand(0), VL);
break;
}
case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
// load. Done after lowering and combining so that we have a chance to
// optimize this to VMV_V_X_VL when the upper bits aren't needed.
assert(N->getNumOperands() == 4 && "Unexpected number of operands");
MVT VT = N->getSimpleValueType(0);
SDValue Passthru = N->getOperand(0);
SDValue Lo = N->getOperand(1);
SDValue Hi = N->getOperand(2);
SDValue VL = N->getOperand(3);
assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
"Unexpected VTs!");
MachineFunction &MF = CurDAG->getMachineFunction();
RISCVMachineFunctionInfo *FuncInfo =
MF.getInfo<RISCVMachineFunctionInfo>();
SDLoc DL(N);
// We use the same frame index we use for moving two i32s into 64-bit FPR.
// This is an analogous operation.
int FI = FuncInfo->getMoveF64FrameIndex(MF);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
SDValue StackSlot =
CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
SDValue Chain = CurDAG->getEntryNode();
Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
SDValue OffsetSlot =
CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
Align(8));
Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
SDValue IntID =
CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
SDValue Ops[] = {Chain,
IntID,
Passthru,
StackSlot,
CurDAG->getRegister(RISCV::X0, MVT::i64),
VL};
Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
MVT::i64, MPI, Align(8),
MachineMemOperand::MOLoad);
break;
}
}
if (Result) {
LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\nNew: ");
LLVM_DEBUG(Result->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\n");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
MadeChange = true;
}
}
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
HandleSDNode Dummy(CurDAG->getRoot());
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();


@ -37,7 +37,6 @@ public:
return SelectionDAGISel::runOnMachineFunction(MF);
}
void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
void Select(SDNode *Node) override;
@ -108,50 +107,6 @@ private:
bool doPeepholeSExtW(SDNode *Node);
};
namespace RISCV {
struct VLEPseudo {
uint16_t Masked : 1;
uint16_t IsTU : 1;
uint16_t Strided : 1;
uint16_t FF : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
uint16_t Pseudo;
};
struct VSEPseudo {
uint16_t Masked :1;
uint16_t Strided : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
uint16_t Pseudo;
};
struct VLX_VSXPseudo {
uint16_t Masked : 1;
uint16_t IsTU : 1;
uint16_t Ordered : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
uint16_t IndexLMUL : 3;
uint16_t Pseudo;
};
struct RISCVMaskedPseudoInfo {
uint16_t MaskedPseudo;
uint16_t UnmaskedPseudo;
uint16_t UnmaskedTUPseudo;
uint8_t MaskOpIdx;
};
#define GET_RISCVVLETable_DECL
#define GET_RISCVVSETable_DECL
#define GET_RISCVVLXTable_DECL
#define GET_RISCVVSXTable_DECL
#define GET_RISCVMaskedPseudosTable_DECL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm
#endif


@ -21,7 +21,9 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@ -958,114 +960,6 @@ bool RISCVTargetLowering::
return !XC;
}
bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
switch (Opcode) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::ICmp:
case Instruction::FCmp:
return true;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
return Operand == 1;
default:
return false;
}
}
bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
return false;
if (canSplatOperand(I->getOpcode(), Operand))
return true;
auto *II = dyn_cast<IntrinsicInst>(I);
if (!II)
return false;
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
case Intrinsic::vp_fma:
return Operand == 0 || Operand == 1;
case Intrinsic::vp_shl:
case Intrinsic::vp_lshr:
case Intrinsic::vp_ashr:
case Intrinsic::vp_udiv:
case Intrinsic::vp_sdiv:
case Intrinsic::vp_urem:
case Intrinsic::vp_srem:
return Operand == 1;
// These intrinsics are commutative.
case Intrinsic::vp_add:
case Intrinsic::vp_mul:
case Intrinsic::vp_and:
case Intrinsic::vp_or:
case Intrinsic::vp_xor:
case Intrinsic::vp_fadd:
case Intrinsic::vp_fmul:
// These intrinsics have 'vr' versions.
case Intrinsic::vp_sub:
case Intrinsic::vp_fsub:
case Intrinsic::vp_fdiv:
return Operand == 0 || Operand == 1;
default:
return false;
}
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
using namespace llvm::PatternMatch;
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
return false;
for (auto OpIdx : enumerate(I->operands())) {
if (!canSplatOperand(I, OpIdx.index()))
continue;
Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
// Make sure we are not already sinking this operand
if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
// We are looking for a splat that can be sunk.
if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
m_Undef(), m_ZeroMask())))
continue;
// All uses of the shuffle should be sunk to avoid duplicating it across gpr
// and vector registers
for (Use &U : Op->uses()) {
Instruction *Insn = cast<Instruction>(U.getUser());
if (!canSplatOperand(Insn, U.getOperandNo()))
return false;
}
Ops.push_back(&Op->getOperandUse(0));
Ops.push_back(&OpIdx.value());
}
return true;
}
bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
unsigned Opc = VecOp.getOpcode();
@ -1878,7 +1772,7 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
// Return the value of the return address register, marking it an implicit
// live-in.
Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT, false));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
@ -5635,7 +5529,7 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
SDValue Val;
const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT(), true);
Register VReg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
return DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
@ -7261,6 +7155,88 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
return OptSize && !VT.isVector();
}
bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
const SDNode *N, FunctionLoweringInfo *FLI,
LegacyDivergenceAnalysis *KDA) const {
switch (N->getOpcode()) {
case ISD::CopyFromReg: {
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
Register Reg = R->getReg();
// FIXME: Why does this need to consider isLiveIn?
if (Reg.isPhysical() || MRI.isLiveIn(Reg))
return !TRI->isSGPRReg(MRI, Reg);
if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
return KDA->isDivergent(V);
return !TRI->isSGPRReg(MRI, Reg);
}
case ISD::LOAD: {
const LoadSDNode *L = cast<LoadSDNode>(N);
unsigned AS = L->getAddressSpace();
// A flat load may access private memory.
// return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
return true;
}
case ISD::CALLSEQ_END:
return true;
case ISD::INTRINSIC_WO_CHAIN:
return RISCVII::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
case ISD::INTRINSIC_W_CHAIN:
return RISCVII::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
/*
case AMDGPUISD::ATOMIC_CMP_SWAP:
case AMDGPUISD::ATOMIC_INC:
case AMDGPUISD::ATOMIC_DEC:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
case AMDGPUISD::ATOMIC_LOAD_FMAX:
case AMDGPUISD::BUFFER_ATOMIC_SWAP:
case AMDGPUISD::BUFFER_ATOMIC_ADD:
case AMDGPUISD::BUFFER_ATOMIC_SUB:
case AMDGPUISD::BUFFER_ATOMIC_SMIN:
case AMDGPUISD::BUFFER_ATOMIC_UMIN:
case AMDGPUISD::BUFFER_ATOMIC_SMAX:
case AMDGPUISD::BUFFER_ATOMIC_UMAX:
case AMDGPUISD::BUFFER_ATOMIC_AND:
case AMDGPUISD::BUFFER_ATOMIC_OR:
case AMDGPUISD::BUFFER_ATOMIC_XOR:
case AMDGPUISD::BUFFER_ATOMIC_INC:
case AMDGPUISD::BUFFER_ATOMIC_DEC:
case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP:
case AMDGPUISD::BUFFER_ATOMIC_CSUB:
case AMDGPUISD::BUFFER_ATOMIC_FADD:
case AMDGPUISD::BUFFER_ATOMIC_FMIN:
case AMDGPUISD::BUFFER_ATOMIC_FMAX:
// Target-specific read-modify-write atomics are sources of divergence.
return true;
*/
default:
if (auto *A = dyn_cast<AtomicSDNode>(N)) {
// Generic read-modify-write atomics are sources of divergence.
return A->readMem() && A->writeMem();
}
return false;
}
}
// TODO: Support child registers
const TargetRegisterClass *
RISCVTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
if (!TRI->isSGPRClass(RC) && !isDivergent)
return &RISCV::GPRRegClass;
else if (TRI->isSGPRClass(RC) && isDivergent)
return &RISCV::VGPRRegClass;
return RC;
}
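The effect of getRegClassFor above: a value that divergence analysis proves uniform is steered into the scalar GPR class even if its default class is VGPR, and a divergent value is forced into VGPR. A usage sketch of how a virtual register would typically be created against this hook (the helper name is made up; the pattern mirrors what FunctionLoweringInfo does):

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"

// Divergent values land in VGPR, uniform values in GPR.
llvm::Register createVRegFor(llvm::MachineRegisterInfo &MRI,
                             const llvm::TargetLowering &TLI,
                             llvm::MVT VT, bool IsDivergent) {
  const llvm::TargetRegisterClass *RC = TLI.getRegClassFor(VT, IsDivergent);
  return MRI.createVirtualRegister(RC);
}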
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"


@ -363,20 +363,14 @@ public:
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
/// Return true if the (vector) instruction I will be lowered to an instruction
/// with a scalar splat operand for the given Operand number.
bool canSplatOperand(Instruction *I, int Operand) const;
/// Return true if a vector instruction will lower to a target instruction
/// able to splat the given operand.
bool canSplatOperand(unsigned Opcode, int Operand) const;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
bool shouldScalarizeBinop(SDValue VecOp) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
@ -495,6 +489,9 @@ public:
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
const TargetRegisterClass *getRegClassFor(MVT VT,
bool isDivergent) const override;
// Lower incoming arguments, copy physregs into vregs
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,


@ -1140,32 +1140,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
}
const uint64_t TSFlags = Desc.TSFlags;
if (RISCVII::hasMergeOp(TSFlags)) {
unsigned OpIdx = RISCVII::getMergeOpNum(Desc);
if (MI.findTiedOperandIdx(0) != OpIdx) {
ErrInfo = "Merge op improperly tied";
return false;
}
}
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
if (!Op.isImm() && !Op.isReg()) {
ErrInfo = "Invalid operand type for VL operand";
return false;
}
if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
auto *RC = MRI.getRegClass(Op.getReg());
if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
ErrInfo = "Invalid register class for VL operand";
return false;
}
}
if (!RISCVII::hasSEWOp(TSFlags)) {
ErrInfo = "VL operand w/o SEW operand?";
return false;
}
}
if (RISCVII::hasSEWOp(TSFlags)) {
unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
@ -1179,27 +1153,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
}
if (RISCVII::hasVecPolicyOp(TSFlags)) {
unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
uint64_t Policy = MI.getOperand(OpIdx).getImm();
if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
ErrInfo = "Invalid Policy Value";
return false;
}
if (!RISCVII::hasVLOp(TSFlags)) {
ErrInfo = "policy operand w/o VL operand?";
return false;
}
// VecPolicy operands can only exist on instructions with passthru/merge
// arguments. Note that not all arguments with passthru have vec policy
// operands- some instructions have implicit policies.
unsigned UseOpIdx;
if (!MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
ErrInfo = "policy operand w/o tied operand?";
return false;
}
}
return true;
}
@ -1478,17 +1431,6 @@ std::string RISCVInstrInfo::createMIROperandComment(
uint64_t TSFlags = MI.getDesc().TSFlags;
// Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
// operand of vector codegen pseudos.
if (RISCVII::hasVecPolicyOp(TSFlags) &&
OpIdx == RISCVII::getVecPolicyOpNum(MI.getDesc())) {
unsigned Policy = MI.getOperand(OpIdx).getImm();
assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
"Invalid Policy Value");
OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
<< (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
}
OS.flush();
return Comment;
}


@ -103,13 +103,6 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasBP(MF))
markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp
// V registers for code generation. We handle them manually.
markSuperRegs(Reserved, RISCV::VL);
markSuperRegs(Reserved, RISCV::VTYPE);
markSuperRegs(Reserved, RISCV::VXSAT);
markSuperRegs(Reserved, RISCV::VXRM);
markSuperRegs(Reserved, RISCV::VLENB); // vlenb (constant)
// Floating point environment registers.
markSuperRegs(Reserved, RISCV::FRM);
markSuperRegs(Reserved, RISCV::FFLAGS);
@ -161,6 +154,39 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return true;
}
bool RISCVRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
Register Reg) const {
const TargetRegisterClass *RC;
if (Reg.isVirtual())
RC = MRI.getRegClass(Reg);
else
RC = getPhysRegClass(Reg);
return RC ? isSGPRClass(RC) : false;
}
const TargetRegisterClass *
RISCVRegisterInfo::getPhysRegClass(MCRegister Reg) const {
static const TargetRegisterClass *const BaseClasses[] = {
/*
&RISCV::VGPR_LO16RegClass,
&RISCV::VGPR_HI16RegClass,
&RISCV::SReg_LO16RegClass,
&RISCV::SReg_HI16RegClass,
&RISCV::SReg_32RegClass,
*/
&RISCV::VGPRRegClass,
&RISCV::GPRRegClass,
};
for (const TargetRegisterClass *BaseClass : BaseClasses) {
if (BaseClass->contains(Reg)) {
return BaseClass;
}
}
assert(0 && "TODO: Add sub/super registers");
return nullptr;
}
void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
const DebugLoc &DL, Register DestReg,


@ -20,10 +20,43 @@
namespace llvm {
// This needs to be kept in sync with the field bits in VentusRegisterClass.
enum RISCVRCFlags {
IsVGPR = 1 << 0,
IsSGPR = 1 << 1
}; // enum RISCVRCFlags
struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
RISCVRegisterInfo(unsigned HwMode);
/// \returns true if this class contains VGPR registers.
static bool hasVGPRs(const TargetRegisterClass *RC) {
return RC->TSFlags & RISCVRCFlags::IsVGPR;
}
/// \returns true if this class contains SGPR registers.
static bool hasSGPRs(const TargetRegisterClass *RC) {
return RC->TSFlags & RISCVRCFlags::IsSGPR;
}
/// Return the 'base' register class for this register.
/// e.g. X5 => GPR, V3 => VGPR, etc.
const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;
/// \returns true if this class contains only SGPR registers
static bool isSGPRClass(const TargetRegisterClass *RC) {
return hasSGPRs(RC) && !hasVGPRs(RC);
}
/// \returns true if this class ID contains only SGPR registers
bool isSGPRClassID(unsigned RCID) const {
return isSGPRClass(getRegClass(RCID));
}
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
@ -33,6 +66,10 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
bool isAsmClobberable(const MachineFunction &MF,
MCRegister PhysReg) const override;
bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
return !isSGPRClass(RC);
}
const uint32_t *getNoPreservedMask() const override;
bool hasReservedSpillSlot(const MachineFunction &MF, Register Reg,


@ -0,0 +1,75 @@
//===-- RISCVSearchableTables.td ---------------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Resource intrinsics table.
//===----------------------------------------------------------------------===//
class RsrcIntrinsic<RISCVRsrcIntrinsic intr> {
Intrinsic Intr = !cast<Intrinsic>(intr);
bits<8> RsrcArg = intr.RsrcArg;
bit IsImage = intr.IsImage;
}
/*
def RsrcIntrinsics : GenericTable {
let FilterClass = "RsrcIntrinsic";
let Fields = ["Intr", "RsrcArg", "IsImage"];
let PrimaryKey = ["Intr"];
let PrimaryKeyName = "lookupRsrcIntrinsic";
}
foreach intr = !listconcat(RISCVBufferIntrinsics,
RISCVImageDimIntrinsics,
RISCVImageDimAtomicIntrinsics) in {
def : RsrcIntrinsic<!cast<RISCVRsrcIntrinsic>(intr)>;
}
*/
class VentusBufferFormatBase<bits<8> f, bits<8> bpc, bits<8> numc,
bits<8> nfmt, bits<8> dfmt> {
bits<8> Format = f;
bits<8> BitsPerComp = bpc;
bits<8> NumComponents = numc;
bits<8> NumFormat = nfmt;
bits<8> DataFormat = dfmt;
}
class VentusBufferFormatTable : GenericTable {
let CppTypeName = "GcnBufferFormatInfo";
let Fields = ["Format", "BitsPerComp", "NumComponents", "NumFormat", "DataFormat"];
let PrimaryKey = ["BitsPerComp", "NumComponents", "NumFormat"];
}
class SourceOfDivergence<Intrinsic intr> {
Intrinsic Intr = intr;
}
def SourcesOfDivergence : GenericTable {
let FilterClass = "SourceOfDivergence";
let Fields = ["Intr"];
let PrimaryKey = ["Intr"];
let PrimaryKeyName = "lookupSourceOfDivergence";
}
def : SourceOfDivergence<int_riscv_workitem_id_x>;
def : SourceOfDivergence<int_riscv_workitem_id_y>;
def : SourceOfDivergence<int_riscv_workitem_id_z>;
//def : SourceOfDivergence<int_riscv_interp_mov>;
//def : SourceOfDivergence<int_riscv_interp_p1>;
// The dummy boolean output is divergent from the IR's perspective,
// but the mask results are uniform. These produce a divergent and
// uniform result, so the returned struct is collectively divergent.
// isAlwaysUniform can override the extract of the uniform component.
//def : SourceOfDivergence<int_riscv_if>;
//def : SourceOfDivergence<int_riscv_else>;
//def : SourceOfDivergence<int_riscv_loop>;
//foreach intr = RISCVImageDimAtomicIntrinsics in
//def : SourceOfDivergence<intr>;


@ -258,58 +258,8 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
// TODO: Handle scalar type.
if (!LT.second.isVector())
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
auto getConstantMatCost =
[&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
// Two sub-cases:
// * Has a 5 bit immediate operand which can be splatted.
// * Has a larger immediate which must be materialized in scalar register
// We return 0 for both as we currently ignore the cost of materializing
// scalar constants in GPRs.
return 0;
// Add a cost of address generation + the cost of the vector load. The
// address is expected to be a PC relative offset to a constant pool entry
// using auipc/addi.
return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
/*AddressSpace=*/0, CostKind);
};
// Add the cost of materializing any constant vectors required.
InstructionCost ConstantMatCost = 0;
if (Op1Info.isConstant())
ConstantMatCost += getConstantMatCost(0, Op1Info);
if (Op2Info.isConstant())
ConstantMatCost += getConstantMatCost(1, Op2Info);
switch (TLI->InstructionOpcodeToISD(Opcode)) {
case ISD::ADD:
case ISD::SUB:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FNEG: {
return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
}
default:
return ConstantMatCost +
BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
}
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info,
Args, CxtI);
}
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,


@ -27,6 +27,16 @@ let Namespace = "RISCV" in {
// vALUop vd, vs0, vs1
// where the first instruction tells the HW to use v[0-31] for vd and vs1,
// and v[32-63] for vs0.
class RVRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
: RegisterClass <n, rTypes, Align, rList> {
// vALU and sALU registers
field bit IsVGPR = 0;
field bit IsSGPR = 0;
let TSFlags{0} = IsVGPR;
let TSFlags{1} = IsSGPR;
}
class RISCVReg<bits<8> Enc, string n, list<string> alt = []> : Register<n> {
let HWEncoding{7-0} = Enc;
let AltNames = alt;
@ -235,7 +245,7 @@ def XLenRI : RegInfoByHwMode<
// The order of registers represents the preferred allocation sequence.
// Registers are listed in the order caller-save, callee-save, specials.
def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPR : RVRegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 10, 17),
(sequence "X%u", 5, 7),
(sequence "X%u", 28, 31),
@ -244,49 +254,57 @@ def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 0, 4)
)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
def GPRX0 : RVRegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
def GPRNoX0 : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
def GPRNoX0X2 : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
// Don't use X1 or X5 for JALR since that is a hint to pop the return address
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
// X3, and X4 as it reduces the number of register classes that get synthesized
// by tablegen.
def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
def GPRJALR : RVRegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPRC : RVRegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 10, 15),
(sequence "X%u", 8, 9)
)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
// For indirect tail calls, we can't use callee-saved registers, as they are
// restored to the saved value before the tail call, which would clobber a call
// address. We shouldn't use x5 since that is a hint to pop the return
// address stack on some microarchitectures.
def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
def GPRTC : RVRegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 6, 7),
(sequence "X%u", 10, 17),
(sequence "X%u", 28, 31)
)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
def SP : RVRegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
let RegInfos = XLenRI;
let IsSGPR = 1;
}
// Floating point registers
@ -466,11 +484,13 @@ def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16]>;
// TODO: Add half and register pair support!!
// FIXME: BranchCC_vvi doesn't support f32 VGPR in pattern(riscv_brcc).
// VGPR 32-bit registers class
def VGPR : RegisterClass<"RISCV",
def VGPR : RVRegisterClass<"RISCV",
// !listconcat(Reg32Types.types, Reg16Types.types),
[i32/*, f32*/],
32,
(add (sequence "V%u", 0, 255))>;
(add (sequence "V%u", 0, 255))> {
let IsVGPR = 1;
}
// VGPR 64-bit registers class
def VGPR_64 : VentusRegisterTuples<getSubRegs<2>.ret, VGPR, 255, 1, 2, "v">;