diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 65fcfbc3f096..37bb61612e07 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -11333,7 +11333,7 @@ void VentusRISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
   unsigned NumRegsLeft = NumArgVGPRs;
   for (auto &Arg : FI.arguments()) {
     // FIXME: Is SPIR_KERNEL CC handled by upper layer?
-    if (CC == llvm::CallingConv::SPIR_KERNEL) {
+    if (CC == llvm::CallingConv::VENTUS_KERNEL) {
       Arg.info = classifyKernelArgumentType(Arg.type);
     } else {
       Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
@@ -11373,13 +11373,41 @@ ABIArgInfo VentusRISCVABIInfo::classifyReturnType(QualType RetTy) const {
   return classifyArgumentType(RetTy, ArgVGPRsLeft);
 }
 
+/// Almost the same as AMDGPU, because AMDGPU use buffer to deal with 
 ABIArgInfo VentusRISCVABIInfo::classifyKernelArgumentType(QualType Ty) const {
-  llvm_unreachable("TODO: Should we handle kernel arg here?");
+  Ty = useFirstFieldIfTransparentUnion(Ty);
+
+  // TODO: Can we omit empty structs?
+
+  if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
+    Ty = QualType(SeltTy, 0);
+
+  llvm::Type *OrigLTy = CGT.ConvertType(Ty);
+  llvm::Type *LTy = OrigLTy;
+
+  // FIXME: Should also use this for OpenCL, but it requires addressing the
+  // problem of kernels being called.
+  //
+  // FIXME: This doesn't apply the optimization of coercing pointers in structs
+  // to global address space when using byref. This would require implementing a
+  // new kind of coercion of the in-memory type when for indirect arguments.
+  if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
+      isAggregateTypeForABI(Ty)) {
+    return ABIArgInfo::getIndirectAliased(
+        getContext().getTypeAlignInChars(Ty),
+        getContext().getTargetAddressSpace(LangAS::opencl_constant),
+        false /*Realign*/, nullptr /*Padding*/);
+  }
+
+  // If we set CanBeFlattened to true, CodeGen will expand the struct to its
+  // individual elements, which confuses the Clover OpenCL backend; therefore we
+  // have to set it to false here. Other args of getDirect() are just defaults.
+  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
 }
 
 ABIArgInfo VentusRISCVABIInfo::classifyArgumentType(QualType Ty,
                                                     unsigned &NumRegsLeft) const {
-  assert(NumRegsLeft <= NumArgVGPRs && "register estimate underflow");
+  assert(NumRegsLeft <= NumArgVGPRs && "Arg VGPR trcking underflow");
 
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
@@ -11458,7 +11486,11 @@ public:
 
     Fn->addFnAttr("interrupt", Kind);
   }
+  unsigned getOpenCLKernelCallingConv() const override;
 };
+  unsigned RISCVTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
+    return llvm::CallingConv::VENTUS_KERNEL;
+  }
 } // namespace
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenOpenCL/ventus-call-kernel.cl b/clang/test/CodeGenOpenCL/ventus-call-kernel.cl
new file mode 100644
index 000000000000..4f2037b7a5a8
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/ventus-call-kernel.cl
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
+
+kernel void test_kernel(global int *out)
+{
+  out[0] = 4;
+}
+
+// CHECK: define{{.*}} ventus_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
+// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
+__kernel void test_call_kernel(__global int *out)
+{
+  test_kernel(out);
+}
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 8a679007a504..d018979ae7c7 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -173,7 +173,7 @@ enum Kind {
   kw_amdgpu_kernel,
   kw_amdgpu_gfx,
   kw_tailcc,
-
+  kw_ventus_kernel,
   // Attributes:
   kw_attributes,
   kw_sync,
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index 9fefeef05cb2..1be51583a0d0 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -241,6 +241,9 @@ namespace CallingConv {
     /// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
     AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,
 
+    /// Used for VENTUS code object kernels.
+    VENTUS_KERNEL = 104,
+
     /// The highest possible ID. Must be some 2^k - 1.
     MaxID = 1023
   };
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index c33dc9710f35..f833d81e60bb 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -631,6 +631,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(amdgpu_kernel);
   KEYWORD(amdgpu_gfx);
   KEYWORD(tailcc);
+  KEYWORD(ventus_kernel);
 
   KEYWORD(cc);
   KEYWORD(c);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 76cb5ee6944f..49ad85c69f97 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -2002,6 +2002,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
 ///   ::= 'amdgpu_cs'
 ///   ::= 'amdgpu_kernel'
 ///   ::= 'tailcc'
+///   ::= 'ventus_kernel'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::parseOptionalCallingConv(unsigned &CC) {
@@ -2060,6 +2061,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
   case lltok::kw_amdgpu_cs:      CC = CallingConv::AMDGPU_CS; break;
   case lltok::kw_amdgpu_kernel:  CC = CallingConv::AMDGPU_KERNEL; break;
   case lltok::kw_tailcc:         CC = CallingConv::Tail; break;
+  case lltok::kw_ventus_kernel:  CC = CallingConv::VENTUS_KERNEL; break;
   case lltok::kw_cc: {
       Lex.Lex();
       return parseUInt32(CC);
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 3ca17648a703..93db2c4a0a01 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -341,6 +341,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::AMDGPU_CS:     Out << "amdgpu_cs"; break;
   case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
   case CallingConv::AMDGPU_Gfx:    Out << "amdgpu_gfx"; break;
+  case CallingConv::VENTUS_KERNEL:    Out << "ventus_kernel"; break;
   }
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 9ca8eb114351..1ba21a28d921 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -377,7 +377,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Allocate space on the local-mem stack and private-mem stack if necessary.
-  if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
+  if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
     RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
                 MachineInstr::FrameSetup, getStackAlign());
   else
@@ -575,7 +575,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
     StackSize = FirstSPAdjustAmount;
 
   // Deallocate stack
-  if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
+  if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
     RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
                   MachineInstr::FrameDestroy, getStackAlign());
   else
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 10eea6bd487f..47fd667cb18c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5804,7 +5804,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
 
   MachineFunction &MF = DAG.getMachineFunction();
 
-  bool IsKernel = CallConv == CallingConv::SPIR_KERNEL;
+  bool IsKernel = CallConv == CallingConv::VENTUS_KERNEL;
 
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   MVT XLenVT = Subtarget.getXLenVT();
@@ -7362,11 +7362,14 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
 bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
     const SDNode *N, FunctionLoweringInfo *FLI,
     LegacyDivergenceAnalysis *KDA) const {
+    N->isKnownSentinel();
+  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
+  // N->op_end();
+  // for(auto tt : N->op_end())
   switch (N->getOpcode()) {
   case ISD::CopyFromReg: {
     const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
-    const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
-    const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
     Register Reg = R->getReg();
 
     // FIXME: Why does this need to consider isLiveIn?
@@ -7378,6 +7381,15 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
 
     return !TRI->isSGPRReg(MRI, Reg);
   }
+  // case ISD::ADD:{
+  //   SDValue dd = N->getOperand(0);
+  //   if(dd->getOpcode() == ISD::CopyFromReg) {
+  //     dd->dump();
+  //     const RegisterSDNode *R = cast<RegisterSDNode>(dd->getOperand(1));
+  //     return TRI->isSGPRReg(MRI, R->getReg());
+  //   }
+  //   return false;
+  // }
   case ISD::LOAD: {
     const LoadSDNode *L = cast<LoadSDNode>(N);
     return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 139ae1d099b3..8978f97ebe03 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -68,7 +68,7 @@ private:
 
 public:
   RISCVMachineFunctionInfo(const MachineFunction &MF) : IsEntryFunction(
-      MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL) {}
+      MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL) {}
 
   MachineFunctionInfo *
   clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll
index 3bbe3b1ba6bc..99381749dfd1 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=VENTUS %s
 
-define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
+define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
 ; VENTUS-LABEL: func:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll
index 7732e7899c8c..36125e5efce5 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll
@@ -4,7 +4,7 @@
 
 @foo.b = internal addrspace(3) global [5 x i32] undef, align 4
 
-define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
+define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
 ; VENTUS-LABEL: foo:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi sp, sp, 48
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll
index 190371b704d8..fd6a5db6194f 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=VENTUS %s
 
-define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
+define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
 ; VENTUS-LABEL: foo_ker:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll
index bf4d762e0739..5f7df369b1a9 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \
 ; RUN:   | FileCheck -check-prefix=VENTUS %s
 
-define dso_local spir_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
+define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
 entry:
   %call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
   %arrayidx = getelementptr inbounds float, ptr addrspace(1) %B, i32 %call
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll
index 8213b1fc02d2..f5570d0cb4a5 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll
@@ -2,7 +2,7 @@
 ; RUN:   | FileCheck -check-prefix=VENTUS %s
 
 
-define dso_local spir_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
+define dso_local ventus_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
 entry:
 	; VENTUS: fadd.s a{{[1-9]}}, a2, a1
   %add1 = fadd float %c, %d
@@ -10,7 +10,7 @@ entry:
   ret void
 }
 
-define dso_local spir_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
+define dso_local ventus_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
 entry:
 	; VENTUS: fsub.s a{{[1-9]}}, a2, a1
   %sub = fsub float %c, %d
@@ -18,7 +18,7 @@ entry:
   ret void
 }
 
-define dso_local spir_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
+define dso_local ventus_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
 entry:
 	; VENTUS: fmul.s a{{[1-9]}}, a2, a1
   %mul = fmul float %c, %d
@@ -26,7 +26,7 @@ entry:
   ret void
 }
 
-define dso_local spir_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
+define dso_local ventus_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
 entry:
 	; VENTUS: fdiv.s a{{[1-9]}}, a2, a1
   %div = fdiv float %c, %d
@@ -34,7 +34,7 @@ entry:
   ret void
 }
 
-define dso_local spir_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
+define dso_local ventus_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result)  {
 entry:
 	; VENTUS: fmadd.s a{{[1-9]}}, a3, a2, a1
   %div = call float @llvm.fma.f32(float  %a, float  %b, float  %c)
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll
index e1e3badf3520..4f919c2ea441 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll
@@ -20,7 +20,7 @@ entry:
 }
 
 ; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
-define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
+define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
 ; VENTUS-LABEL: foo:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll
index bc9d96a95170..1d843e53fe47 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll
@@ -9,7 +9,7 @@
 
 
 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
-define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
+define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
 ; VENTUS-LABEL: foo:
 ; VENTUS:       # %bb.0:
 ; VENTUS-NEXT:    lw a1, 0(a0)
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll
index 4e446464b2be..6d4bc57f6517 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll
@@ -55,7 +55,7 @@ cleanup:                                          ; preds = %if.else, %entry, %i
   ret i32 %retval.0
 }
 
-define dso_local spir_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
+define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
 ; VENTUS-LABEL: loop_branch:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi sp, sp, 16
@@ -118,8 +118,7 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
-; FIXME: Fix this
-; define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
+; FIXME: Fix this 2 noundef %dim) local_unnamed_addr {
 ; VENTUS-LABEL: branch_in_branch:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi tp, tp, 16
@@ -198,7 +197,7 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
 ; }
 
 ; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
-define dso_local spir_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
+define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
 ; VENTUS-LABEL: double_loop:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi sp, sp, 16
@@ -282,7 +281,7 @@ for.body4:                                        ; preds = %for.cond1.preheader
 }
 
 ; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
-define dso_local spir_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
+define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
 ; VENTUS-LABEL: loop_switch:
 ; VENTUS:       # %bb.0: # %entry
 ; VENTUS-NEXT:    addi sp, sp, 16