diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 65fcfbc3f096..37bb61612e07 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -11333,7 +11333,7 @@ void VentusRISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { unsigned NumRegsLeft = NumArgVGPRs; for (auto &Arg : FI.arguments()) { // FIXME: Is SPIR_KERNEL CC handled by upper layer? - if (CC == llvm::CallingConv::SPIR_KERNEL) { + if (CC == llvm::CallingConv::VENTUS_KERNEL) { Arg.info = classifyKernelArgumentType(Arg.type); } else { Arg.info = classifyArgumentType(Arg.type, NumRegsLeft); @@ -11373,13 +11373,41 @@ ABIArgInfo VentusRISCVABIInfo::classifyReturnType(QualType RetTy) const { return classifyArgumentType(RetTy, ArgVGPRsLeft); } +/// Almost the same as AMDGPU, because AMDGPU use buffer to deal with ABIArgInfo VentusRISCVABIInfo::classifyKernelArgumentType(QualType Ty) const { - llvm_unreachable("TODO: Should we handle kernel arg here?"); + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: Can we omit empty structs? + + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + Ty = QualType(SeltTy, 0); + + llvm::Type *OrigLTy = CGT.ConvertType(Ty); + llvm::Type *LTy = OrigLTy; + + // FIXME: Should also use this for OpenCL, but it requires addressing the + // problem of kernels being called. + // + // FIXME: This doesn't apply the optimization of coercing pointers in structs + // to global address space when using byref. This would require implementing a + // new kind of coercion of the in-memory type when for indirect arguments. + if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy && + isAggregateTypeForABI(Ty)) { + return ABIArgInfo::getIndirectAliased( + getContext().getTypeAlignInChars(Ty), + getContext().getTargetAddressSpace(LangAS::opencl_constant), + false /*Realign*/, nullptr /*Padding*/); + } + + // If we set CanBeFlattened to true, CodeGen will expand the struct to its + // individual elements, which confuses the Clover OpenCL backend; therefore we + // have to set it to false here. Other args of getDirect() are just defaults. + return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } ABIArgInfo VentusRISCVABIInfo::classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const { - assert(NumRegsLeft <= NumArgVGPRs && "register estimate underflow"); + assert(NumRegsLeft <= NumArgVGPRs && "Arg VGPR trcking underflow"); Ty = useFirstFieldIfTransparentUnion(Ty); @@ -11458,7 +11486,11 @@ public: Fn->addFnAttr("interrupt", Kind); } + unsigned getOpenCLKernelCallingConv() const override; }; + unsigned RISCVTargetCodeGenInfo::getOpenCLKernelCallingConv() const { + return llvm::CallingConv::VENTUS_KERNEL; + } } // namespace //===----------------------------------------------------------------------===// diff --git a/clang/test/CodeGenOpenCL/ventus-call-kernel.cl b/clang/test/CodeGenOpenCL/ventus-call-kernel.cl new file mode 100644 index 000000000000..4f2037b7a5a8 --- /dev/null +++ b/clang/test/CodeGenOpenCL/ventus-call-kernel.cl @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s + +kernel void test_kernel(global int *out) +{ + out[0] = 4; +} + +// CHECK: define{{.*}} ventus_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out) +// CHECK: store i32 4, i32 addrspace(1)* %out, align 4 +__kernel void test_call_kernel(__global int *out) +{ + test_kernel(out); +} diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 8a679007a504..d018979ae7c7 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -173,7 +173,7 @@ enum Kind { kw_amdgpu_kernel, kw_amdgpu_gfx, kw_tailcc, - + kw_ventus_kernel, // Attributes: kw_attributes, kw_sync, diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h index 9fefeef05cb2..1be51583a0d0 100644 --- a/llvm/include/llvm/IR/CallingConv.h +++ b/llvm/include/llvm/IR/CallingConv.h @@ -241,6 +241,9 @@ namespace CallingConv { /// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15. AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103, + /// Used for VENTUS code object kernels. + VENTUS_KERNEL = 104, + /// The highest possible ID. Must be some 2^k - 1. MaxID = 1023 }; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index c33dc9710f35..f833d81e60bb 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -631,6 +631,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(amdgpu_kernel); KEYWORD(amdgpu_gfx); KEYWORD(tailcc); + KEYWORD(ventus_kernel); KEYWORD(cc); KEYWORD(c); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 76cb5ee6944f..49ad85c69f97 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -2002,6 +2002,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'amdgpu_cs' /// ::= 'amdgpu_kernel' /// ::= 'tailcc' +/// ::= 'ventus_kernel' /// ::= 'cc' UINT /// bool LLParser::parseOptionalCallingConv(unsigned &CC) { @@ -2060,6 +2061,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) { case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; case lltok::kw_tailcc: CC = CallingConv::Tail; break; + case lltok::kw_ventus_kernel: CC = CallingConv::VENTUS_KERNEL; break; case lltok::kw_cc: { Lex.Lex(); return parseUInt32(CC); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 3ca17648a703..93db2c4a0a01 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -341,6 +341,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break; case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break; case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break; + case CallingConv::VENTUS_KERNEL: Out << "ventus_kernel"; break; } } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 9ca8eb114351..1ba21a28d921 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -377,7 +377,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, } // Allocate space on the local-mem stack and private-mem stack if necessary. - if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL) + if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL) RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize), MachineInstr::FrameSetup, getStackAlign()); else @@ -575,7 +575,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, StackSize = FirstSPAdjustAmount; // Deallocate stack - if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL) + if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL) RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize), MachineInstr::FrameDestroy, getStackAlign()); else diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 10eea6bd487f..47fd667cb18c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5804,7 +5804,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( MachineFunction &MF = DAG.getMachineFunction(); - bool IsKernel = CallConv == CallingConv::SPIR_KERNEL; + bool IsKernel = CallConv == CallingConv::VENTUS_KERNEL; EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); @@ -7362,11 +7362,14 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { bool RISCVTargetLowering::isSDNodeSourceOfDivergence( const SDNode *N, FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *KDA) const { + N->isKnownSentinel(); + const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const MachineRegisterInfo &MRI = FLI->MF->getRegInfo(); + // N->op_end(); + // for(auto tt : N->op_end()) switch (N->getOpcode()) { case ISD::CopyFromReg: { const RegisterSDNode *R = cast(N->getOperand(1)); - const MachineRegisterInfo &MRI = FLI->MF->getRegInfo(); - const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo(); Register Reg = R->getReg(); // FIXME: Why does this need to consider isLiveIn? @@ -7378,6 +7381,15 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence( return !TRI->isSGPRReg(MRI, Reg); } + // case ISD::ADD:{ + // SDValue dd = N->getOperand(0); + // if(dd->getOpcode() == ISD::CopyFromReg) { + // dd->dump(); + // const RegisterSDNode *R = cast(dd->getOperand(1)); + // return TRI->isSGPRReg(MRI, R->getReg()); + // } + // return false; + // } case ISD::LOAD: { const LoadSDNode *L = cast(N); return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS; diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index 139ae1d099b3..8978f97ebe03 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -68,7 +68,7 @@ private: public: RISCVMachineFunctionInfo(const MachineFunction &MF) : IsEntryFunction( - MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL) {} + MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL) {} MachineFunctionInfo * clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF, diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll index 3bbe3b1ba6bc..99381749dfd1 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=VENTUS %s -define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) { +define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) { ; VENTUS-LABEL: func: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll index 7732e7899c8c..36125e5efce5 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll @@ -4,7 +4,7 @@ @foo.b = internal addrspace(3) global [5 x i32] undef, align 4 -define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { +define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { ; VENTUS-LABEL: foo: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 48 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll index 190371b704d8..fd6a5db6194f 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=VENTUS %s -define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { +define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { ; VENTUS-LABEL: foo_ker: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll index bf4d762e0739..5f7df369b1a9 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \ ; RUN: | FileCheck -check-prefix=VENTUS %s -define dso_local spir_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){ +define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){ entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) %arrayidx = getelementptr inbounds float, ptr addrspace(1) %B, i32 %call diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll index 8213b1fc02d2..f5570d0cb4a5 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/float-arith-zfinx.ll @@ -2,7 +2,7 @@ ; RUN: | FileCheck -check-prefix=VENTUS %s -define dso_local spir_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { +define dso_local ventus_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { entry: ; VENTUS: fadd.s a{{[1-9]}}, a2, a1 %add1 = fadd float %c, %d @@ -10,7 +10,7 @@ entry: ret void } -define dso_local spir_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { +define dso_local ventus_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { entry: ; VENTUS: fsub.s a{{[1-9]}}, a2, a1 %sub = fsub float %c, %d @@ -18,7 +18,7 @@ entry: ret void } -define dso_local spir_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { +define dso_local ventus_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { entry: ; VENTUS: fmul.s a{{[1-9]}}, a2, a1 %mul = fmul float %c, %d @@ -26,7 +26,7 @@ entry: ret void } -define dso_local spir_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { +define dso_local ventus_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { entry: ; VENTUS: fdiv.s a{{[1-9]}}, a2, a1 %div = fdiv float %c, %d @@ -34,7 +34,7 @@ entry: ret void } -define dso_local spir_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { +define dso_local ventus_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { entry: ; VENTUS: fmadd.s a{{[1-9]}}, a3, a2, a1 %div = call float @llvm.fma.f32(float %a, float %b, float %c) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll index e1e3badf3520..4f919c2ea441 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll @@ -20,7 +20,7 @@ entry: } ; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048) -define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) { +define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) { ; VENTUS-LABEL: foo: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll index bc9d96a95170..1d843e53fe47 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/kernel_args.ll @@ -9,7 +9,7 @@ ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048) -define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) { +define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) { ; VENTUS-LABEL: foo: ; VENTUS: # %bb.0: ; VENTUS-NEXT: lw a1, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll index 4e446464b2be..6d4bc57f6517 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll @@ -55,7 +55,7 @@ cleanup: ; preds = %if.else, %entry, %i ret i32 %retval.0 } -define dso_local spir_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { +define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { ; VENTUS-LABEL: loop_branch: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 16 @@ -118,8 +118,7 @@ for.body: ; preds = %for.body.lr.ph, %fo br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } -; FIXME: Fix this -; define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr { +; FIXME: Fix this 2 noundef %dim) local_unnamed_addr { ; VENTUS-LABEL: branch_in_branch: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi tp, tp, 16 @@ -198,7 +197,7 @@ for.body: ; preds = %for.body.lr.ph, %fo ; } ; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048) -define dso_local spir_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { +define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { ; VENTUS-LABEL: double_loop: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 16 @@ -282,7 +281,7 @@ for.body4: ; preds = %for.cond1.preheader } ; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048) -define dso_local spir_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { +define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { ; VENTUS-LABEL: loop_switch: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 16