[VENTUS][RISCV][feat] Set ventus kernel for OpenCL kernel functions

This commit is contained in:
zhoujing 2023-06-05 13:10:35 +08:00
parent 3fdda4cd8e
commit 967cb725c8
18 changed files with 89 additions and 26 deletions

View File

@ -11333,7 +11333,7 @@ void VentusRISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
unsigned NumRegsLeft = NumArgVGPRs;
for (auto &Arg : FI.arguments()) {
// FIXME: Is SPIR_KERNEL CC handled by upper layer?
if (CC == llvm::CallingConv::SPIR_KERNEL) {
if (CC == llvm::CallingConv::VENTUS_KERNEL) {
Arg.info = classifyKernelArgumentType(Arg.type);
} else {
Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
@ -11373,13 +11373,41 @@ ABIArgInfo VentusRISCVABIInfo::classifyReturnType(QualType RetTy) const {
return classifyArgumentType(RetTy, ArgVGPRsLeft);
}
/// Almost the same as AMDGPU, because AMDGPU use buffer to deal with
ABIArgInfo VentusRISCVABIInfo::classifyKernelArgumentType(QualType Ty) const {
llvm_unreachable("TODO: Should we handle kernel arg here?");
Ty = useFirstFieldIfTransparentUnion(Ty);
// TODO: Can we omit empty structs?
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
Ty = QualType(SeltTy, 0);
llvm::Type *OrigLTy = CGT.ConvertType(Ty);
llvm::Type *LTy = OrigLTy;
// FIXME: Should also use this for OpenCL, but it requires addressing the
// problem of kernels being called.
//
// FIXME: This doesn't apply the optimization of coercing pointers in structs
// to global address space when using byref. This would require implementing a
// new kind of coercion of the in-memory type when for indirect arguments.
if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
isAggregateTypeForABI(Ty)) {
return ABIArgInfo::getIndirectAliased(
getContext().getTypeAlignInChars(Ty),
getContext().getTargetAddressSpace(LangAS::opencl_constant),
false /*Realign*/, nullptr /*Padding*/);
}
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
// have to set it to false here. Other args of getDirect() are just defaults.
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
ABIArgInfo VentusRISCVABIInfo::classifyArgumentType(QualType Ty,
unsigned &NumRegsLeft) const {
assert(NumRegsLeft <= NumArgVGPRs && "register estimate underflow");
assert(NumRegsLeft <= NumArgVGPRs && "Arg VGPR trcking underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
@ -11458,7 +11486,11 @@ public:
Fn->addFnAttr("interrupt", Kind);
}
unsigned getOpenCLKernelCallingConv() const override;
};
unsigned RISCVTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
return llvm::CallingConv::VENTUS_KERNEL;
}
} // namespace
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,13 @@
// RUN: %clang_cc1 -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
kernel void test_kernel(global int *out)
{
out[0] = 4;
}
// CHECK: define{{.*}} ventus_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
__kernel void test_call_kernel(__global int *out)
{
test_kernel(out);
}

View File

@ -173,7 +173,7 @@ enum Kind {
kw_amdgpu_kernel,
kw_amdgpu_gfx,
kw_tailcc,
kw_ventus_kernel,
// Attributes:
kw_attributes,
kw_sync,

View File

@ -241,6 +241,9 @@ namespace CallingConv {
/// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,
/// Used for VENTUS code object kernels.
VENTUS_KERNEL = 104,
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};

View File

@ -631,6 +631,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(amdgpu_kernel);
KEYWORD(amdgpu_gfx);
KEYWORD(tailcc);
KEYWORD(ventus_kernel);
KEYWORD(cc);
KEYWORD(c);

View File

@ -2002,6 +2002,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'amdgpu_cs'
/// ::= 'amdgpu_kernel'
/// ::= 'tailcc'
/// ::= 'ventus_kernel'
/// ::= 'cc' UINT
///
bool LLParser::parseOptionalCallingConv(unsigned &CC) {
@ -2060,6 +2061,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
case lltok::kw_ventus_kernel: CC = CallingConv::VENTUS_KERNEL; break;
case lltok::kw_cc: {
Lex.Lex();
return parseUInt32(CC);

View File

@ -341,6 +341,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
case CallingConv::VENTUS_KERNEL: Out << "ventus_kernel"; break;
}
}

View File

@ -377,7 +377,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
}
// Allocate space on the local-mem stack and private-mem stack if necessary.
if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
MachineInstr::FrameSetup, getStackAlign());
else
@ -575,7 +575,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
StackSize = FirstSPAdjustAmount;
// Deallocate stack
if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
MachineInstr::FrameDestroy, getStackAlign());
else

View File

@ -5804,7 +5804,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
MachineFunction &MF = DAG.getMachineFunction();
bool IsKernel = CallConv == CallingConv::SPIR_KERNEL;
bool IsKernel = CallConv == CallingConv::VENTUS_KERNEL;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT XLenVT = Subtarget.getXLenVT();
@ -7362,11 +7362,14 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
const SDNode *N, FunctionLoweringInfo *FLI,
LegacyDivergenceAnalysis *KDA) const {
N->isKnownSentinel();
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
// N->op_end();
// for(auto tt : N->op_end())
switch (N->getOpcode()) {
case ISD::CopyFromReg: {
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
Register Reg = R->getReg();
// FIXME: Why does this need to consider isLiveIn?
@ -7378,6 +7381,15 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
return !TRI->isSGPRReg(MRI, Reg);
}
// case ISD::ADD:{
// SDValue dd = N->getOperand(0);
// if(dd->getOpcode() == ISD::CopyFromReg) {
// dd->dump();
// const RegisterSDNode *R = cast<RegisterSDNode>(dd->getOperand(1));
// return TRI->isSGPRReg(MRI, R->getReg());
// }
// return false;
// }
case ISD::LOAD: {
const LoadSDNode *L = cast<LoadSDNode>(N);
return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;

View File

@ -68,7 +68,7 @@ private:
public:
RISCVMachineFunctionInfo(const MachineFunction &MF) : IsEntryFunction(
MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL) {}
MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL) {}
MachineFunctionInfo *
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,

View File

@ -2,7 +2,7 @@
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: func:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16

View File

@ -4,7 +4,7 @@
@foo.b = internal addrspace(3) global [5 x i32] undef, align 4
define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 48

View File

@ -2,7 +2,7 @@
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: foo_ker:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16

View File

@ -1,7 +1,7 @@
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
define dso_local spir_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
%arrayidx = getelementptr inbounds float, ptr addrspace(1) %B, i32 %call

View File

@ -2,7 +2,7 @@
; RUN: | FileCheck -check-prefix=VENTUS %s
define dso_local spir_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
define dso_local ventus_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
entry:
; VENTUS: fadd.s a{{[1-9]}}, a2, a1
%add1 = fadd float %c, %d
@ -10,7 +10,7 @@ entry:
ret void
}
define dso_local spir_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
define dso_local ventus_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
entry:
; VENTUS: fsub.s a{{[1-9]}}, a2, a1
%sub = fsub float %c, %d
@ -18,7 +18,7 @@ entry:
ret void
}
define dso_local spir_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
define dso_local ventus_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
entry:
; VENTUS: fmul.s a{{[1-9]}}, a2, a1
%mul = fmul float %c, %d
@ -26,7 +26,7 @@ entry:
ret void
}
define dso_local spir_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
define dso_local ventus_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
entry:
; VENTUS: fdiv.s a{{[1-9]}}, a2, a1
%div = fdiv float %c, %d
@ -34,7 +34,7 @@ entry:
ret void
}
define dso_local spir_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
define dso_local ventus_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
entry:
; VENTUS: fmadd.s a{{[1-9]}}, a3, a2, a1
%div = call float @llvm.fma.f32(float %a, float %b, float %c)

View File

@ -20,7 +20,7 @@ entry:
}
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16

View File

@ -9,7 +9,7 @@
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0:
; VENTUS-NEXT: lw a1, 0(a0)

View File

@ -55,7 +55,7 @@ cleanup: ; preds = %if.else, %entry, %i
ret i32 %retval.0
}
define dso_local spir_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: loop_branch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
@ -118,8 +118,7 @@ for.body: ; preds = %for.body.lr.ph, %fo
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
}
; FIXME: Fix this
; define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; FIXME: Fix this 2 noundef %dim) local_unnamed_addr {
; VENTUS-LABEL: branch_in_branch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 16
@ -198,7 +197,7 @@ for.body: ; preds = %for.body.lr.ph, %fo
; }
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
define dso_local spir_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: double_loop:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
@ -282,7 +281,7 @@ for.body4: ; preds = %for.cond1.preheader
}
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
define dso_local spir_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: loop_switch:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16