[VENTUS][RISCV][feat] Set ventus kernel for OpenCL kernel functions
This commit is contained in:
parent
3fdda4cd8e
commit
967cb725c8
|
@ -11333,7 +11333,7 @@ void VentusRISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
|
|||
unsigned NumRegsLeft = NumArgVGPRs;
|
||||
for (auto &Arg : FI.arguments()) {
|
||||
// FIXME: Is SPIR_KERNEL CC handled by upper layer?
|
||||
if (CC == llvm::CallingConv::SPIR_KERNEL) {
|
||||
if (CC == llvm::CallingConv::VENTUS_KERNEL) {
|
||||
Arg.info = classifyKernelArgumentType(Arg.type);
|
||||
} else {
|
||||
Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
|
||||
|
@ -11373,13 +11373,41 @@ ABIArgInfo VentusRISCVABIInfo::classifyReturnType(QualType RetTy) const {
|
|||
return classifyArgumentType(RetTy, ArgVGPRsLeft);
|
||||
}
|
||||
|
||||
/// Almost the same as AMDGPU, because AMDGPU use buffer to deal with
|
||||
ABIArgInfo VentusRISCVABIInfo::classifyKernelArgumentType(QualType Ty) const {
|
||||
llvm_unreachable("TODO: Should we handle kernel arg here?");
|
||||
Ty = useFirstFieldIfTransparentUnion(Ty);
|
||||
|
||||
// TODO: Can we omit empty structs?
|
||||
|
||||
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
|
||||
Ty = QualType(SeltTy, 0);
|
||||
|
||||
llvm::Type *OrigLTy = CGT.ConvertType(Ty);
|
||||
llvm::Type *LTy = OrigLTy;
|
||||
|
||||
// FIXME: Should also use this for OpenCL, but it requires addressing the
|
||||
// problem of kernels being called.
|
||||
//
|
||||
// FIXME: This doesn't apply the optimization of coercing pointers in structs
|
||||
// to global address space when using byref. This would require implementing a
|
||||
// new kind of coercion of the in-memory type when for indirect arguments.
|
||||
if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
|
||||
isAggregateTypeForABI(Ty)) {
|
||||
return ABIArgInfo::getIndirectAliased(
|
||||
getContext().getTypeAlignInChars(Ty),
|
||||
getContext().getTargetAddressSpace(LangAS::opencl_constant),
|
||||
false /*Realign*/, nullptr /*Padding*/);
|
||||
}
|
||||
|
||||
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
|
||||
// individual elements, which confuses the Clover OpenCL backend; therefore we
|
||||
// have to set it to false here. Other args of getDirect() are just defaults.
|
||||
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
|
||||
}
|
||||
|
||||
ABIArgInfo VentusRISCVABIInfo::classifyArgumentType(QualType Ty,
|
||||
unsigned &NumRegsLeft) const {
|
||||
assert(NumRegsLeft <= NumArgVGPRs && "register estimate underflow");
|
||||
assert(NumRegsLeft <= NumArgVGPRs && "Arg VGPR trcking underflow");
|
||||
|
||||
Ty = useFirstFieldIfTransparentUnion(Ty);
|
||||
|
||||
|
@ -11458,7 +11486,11 @@ public:
|
|||
|
||||
Fn->addFnAttr("interrupt", Kind);
|
||||
}
|
||||
unsigned getOpenCLKernelCallingConv() const override;
|
||||
};
|
||||
unsigned RISCVTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
|
||||
return llvm::CallingConv::VENTUS_KERNEL;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
// RUN: %clang_cc1 -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
kernel void test_kernel(global int *out)
|
||||
{
|
||||
out[0] = 4;
|
||||
}
|
||||
|
||||
// CHECK: define{{.*}} ventus_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
|
||||
// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
|
||||
__kernel void test_call_kernel(__global int *out)
|
||||
{
|
||||
test_kernel(out);
|
||||
}
|
|
@ -173,7 +173,7 @@ enum Kind {
|
|||
kw_amdgpu_kernel,
|
||||
kw_amdgpu_gfx,
|
||||
kw_tailcc,
|
||||
|
||||
kw_ventus_kernel,
|
||||
// Attributes:
|
||||
kw_attributes,
|
||||
kw_sync,
|
||||
|
|
|
@ -241,6 +241,9 @@ namespace CallingConv {
|
|||
/// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
|
||||
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,
|
||||
|
||||
/// Used for VENTUS code object kernels.
|
||||
VENTUS_KERNEL = 104,
|
||||
|
||||
/// The highest possible ID. Must be some 2^k - 1.
|
||||
MaxID = 1023
|
||||
};
|
||||
|
|
|
@ -631,6 +631,7 @@ lltok::Kind LLLexer::LexIdentifier() {
|
|||
KEYWORD(amdgpu_kernel);
|
||||
KEYWORD(amdgpu_gfx);
|
||||
KEYWORD(tailcc);
|
||||
KEYWORD(ventus_kernel);
|
||||
|
||||
KEYWORD(cc);
|
||||
KEYWORD(c);
|
||||
|
|
|
@ -2002,6 +2002,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
|
|||
/// ::= 'amdgpu_cs'
|
||||
/// ::= 'amdgpu_kernel'
|
||||
/// ::= 'tailcc'
|
||||
/// ::= 'ventus_kernel'
|
||||
/// ::= 'cc' UINT
|
||||
///
|
||||
bool LLParser::parseOptionalCallingConv(unsigned &CC) {
|
||||
|
@ -2060,6 +2061,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
|
|||
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
|
||||
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
|
||||
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
|
||||
case lltok::kw_ventus_kernel: CC = CallingConv::VENTUS_KERNEL; break;
|
||||
case lltok::kw_cc: {
|
||||
Lex.Lex();
|
||||
return parseUInt32(CC);
|
||||
|
|
|
@ -341,6 +341,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
|
|||
case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
|
||||
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
|
||||
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
|
||||
case CallingConv::VENTUS_KERNEL: Out << "ventus_kernel"; break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -377,7 +377,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
}
|
||||
|
||||
// Allocate space on the local-mem stack and private-mem stack if necessary.
|
||||
if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
|
||||
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
|
||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
|
||||
MachineInstr::FrameSetup, getStackAlign());
|
||||
else
|
||||
|
@ -575,7 +575,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
StackSize = FirstSPAdjustAmount;
|
||||
|
||||
// Deallocate stack
|
||||
if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
|
||||
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
|
||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
|
||||
MachineInstr::FrameDestroy, getStackAlign());
|
||||
else
|
||||
|
|
|
@ -5804,7 +5804,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
|||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
bool IsKernel = CallConv == CallingConv::SPIR_KERNEL;
|
||||
bool IsKernel = CallConv == CallingConv::VENTUS_KERNEL;
|
||||
|
||||
EVT PtrVT = getPointerTy(DAG.getDataLayout());
|
||||
MVT XLenVT = Subtarget.getXLenVT();
|
||||
|
@ -7362,11 +7362,14 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
|
|||
bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
||||
const SDNode *N, FunctionLoweringInfo *FLI,
|
||||
LegacyDivergenceAnalysis *KDA) const {
|
||||
N->isKnownSentinel();
|
||||
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
|
||||
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
|
||||
// N->op_end();
|
||||
// for(auto tt : N->op_end())
|
||||
switch (N->getOpcode()) {
|
||||
case ISD::CopyFromReg: {
|
||||
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
|
||||
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
|
||||
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
|
||||
Register Reg = R->getReg();
|
||||
|
||||
// FIXME: Why does this need to consider isLiveIn?
|
||||
|
@ -7378,6 +7381,15 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
|||
|
||||
return !TRI->isSGPRReg(MRI, Reg);
|
||||
}
|
||||
// case ISD::ADD:{
|
||||
// SDValue dd = N->getOperand(0);
|
||||
// if(dd->getOpcode() == ISD::CopyFromReg) {
|
||||
// dd->dump();
|
||||
// const RegisterSDNode *R = cast<RegisterSDNode>(dd->getOperand(1));
|
||||
// return TRI->isSGPRReg(MRI, R->getReg());
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
case ISD::LOAD: {
|
||||
const LoadSDNode *L = cast<LoadSDNode>(N);
|
||||
return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
|
||||
|
|
|
@ -68,7 +68,7 @@ private:
|
|||
|
||||
public:
|
||||
RISCVMachineFunctionInfo(const MachineFunction &MF) : IsEntryFunction(
|
||||
MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL) {}
|
||||
MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL) {}
|
||||
|
||||
MachineFunctionInfo *
|
||||
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
|
||||
define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
|
||||
; VENTUS-LABEL: func:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
@foo.b = internal addrspace(3) global [5 x i32] undef, align 4
|
||||
|
||||
define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
||||
define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
||||
; VENTUS-LABEL: foo:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 48
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
; VENTUS-LABEL: foo_ker:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
define dso_local spir_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
|
||||
define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
|
||||
entry:
|
||||
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
||||
%arrayidx = getelementptr inbounds float, ptr addrspace(1) %B, i32 %call
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
|
||||
define dso_local spir_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
define dso_local ventus_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
entry:
|
||||
; VENTUS: fadd.s a{{[1-9]}}, a2, a1
|
||||
%add1 = fadd float %c, %d
|
||||
|
@ -10,7 +10,7 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
define dso_local spir_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
define dso_local ventus_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
entry:
|
||||
; VENTUS: fsub.s a{{[1-9]}}, a2, a1
|
||||
%sub = fsub float %c, %d
|
||||
|
@ -18,7 +18,7 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
define dso_local spir_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
define dso_local ventus_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
entry:
|
||||
; VENTUS: fmul.s a{{[1-9]}}, a2, a1
|
||||
%mul = fmul float %c, %d
|
||||
|
@ -26,7 +26,7 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
define dso_local spir_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
define dso_local ventus_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
entry:
|
||||
; VENTUS: fdiv.s a{{[1-9]}}, a2, a1
|
||||
%div = fdiv float %c, %d
|
||||
|
@ -34,7 +34,7 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
define dso_local spir_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
define dso_local ventus_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||
entry:
|
||||
; VENTUS: fmadd.s a{{[1-9]}}, a3, a2, a1
|
||||
%div = call float @llvm.fma.f32(float %a, float %b, float %c)
|
||||
|
|
|
@ -20,7 +20,7 @@ entry:
|
|||
}
|
||||
|
||||
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
|
||||
define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
|
||||
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
|
||||
; VENTUS-LABEL: foo:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
|
||||
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
|
||||
define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
|
||||
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
|
||||
; VENTUS-LABEL: foo:
|
||||
; VENTUS: # %bb.0:
|
||||
; VENTUS-NEXT: lw a1, 0(a0)
|
||||
|
|
|
@ -55,7 +55,7 @@ cleanup: ; preds = %if.else, %entry, %i
|
|||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
define dso_local spir_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
; VENTUS-LABEL: loop_branch:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
|
@ -118,8 +118,7 @@ for.body: ; preds = %for.body.lr.ph, %fo
|
|||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
; FIXME: Fix this
|
||||
; define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
||||
; FIXME: Fix this 2 noundef %dim) local_unnamed_addr {
|
||||
; VENTUS-LABEL: branch_in_branch:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 16
|
||||
|
@ -198,7 +197,7 @@ for.body: ; preds = %for.body.lr.ph, %fo
|
|||
; }
|
||||
|
||||
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
|
||||
define dso_local spir_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
; VENTUS-LABEL: double_loop:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
|
@ -282,7 +281,7 @@ for.body4: ; preds = %for.cond1.preheader
|
|||
}
|
||||
|
||||
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
|
||||
define dso_local spir_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||
; VENTUS-LABEL: loop_switch:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi sp, sp, 16
|
||||
|
|
Loading…
Reference in New Issue