[VENTUS][RISCV][feat] Set ventus kernel for OpenCL kernel functions
This commit is contained in:
parent
3fdda4cd8e
commit
967cb725c8
|
@ -11333,7 +11333,7 @@ void VentusRISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
|
||||||
unsigned NumRegsLeft = NumArgVGPRs;
|
unsigned NumRegsLeft = NumArgVGPRs;
|
||||||
for (auto &Arg : FI.arguments()) {
|
for (auto &Arg : FI.arguments()) {
|
||||||
// FIXME: Is SPIR_KERNEL CC handled by upper layer?
|
// FIXME: Is SPIR_KERNEL CC handled by upper layer?
|
||||||
if (CC == llvm::CallingConv::SPIR_KERNEL) {
|
if (CC == llvm::CallingConv::VENTUS_KERNEL) {
|
||||||
Arg.info = classifyKernelArgumentType(Arg.type);
|
Arg.info = classifyKernelArgumentType(Arg.type);
|
||||||
} else {
|
} else {
|
||||||
Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
|
Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
|
||||||
|
@ -11373,13 +11373,41 @@ ABIArgInfo VentusRISCVABIInfo::classifyReturnType(QualType RetTy) const {
|
||||||
return classifyArgumentType(RetTy, ArgVGPRsLeft);
|
return classifyArgumentType(RetTy, ArgVGPRsLeft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Almost the same as AMDGPU, because AMDGPU uses a buffer to deal with kernel arguments.
|
||||||
ABIArgInfo VentusRISCVABIInfo::classifyKernelArgumentType(QualType Ty) const {
|
ABIArgInfo VentusRISCVABIInfo::classifyKernelArgumentType(QualType Ty) const {
|
||||||
llvm_unreachable("TODO: Should we handle kernel arg here?");
|
Ty = useFirstFieldIfTransparentUnion(Ty);
|
||||||
|
|
||||||
|
// TODO: Can we omit empty structs?
|
||||||
|
|
||||||
|
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
|
||||||
|
Ty = QualType(SeltTy, 0);
|
||||||
|
|
||||||
|
llvm::Type *OrigLTy = CGT.ConvertType(Ty);
|
||||||
|
llvm::Type *LTy = OrigLTy;
|
||||||
|
|
||||||
|
// FIXME: Should also use this for OpenCL, but it requires addressing the
|
||||||
|
// problem of kernels being called.
|
||||||
|
//
|
||||||
|
// FIXME: This doesn't apply the optimization of coercing pointers in structs
|
||||||
|
// to global address space when using byref. This would require implementing a
|
||||||
|
// new kind of coercion of the in-memory type for indirect arguments.
|
||||||
|
if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
|
||||||
|
isAggregateTypeForABI(Ty)) {
|
||||||
|
return ABIArgInfo::getIndirectAliased(
|
||||||
|
getContext().getTypeAlignInChars(Ty),
|
||||||
|
getContext().getTargetAddressSpace(LangAS::opencl_constant),
|
||||||
|
false /*Realign*/, nullptr /*Padding*/);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
|
||||||
|
// individual elements, which confuses the Clover OpenCL backend; therefore we
|
||||||
|
// have to set it to false here. Other args of getDirect() are just defaults.
|
||||||
|
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
ABIArgInfo VentusRISCVABIInfo::classifyArgumentType(QualType Ty,
|
ABIArgInfo VentusRISCVABIInfo::classifyArgumentType(QualType Ty,
|
||||||
unsigned &NumRegsLeft) const {
|
unsigned &NumRegsLeft) const {
|
||||||
assert(NumRegsLeft <= NumArgVGPRs && "register estimate underflow");
|
assert(NumRegsLeft <= NumArgVGPRs && "Arg VGPR trcking underflow");
|
||||||
|
|
||||||
Ty = useFirstFieldIfTransparentUnion(Ty);
|
Ty = useFirstFieldIfTransparentUnion(Ty);
|
||||||
|
|
||||||
|
@ -11458,7 +11486,11 @@ public:
|
||||||
|
|
||||||
Fn->addFnAttr("interrupt", Kind);
|
Fn->addFnAttr("interrupt", Kind);
|
||||||
}
|
}
|
||||||
|
unsigned getOpenCLKernelCallingConv() const override;
|
||||||
};
|
};
|
||||||
|
unsigned RISCVTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
|
||||||
|
return llvm::CallingConv::VENTUS_KERNEL;
|
||||||
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
// RUN: %clang_cc1 -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
|
||||||
|
|
||||||
|
kernel void test_kernel(global int *out)
|
||||||
|
{
|
||||||
|
out[0] = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK: define{{.*}} ventus_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
|
||||||
|
// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
|
||||||
|
__kernel void test_call_kernel(__global int *out)
|
||||||
|
{
|
||||||
|
test_kernel(out);
|
||||||
|
}
|
|
@ -173,7 +173,7 @@ enum Kind {
|
||||||
kw_amdgpu_kernel,
|
kw_amdgpu_kernel,
|
||||||
kw_amdgpu_gfx,
|
kw_amdgpu_gfx,
|
||||||
kw_tailcc,
|
kw_tailcc,
|
||||||
|
kw_ventus_kernel,
|
||||||
// Attributes:
|
// Attributes:
|
||||||
kw_attributes,
|
kw_attributes,
|
||||||
kw_sync,
|
kw_sync,
|
||||||
|
|
|
@ -241,6 +241,9 @@ namespace CallingConv {
|
||||||
/// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
|
/// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
|
||||||
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,
|
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,
|
||||||
|
|
||||||
|
/// Used for VENTUS code object kernels.
|
||||||
|
VENTUS_KERNEL = 104,
|
||||||
|
|
||||||
/// The highest possible ID. Must be some 2^k - 1.
|
/// The highest possible ID. Must be some 2^k - 1.
|
||||||
MaxID = 1023
|
MaxID = 1023
|
||||||
};
|
};
|
||||||
|
|
|
@ -631,6 +631,7 @@ lltok::Kind LLLexer::LexIdentifier() {
|
||||||
KEYWORD(amdgpu_kernel);
|
KEYWORD(amdgpu_kernel);
|
||||||
KEYWORD(amdgpu_gfx);
|
KEYWORD(amdgpu_gfx);
|
||||||
KEYWORD(tailcc);
|
KEYWORD(tailcc);
|
||||||
|
KEYWORD(ventus_kernel);
|
||||||
|
|
||||||
KEYWORD(cc);
|
KEYWORD(cc);
|
||||||
KEYWORD(c);
|
KEYWORD(c);
|
||||||
|
|
|
@ -2002,6 +2002,7 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
|
||||||
/// ::= 'amdgpu_cs'
|
/// ::= 'amdgpu_cs'
|
||||||
/// ::= 'amdgpu_kernel'
|
/// ::= 'amdgpu_kernel'
|
||||||
/// ::= 'tailcc'
|
/// ::= 'tailcc'
|
||||||
|
/// ::= 'ventus_kernel'
|
||||||
/// ::= 'cc' UINT
|
/// ::= 'cc' UINT
|
||||||
///
|
///
|
||||||
bool LLParser::parseOptionalCallingConv(unsigned &CC) {
|
bool LLParser::parseOptionalCallingConv(unsigned &CC) {
|
||||||
|
@ -2060,6 +2061,7 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
|
||||||
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
|
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
|
||||||
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
|
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
|
||||||
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
|
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
|
||||||
|
case lltok::kw_ventus_kernel: CC = CallingConv::VENTUS_KERNEL; break;
|
||||||
case lltok::kw_cc: {
|
case lltok::kw_cc: {
|
||||||
Lex.Lex();
|
Lex.Lex();
|
||||||
return parseUInt32(CC);
|
return parseUInt32(CC);
|
||||||
|
|
|
@ -341,6 +341,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
|
||||||
case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
|
case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
|
||||||
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
|
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
|
||||||
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
|
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
|
||||||
|
case CallingConv::VENTUS_KERNEL: Out << "ventus_kernel"; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -377,7 +377,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate space on the local-mem stack and private-mem stack if necessary.
|
// Allocate space on the local-mem stack and private-mem stack if necessary.
|
||||||
if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
|
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
|
||||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
|
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
|
||||||
MachineInstr::FrameSetup, getStackAlign());
|
MachineInstr::FrameSetup, getStackAlign());
|
||||||
else
|
else
|
||||||
|
@ -575,7 +575,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||||
StackSize = FirstSPAdjustAmount;
|
StackSize = FirstSPAdjustAmount;
|
||||||
|
|
||||||
// Deallocate stack
|
// Deallocate stack
|
||||||
if(MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL)
|
if(MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL)
|
||||||
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
|
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
|
||||||
MachineInstr::FrameDestroy, getStackAlign());
|
MachineInstr::FrameDestroy, getStackAlign());
|
||||||
else
|
else
|
||||||
|
|
|
@ -5804,7 +5804,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
|
||||||
|
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
MachineFunction &MF = DAG.getMachineFunction();
|
||||||
|
|
||||||
bool IsKernel = CallConv == CallingConv::SPIR_KERNEL;
|
bool IsKernel = CallConv == CallingConv::VENTUS_KERNEL;
|
||||||
|
|
||||||
EVT PtrVT = getPointerTy(DAG.getDataLayout());
|
EVT PtrVT = getPointerTy(DAG.getDataLayout());
|
||||||
MVT XLenVT = Subtarget.getXLenVT();
|
MVT XLenVT = Subtarget.getXLenVT();
|
||||||
|
@ -7362,11 +7362,14 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
|
||||||
bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
||||||
const SDNode *N, FunctionLoweringInfo *FLI,
|
const SDNode *N, FunctionLoweringInfo *FLI,
|
||||||
LegacyDivergenceAnalysis *KDA) const {
|
LegacyDivergenceAnalysis *KDA) const {
|
||||||
|
N->isKnownSentinel();
|
||||||
|
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
|
||||||
|
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
|
||||||
|
// N->op_end();
|
||||||
|
// for(auto tt : N->op_end())
|
||||||
switch (N->getOpcode()) {
|
switch (N->getOpcode()) {
|
||||||
case ISD::CopyFromReg: {
|
case ISD::CopyFromReg: {
|
||||||
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
|
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
|
||||||
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
|
|
||||||
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
|
|
||||||
Register Reg = R->getReg();
|
Register Reg = R->getReg();
|
||||||
|
|
||||||
// FIXME: Why does this need to consider isLiveIn?
|
// FIXME: Why does this need to consider isLiveIn?
|
||||||
|
@ -7378,6 +7381,15 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
||||||
|
|
||||||
return !TRI->isSGPRReg(MRI, Reg);
|
return !TRI->isSGPRReg(MRI, Reg);
|
||||||
}
|
}
|
||||||
|
// case ISD::ADD:{
|
||||||
|
// SDValue dd = N->getOperand(0);
|
||||||
|
// if(dd->getOpcode() == ISD::CopyFromReg) {
|
||||||
|
// dd->dump();
|
||||||
|
// const RegisterSDNode *R = cast<RegisterSDNode>(dd->getOperand(1));
|
||||||
|
// return TRI->isSGPRReg(MRI, R->getReg());
|
||||||
|
// }
|
||||||
|
// return false;
|
||||||
|
// }
|
||||||
case ISD::LOAD: {
|
case ISD::LOAD: {
|
||||||
const LoadSDNode *L = cast<LoadSDNode>(N);
|
const LoadSDNode *L = cast<LoadSDNode>(N);
|
||||||
return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
|
return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
|
||||||
|
|
|
@ -68,7 +68,7 @@ private:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
RISCVMachineFunctionInfo(const MachineFunction &MF) : IsEntryFunction(
|
RISCVMachineFunctionInfo(const MachineFunction &MF) : IsEntryFunction(
|
||||||
MF.getFunction().getCallingConv() == CallingConv::SPIR_KERNEL) {}
|
MF.getFunction().getCallingConv() == CallingConv::VENTUS_KERNEL) {}
|
||||||
|
|
||||||
MachineFunctionInfo *
|
MachineFunctionInfo *
|
||||||
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
|
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||||
|
|
||||||
define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
|
define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) {
|
||||||
; VENTUS-LABEL: func:
|
; VENTUS-LABEL: func:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi sp, sp, 16
|
; VENTUS-NEXT: addi sp, sp, 16
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
@foo.b = internal addrspace(3) global [5 x i32] undef, align 4
|
@foo.b = internal addrspace(3) global [5 x i32] undef, align 4
|
||||||
|
|
||||||
define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
||||||
; VENTUS-LABEL: foo:
|
; VENTUS-LABEL: foo:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi sp, sp, 48
|
; VENTUS-NEXT: addi sp, sp, 48
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||||
|
|
||||||
define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||||
; VENTUS-LABEL: foo_ker:
|
; VENTUS-LABEL: foo_ker:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi sp, sp, 16
|
; VENTUS-NEXT: addi sp, sp, 16
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \
|
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O1 < %s \
|
||||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||||
|
|
||||||
define dso_local spir_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
|
define dso_local ventus_kernel void @_kernel(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B){
|
||||||
entry:
|
entry:
|
||||||
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
||||||
%arrayidx = getelementptr inbounds float, ptr addrspace(1) %B, i32 %call
|
%arrayidx = getelementptr inbounds float, ptr addrspace(1) %B, i32 %call
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||||
|
|
||||||
|
|
||||||
define dso_local spir_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
define dso_local ventus_kernel void @fadd(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||||
entry:
|
entry:
|
||||||
; VENTUS: fadd.s a{{[1-9]}}, a2, a1
|
; VENTUS: fadd.s a{{[1-9]}}, a2, a1
|
||||||
%add1 = fadd float %c, %d
|
%add1 = fadd float %c, %d
|
||||||
|
@ -10,7 +10,7 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define dso_local spir_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
define dso_local ventus_kernel void @fsub(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||||
entry:
|
entry:
|
||||||
; VENTUS: fsub.s a{{[1-9]}}, a2, a1
|
; VENTUS: fsub.s a{{[1-9]}}, a2, a1
|
||||||
%sub = fsub float %c, %d
|
%sub = fsub float %c, %d
|
||||||
|
@ -18,7 +18,7 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define dso_local spir_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
define dso_local ventus_kernel void @fmul(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||||
entry:
|
entry:
|
||||||
; VENTUS: fmul.s a{{[1-9]}}, a2, a1
|
; VENTUS: fmul.s a{{[1-9]}}, a2, a1
|
||||||
%mul = fmul float %c, %d
|
%mul = fmul float %c, %d
|
||||||
|
@ -26,7 +26,7 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define dso_local spir_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
define dso_local ventus_kernel void @fdiv(float noundef %c, float noundef %d, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||||
entry:
|
entry:
|
||||||
; VENTUS: fdiv.s a{{[1-9]}}, a2, a1
|
; VENTUS: fdiv.s a{{[1-9]}}, a2, a1
|
||||||
%div = fdiv float %c, %d
|
%div = fdiv float %c, %d
|
||||||
|
@ -34,7 +34,7 @@ entry:
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define dso_local spir_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
define dso_local ventus_kernel void @fmadd(float noundef %a, float noundef %b, float noundef %c, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
|
||||||
entry:
|
entry:
|
||||||
; VENTUS: fmadd.s a{{[1-9]}}, a3, a2, a1
|
; VENTUS: fmadd.s a{{[1-9]}}, a3, a2, a1
|
||||||
%div = call float @llvm.fma.f32(float %a, float %b, float %c)
|
%div = call float @llvm.fma.f32(float %a, float %b, float %c)
|
||||||
|
|
|
@ -20,7 +20,7 @@ entry:
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
|
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
|
||||||
define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
|
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
|
||||||
; VENTUS-LABEL: foo:
|
; VENTUS-LABEL: foo:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi sp, sp, 16
|
; VENTUS-NEXT: addi sp, sp, 16
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
|
|
||||||
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
|
||||||
define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
|
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) nocapture noundef writeonly align 4 %c) {
|
||||||
; VENTUS-LABEL: foo:
|
; VENTUS-LABEL: foo:
|
||||||
; VENTUS: # %bb.0:
|
; VENTUS: # %bb.0:
|
||||||
; VENTUS-NEXT: lw a1, 0(a0)
|
; VENTUS-NEXT: lw a1, 0(a0)
|
||||||
|
|
|
@ -55,7 +55,7 @@ cleanup: ; preds = %if.else, %entry, %i
|
||||||
ret i32 %retval.0
|
ret i32 %retval.0
|
||||||
}
|
}
|
||||||
|
|
||||||
define dso_local spir_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||||
; VENTUS-LABEL: loop_branch:
|
; VENTUS-LABEL: loop_branch:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi sp, sp, 16
|
; VENTUS-NEXT: addi sp, sp, 16
|
||||||
|
@ -118,8 +118,7 @@ for.body: ; preds = %for.body.lr.ph, %fo
|
||||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: Fix this
|
; FIXME: Fix this -- define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
||||||
; define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
|
||||||
; VENTUS-LABEL: branch_in_branch:
|
; VENTUS-LABEL: branch_in_branch:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi tp, tp, 16
|
; VENTUS-NEXT: addi tp, tp, 16
|
||||||
|
@ -198,7 +197,7 @@ for.body: ; preds = %for.body.lr.ph, %fo
|
||||||
; }
|
; }
|
||||||
|
|
||||||
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
|
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
|
||||||
define dso_local spir_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||||
; VENTUS-LABEL: double_loop:
|
; VENTUS-LABEL: double_loop:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi sp, sp, 16
|
; VENTUS-NEXT: addi sp, sp, 16
|
||||||
|
@ -282,7 +281,7 @@ for.body4: ; preds = %for.cond1.preheader
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
|
; Function Attrs: convergent nofree norecurse nounwind memory(argmem: readwrite) vscale_range(1,2048)
|
||||||
define dso_local spir_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
||||||
; VENTUS-LABEL: loop_switch:
|
; VENTUS-LABEL: loop_switch:
|
||||||
; VENTUS: # %bb.0: # %entry
|
; VENTUS: # %bb.0: # %entry
|
||||||
; VENTUS-NEXT: addi sp, sp, 16
|
; VENTUS-NEXT: addi sp, sp, 16
|
||||||
|
|
Loading…
Reference in New Issue