mirror of https://github.com/microsoft/clang.git
[NVPTX, CUDA] Added support for m8n32k16 and m32n8k16 variants of wmma instructions.
The new instructions were added added for sm_70+ GPUs in CUDA-9.1. Differential Revision: https://reviews.llvm.org/D45068 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@330296 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
202f222f94
commit
c4d3d32435
|
@ -18,11 +18,18 @@
|
|||
# define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
|
||||
#endif
|
||||
|
||||
#pragma push_macro("SM_70")
|
||||
#define SM_70 "sm_70|sm_71"
|
||||
#pragma push_macro("SM_60")
|
||||
#define SM_60 "sm_60|sm_61|sm_62|sm_70|sm_71"
|
||||
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
|
||||
|
||||
#pragma push_macro("PTX61")
|
||||
#define PTX61 "ptx61"
|
||||
#pragma push_macro("PTX60")
|
||||
#define PTX60 "ptx60|ptx61"
|
||||
#define PTX60 "ptx60|" PTX61
|
||||
|
||||
#pragma push_macro("AND")
|
||||
#define AND(a, b) a "," b
|
||||
|
||||
// Special Registers
|
||||
|
||||
|
@ -698,19 +705,46 @@ BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
|
|||
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
|
||||
|
||||
// Builtins to support WMMA instructions on sm_70
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX60))
|
||||
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", PTX60)
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
|
||||
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*i*UiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*f*UiIi", "", AND(SM_70,PTX61))
|
||||
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
|
||||
TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60))
|
||||
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
|
||||
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
|
||||
TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61))
|
||||
|
||||
#undef BUILTIN
|
||||
#undef TARGET_BUILTIN
|
||||
#pragma pop_macro("AND")
|
||||
#pragma pop_macro("SM_60")
|
||||
#pragma pop_macro("SM_70")
|
||||
#pragma pop_macro("PTX60")
|
||||
#pragma pop_macro("PTX61")
|
||||
|
|
|
@ -10715,7 +10715,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
|
|||
case NVPTX::BI__hmma_m16n16k16_ld_a:
|
||||
case NVPTX::BI__hmma_m16n16k16_ld_b:
|
||||
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
|
||||
case NVPTX::BI__hmma_m16n16k16_ld_c_f32: {
|
||||
case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_a:
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_b:
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_a:
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_b:
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_c_f32: {
|
||||
Address Dst = EmitPointerWithAlignment(E->getArg(0));
|
||||
Value *Src = EmitScalarExpr(E->getArg(1));
|
||||
Value *Ldm = EmitScalarExpr(E->getArg(2));
|
||||
|
@ -10746,6 +10754,46 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
|
|||
: Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
|
||||
NumResults = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_a:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m32n8k16_load_a_f16_row_stride;
|
||||
NumResults = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_b:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m32n8k16_load_b_f16_row_stride;
|
||||
NumResults = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m32n8k16_load_c_f16_row_stride;
|
||||
NumResults = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_col_stride
|
||||
: Intrinsic::nvvm_wmma_m32n8k16_load_c_f32_row_stride;
|
||||
NumResults = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_a:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m8n32k16_load_a_f16_row_stride;
|
||||
NumResults = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_b:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m8n32k16_load_b_f16_row_stride;
|
||||
NumResults = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m8n32k16_load_c_f16_row_stride;
|
||||
NumResults = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_col_stride
|
||||
: Intrinsic::nvvm_wmma_m8n32k16_load_c_f32_row_stride;
|
||||
NumResults = 8;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unexpected builtin ID.");
|
||||
}
|
||||
|
@ -10764,7 +10812,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
|
|||
}
|
||||
|
||||
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
|
||||
case NVPTX::BI__hmma_m16n16k16_st_c_f32: {
|
||||
case NVPTX::BI__hmma_m16n16k16_st_c_f32:
|
||||
case NVPTX::BI__hmma_m32n8k16_st_c_f16:
|
||||
case NVPTX::BI__hmma_m32n8k16_st_c_f32:
|
||||
case NVPTX::BI__hmma_m8n32k16_st_c_f16:
|
||||
case NVPTX::BI__hmma_m8n32k16_st_c_f32: {
|
||||
Value *Dst = EmitScalarExpr(E->getArg(0));
|
||||
Address Src = EmitPointerWithAlignment(E->getArg(1));
|
||||
Value *Ldm = EmitScalarExpr(E->getArg(2));
|
||||
|
@ -10786,6 +10838,24 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
|
|||
IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
|
||||
: Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_st_c_f16:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m32n8k16_store_d_f16_row_stride;
|
||||
NumResults = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_st_c_f32:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_col_stride
|
||||
: Intrinsic::nvvm_wmma_m32n8k16_store_d_f32_row_stride;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_st_c_f16:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_col_stride
|
||||
: Intrinsic::nvvm_wmma_m8n32k16_store_d_f16_row_stride;
|
||||
NumResults = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_st_c_f32:
|
||||
IID = isColMajor ? Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_col_stride
|
||||
: Intrinsic::nvvm_wmma_m8n32k16_store_d_f32_row_stride;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unexpected builtin ID.");
|
||||
}
|
||||
|
@ -10808,7 +10878,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
|
|||
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f16f32: {
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f16f32: {
|
||||
Address Dst = EmitPointerWithAlignment(E->getArg(0));
|
||||
Address SrcA = EmitPointerWithAlignment(E->getArg(1));
|
||||
Address SrcB = EmitPointerWithAlignment(E->getArg(2));
|
||||
|
@ -10825,15 +10903,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
|
|||
bool Satf = SatfArg.getSExtValue();
|
||||
|
||||
// clang-format off
|
||||
#define MMA_VARIANTS(type) {{ \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type, \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type##_satfinite, \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type, \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type##_satfinite, \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type, \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type##_satfinite, \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type, \
|
||||
Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type##_satfinite \
|
||||
#define MMA_VARIANTS(geom, type) {{ \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \
|
||||
Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \
|
||||
}}
|
||||
// clang-format on
|
||||
|
||||
|
@ -10847,22 +10925,62 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
|
|||
unsigned NumEltsD;
|
||||
switch (BuiltinID) {
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16));
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f16));
|
||||
NumEltsC = 4;
|
||||
NumEltsD = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16));
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f16));
|
||||
NumEltsC = 4;
|
||||
NumEltsD = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32));
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f16_f32));
|
||||
NumEltsC = 8;
|
||||
NumEltsD = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32));
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m16n16k16, f32_f32));
|
||||
NumEltsC = 8;
|
||||
NumEltsD = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f16));
|
||||
NumEltsC = 4;
|
||||
NumEltsD = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f16));
|
||||
NumEltsC = 4;
|
||||
NumEltsD = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f16_f32));
|
||||
NumEltsC = 8;
|
||||
NumEltsD = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m32n8k16, f32_f32));
|
||||
NumEltsC = 8;
|
||||
NumEltsD = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f16));
|
||||
NumEltsC = 4;
|
||||
NumEltsD = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f16));
|
||||
NumEltsC = 4;
|
||||
NumEltsD = 8;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f16_f32));
|
||||
NumEltsC = 8;
|
||||
NumEltsD = 4;
|
||||
break;
|
||||
case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
|
||||
IID = getMMAIntrinsic(MMA_VARIANTS(m8n32k16, f32_f32));
|
||||
NumEltsC = 8;
|
||||
NumEltsD = 8;
|
||||
break;
|
||||
|
|
|
@ -622,17 +622,19 @@ void CudaToolChain::addClangTargetOptions(
|
|||
CC1Args.push_back("-mlink-cuda-bitcode");
|
||||
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
|
||||
|
||||
if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
|
||||
// CUDA-9 uses new instructions that are only available in PTX6.0
|
||||
CC1Args.push_back("-target-feature");
|
||||
CC1Args.push_back("+ptx60");
|
||||
} else {
|
||||
// Libdevice in CUDA-7.0 requires PTX version that's more recent
|
||||
// than LLVM defaults to. Use PTX4.2 which is the PTX version that
|
||||
// came with CUDA-7.0.
|
||||
CC1Args.push_back("-target-feature");
|
||||
CC1Args.push_back("+ptx42");
|
||||
// Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
|
||||
// defaults to. Use PTX4.2 by default, which is the PTX version that came with
|
||||
// CUDA-7.0.
|
||||
const char *PtxFeature = "+ptx42";
|
||||
if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
|
||||
// CUDA-9.1 uses new instructions that are only available in PTX6.1+
|
||||
PtxFeature = "+ptx61";
|
||||
} else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
|
||||
// CUDA-9.0 uses new instructions that are only available in PTX6.0+
|
||||
PtxFeature = "+ptx60";
|
||||
}
|
||||
CC1Args.push_back("-target-feature");
|
||||
CC1Args.push_back(PtxFeature);
|
||||
|
||||
if (DeviceOffloadingKind == Action::OFK_OpenMP) {
|
||||
SmallVector<StringRef, 8> LibraryPaths;
|
||||
|
|
|
@ -1,9 +1,16 @@
|
|||
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \
|
||||
// RUN: -fcuda-is-device -target-feature +ptx60 \
|
||||
// RUN: -S -emit-llvm -o - -x cuda %s \
|
||||
// RUN: | FileCheck -check-prefix=CHECK %s
|
||||
// RUN: | FileCheck -check-prefix=CHECK_M16 %s
|
||||
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \
|
||||
// RUN: -fcuda-is-device -target-feature +ptx61 -DPTX61 \
|
||||
// RUN: -S -emit-llvm -o - -x cuda %s \
|
||||
// RUN: | FileCheck -check-prefixes=CHECK_M16,CHECK_M32_M8 %s
|
||||
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
|
||||
// RUN: -fcuda-is-device -S -o /dev/null -x cuda -verify %s
|
||||
// RUN: -DPTX61 -fcuda-is-device -S -o /dev/null -x cuda -verify=pre-sm_70 %s
|
||||
// RUN: %clang_cc1 -triple nvptx-unknown-unknown \
|
||||
// RUN: -target-cpu sm_70 -target-feature +ptx60 \
|
||||
// RUN: -DPTX61 -fcuda-is-device -S -o /dev/null -x cuda -verify=pre-ptx61 %s
|
||||
|
||||
#if !defined(CUDA_VERSION)
|
||||
#define __device__ __attribute__((device))
|
||||
|
@ -18,149 +25,443 @@ typedef unsigned long long uint64_t;
|
|||
// that encounters an error, so -verify will not be able to find errors in
|
||||
// subsequent functions.
|
||||
|
||||
// CHECK-LABEL: nvvm_wmma
|
||||
__device__ void nvvm_wmma(int *src, int *dst,
|
||||
float *fsrc, float *fdst,
|
||||
int ldm) {
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
|
||||
// CHECK-LABEL: nvvm_wmma_m16n16k16
|
||||
__device__ void nvvm_wmma_m16n16k16(int *src, int *dst,
|
||||
float *fsrc, float *fdst,
|
||||
int ldm) {
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_a(dst, src, ldm, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_a(dst, src+1, ldm, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_b(dst, src, ldm, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_b(dst, src+2, ldm, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
|
||||
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
|
||||
// CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
|
||||
// expected-error@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
|
||||
// CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
|
||||
// pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature sm_70{{.*}},ptx60{{.*}}}}
|
||||
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
|
||||
}
|
||||
|
||||
#ifdef PTX61
|
||||
// CHECK-LABEL: nvvm_wmma_m32n8k16
|
||||
__device__ void nvvm_wmma_m32n8k16(int *src, int *dst,
|
||||
float *fsrc, float *fdst,
|
||||
int ldm) {
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_a(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_a(dst, src+1, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_b(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_b(dst, src+2, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_st_c_f16(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_st_c_f16(dst, src, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
|
||||
|
||||
|
||||
// m8n32k16 variants.
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_a(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_a(dst, src+1, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_b(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_b(dst, src+2, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_st_c_f16(dst, src, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_st_c_f16(dst, src, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 1);
|
||||
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
|
||||
// CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32.satfinite
|
||||
// pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature sm_70{{.*}},ptx61{{.*}}}}
|
||||
__hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue