[CUDA][HIP] Allow CUDA __global__ functions to have amdgpu kernel attributes

There are HIP applications e.g. Tensorflow 1.3 using amdgpu kernel attributes, however currently they are only allowed on OpenCL kernel functions. This patch will allow amdgpu kernel attributes to be applied to CUDA/HIP __global__ functions. Differential Revision: https://reviews.llvm.org/D47958 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@334561 91177308-0d34-0410-b5e6-96231b3b80d8
2018-06-12 23:58:59 +00:00 · 2018-06-12 23:58:59 +00:00 · 078d5aacfa
parent b28c0c543c
commit 078d5aacfa
5 changed files with 76 additions and 67 deletions
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@ -8435,7 +8435,7 @@ def err_reference_pipe_type : Error <
  "pipes packet types cannot be of reference type">;
 def err_opencl_no_main : Error<"%select{function|kernel}0 cannot be called 'main'">;
 def err_opencl_kernel_attr :
-  Error<"attribute %0 can only be applied to a kernel function">;
+  Error<"attribute %0 can only be applied to an OpenCL kernel function">;
 def err_opencl_return_value_with_address_space : Error<
  "return value cannot be qualified with address space">;
 def err_opencl_constant_no_init : Error<
--- a/lib/Sema/SemaDeclAttr.cpp
+++ b/lib/Sema/SemaDeclAttr.cpp
@ -6468,25 +6468,27 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
    } else if (const auto *A = D->getAttr<VecTypeHintAttr>()) {
      Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
-    } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
-      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-        << A << ExpectedKernelFunction;
-      D->setInvalidDecl();
    } else if (const auto *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
      Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
      D->setInvalidDecl();
+    } else if (!D->hasAttr<CUDAGlobalAttr>()) {
+      if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+            << A << ExpectedKernelFunction;
+        D->setInvalidDecl();
+      }
    }
  }
 }
--- a/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
+++ b/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple nvptx \
+// RUN:     -fcuda-is-device -emit-llvm -o - %s | FileCheck %s \
+// RUN:     -check-prefix=NAMD
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:     -verify -o - %s | FileCheck -check-prefix=NAMD %s
+
+#include "Inputs/cuda.h"
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
+__global__ void flat_work_group_size_32_64() {
+// CHECK: define amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+__global__ void waves_per_eu_2() {
+// CHECK: define amdgpu_kernel void @_Z14waves_per_eu_2v() [[WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
+__global__ void num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @_Z11num_sgpr_32v() [[NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
+__global__ void num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @_Z11num_vgpr_64v() [[NUM_VGPR_64:#[0-9]+]]
+}
+
+// Make sure this is silently accepted on other targets.
+// NAMD-NOT: "amdgpu-flat-work-group-size"
+// NAMD-NOT: "amdgpu-waves-per-eu"
+// NAMD-NOT: "amdgpu-num-vgpr"
+// NAMD-NOT: "amdgpu-num-sgpr"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" 
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" 
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" 
--- a/test/SemaCUDA/amdgpu-attrs.cu
+++ b/test/SemaCUDA/amdgpu-attrs.cu
@ -1,110 +1,80 @@
 // RUN: %clang_cc1 -fsyntax-only -verify %s
-
 #include "Inputs/cuda.h"


-// expected-error@+2 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64)))
 __global__ void flat_work_group_size_32_64() {}

-// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2)))
 __global__ void waves_per_eu_2() {}

-// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2, 4)))
 __global__ void waves_per_eu_2_4() {}

-// expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_sgpr(32)))
 __global__ void num_sgpr_32() {}

-// expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_vgpr(64)))
 __global__ void num_vgpr_64() {}


-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2() {}

-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4() {}

-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32)))
 __global__ void flat_work_group_size_32_64_num_sgpr_32() {}

-// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_num_vgpr_64() {}

-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
 __global__ void waves_per_eu_2_num_sgpr_32() {}

-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
 __global__ void waves_per_eu_2_num_vgpr_64() {}

-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
 __global__ void waves_per_eu_2_4_num_sgpr_32() {}

-// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
 __global__ void waves_per_eu_2_4_num_vgpr_64() {}

-// expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
 __global__ void num_sgpr_32_num_vgpr_64() {}

-
-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {}

-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {}

-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {}

-// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {}

-
-// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {}

-// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
-// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
-// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
 __attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
 __global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {}
+
+// expected-error@+2{{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel function}}
+__attribute__((reqd_work_group_size(32, 64, 64)))
+__global__ void reqd_work_group_size_32_64_64() {}
+
+// expected-error@+2{{attribute 'work_group_size_hint' can only be applied to an OpenCL kernel function}}
+__attribute__((work_group_size_hint(2, 2, 2)))
+__global__ void work_group_size_hint_2_2_2() {}
+
+// expected-error@+2{{attribute 'vec_type_hint' can only be applied to an OpenCL kernel function}}
+__attribute__((vec_type_hint(int)))
+__global__ void vec_type_hint_int() {}
+
+// expected-error@+2{{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel function}}
+__attribute__((intel_reqd_sub_group_size(64)))
+__global__ void intel_reqd_sub_group_size_64() {}
--- a/test/SemaOpenCL/invalid-kernel-attrs.cl
+++ b/test/SemaOpenCL/invalid-kernel-attrs.cl
@ -14,11 +14,11 @@ kernel __attribute__((work_group_size_hint(8,16,32,4))) void kernel6() {} //expe

 kernel __attribute__((work_group_size_hint(1,2,3))) __attribute__((work_group_size_hint(3,2,1))) void kernel7() {}  //expected-warning{{attribute 'work_group_size_hint' is already applied with different parameters}}

-__attribute__((reqd_work_group_size(8,16,32))) void kernel8(){} // expected-error {{attribute 'reqd_work_group_size' can only be applied to a kernel}}
+__attribute__((reqd_work_group_size(8,16,32))) void kernel8(){} // expected-error {{attribute 'reqd_work_group_size' can only be applied to an OpenCL kernel}}

-__attribute__((work_group_size_hint(8,16,32))) void kernel9(){} // expected-error {{attribute 'work_group_size_hint' can only be applied to a kernel}}
+__attribute__((work_group_size_hint(8,16,32))) void kernel9(){} // expected-error {{attribute 'work_group_size_hint' can only be applied to an OpenCL kernel}}

-__attribute__((vec_type_hint(char))) void kernel10(){} // expected-error {{attribute 'vec_type_hint' can only be applied to a kernel}}
+__attribute__((vec_type_hint(char))) void kernel10(){} // expected-error {{attribute 'vec_type_hint' can only be applied to an OpenCL kernel}}

 constant int foo1 __attribute__((reqd_work_group_size(8,16,32))) = 0; // expected-error {{'reqd_work_group_size' attribute only applies to functions}}

@ -34,6 +34,6 @@ kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expecte
 kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
 kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}

-__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
+__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel}}
 kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}
 kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {}  //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}