[OpenCL] Add intel_reqd_sub_group_size attribute support

Summary:
Add intel_reqd_sub_group_size attribute support as intel extension  cl_intel_required_subgroup_size from
https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt

Reviewers: Anastasia, bader, hfinkel, pxli168

Reviewed By: Anastasia, bader, pxli168

Subscribers: cfe-commits, yaxunl

Differential Revision: https://reviews.llvm.org/D30805

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@302125 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Xiuli Pan 2017-05-04 07:31:20 +00:00
parent c7f067120d
commit a7d03b5e20
8 changed files with 82 additions and 23 deletions

View File

@ -864,6 +864,13 @@ def OpenCLUnrollHint : InheritableAttr {
let Documentation = [OpenCLUnrollHintDocs];
}
def OpenCLIntelReqdSubGroupSize: InheritableAttr {
let Spellings = [GNU<"intel_reqd_sub_group_size">];
let Args = [UnsignedArgument<"SubGroupSize">];
let Subjects = SubjectList<[Function], ErrorDiag>;
let Documentation = [OpenCLIntelReqdSubGroupSizeDocs];
}
// This attribute is both a type attribute, and a declaration attribute (for
// parameter variables).
def OpenCLAccess : Attr {

View File

@ -2216,6 +2216,21 @@ s6.11.5 for details.
}];
}
def OpenCLIntelReqdSubGroupSizeDocs : Documentation {
let Category = DocCatStmt;
let Heading = "__attribute__((intel_reqd_sub_group_size))";
let Content = [{
The optional attribute intel_reqd_sub_group_size can be used to indicate that
the kernel must be compiled and executed with the specified subgroup size. When
this attribute is present, get_max_sub_group_size() is guaranteed to return the
specified integer value. This is important for the correctness of many subgroup
algorithms, and in some cases may be used by the compiler to generate more optimal
code. See `cl_intel_required_subgroup_size
<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`
for details.
}];
}
def OpenCLAccessDocs : Documentation {
let Category = DocCatStmt;
let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)";

View File

@ -658,34 +658,42 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext());
if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
QualType hintQTy = A->getTypeHint();
const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>();
bool isSignedInteger =
hintQTy->isSignedIntegerType() ||
(hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
llvm::Metadata *attrMDArgs[] = {
QualType HintQTy = A->getTypeHint();
const ExtVectorType *HintEltQTy = HintQTy->getAs<ExtVectorType>();
bool IsSignedInteger =
HintQTy->isSignedIntegerType() ||
(HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType());
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(llvm::UndefValue::get(
CGM.getTypes().ConvertType(A->getTypeHint()))),
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
llvm::IntegerType::get(Context, 32),
llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))};
Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs));
llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))};
Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs));
}
if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
llvm::Metadata *attrMDArgs[] = {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs));
Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs));
}
if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
llvm::Metadata *attrMDArgs[] = {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs));
Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs));
}
if (const OpenCLIntelReqdSubGroupSizeAttr *A =
FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))};
Fn->setMetadata("intel_reqd_sub_group_size",
llvm::MDNode::get(Context, AttrMDArgs));
}
}

View File

@ -1413,16 +1413,8 @@ private:
/// True if we need emit the life-time markers.
const bool ShouldEmitLifetimeMarkers;
/// Add a kernel metadata node to the named metadata node 'opencl.kernels'.
/// In the kernel metadata node, reference the kernel function and metadata
/// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2):
/// - A node for the vec_type_hint(<type>) qualifier contains string
/// "vec_type_hint", an undefined value of the <type> data type,
/// and a Boolean that is true if the <type> is integer and signed.
/// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string
/// "work_group_size_hint", and three 32-bit integers X, Y and Z.
/// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string
/// "reqd_work_group_size", and three 32-bit integers X, Y and Z.
/// Add OpenCL kernel arg metadata and the kernel attribute meatadata to
/// the function metadata.
void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::Function *Fn);

View File

@ -2891,6 +2891,28 @@ static void handleWorkGroupSize(Sema &S, Decl *D,
Attr.getAttributeSpellingListIndex()));
}
// Handles intel_reqd_sub_group_size.
static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &Attr) {
uint32_t SGSize;
const Expr *E = Attr.getArgAsExpr(0);
if (!checkUInt32Argument(S, Attr, E, SGSize))
return;
if (SGSize == 0) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
<< Attr.getName() << E->getSourceRange();
return;
}
OpenCLIntelReqdSubGroupSizeAttr *Existing =
D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>();
if (Existing && Existing->getSubGroupSize() != SGSize)
S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
D->addAttr(::new (S.Context) OpenCLIntelReqdSubGroupSizeAttr(
Attr.getRange(), S.Context, SGSize,
Attr.getAttributeSpellingListIndex()));
}
static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &Attr) {
if (!Attr.hasParsedType()) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
@ -6157,6 +6179,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case AttributeList::AT_ReqdWorkGroupSize:
handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, Attr);
break;
case AttributeList::AT_OpenCLIntelReqdSubGroupSize:
handleSubGroupSize(S, D, Attr);
break;
case AttributeList::AT_VecTypeHint:
handleVecTypeHint(S, D, Attr);
break;
@ -6521,6 +6546,9 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
<< A << ExpectedKernelFunction;
D->setInvalidDecl();
} else if (Attr *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
D->setInvalidDecl();
}
}
}

View File

@ -8,7 +8,11 @@ kernel __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(
kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
// CHECK: define void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
// CHECK: define void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
// CHECK: [[MD1]] = !{i32 undef, i32 1}
// CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
// CHECK: [[MD3]] = !{<4 x i32> undef, i32 0}
// CHECK: [[MD4]] = !{i32 8, i32 16, i32 32}
// CHECK: [[MD5]] = !{i32 8}

View File

@ -2,7 +2,7 @@
// The number of supported attributes should never go down!
// CHECK: #pragma clang attribute supports 59 attributes:
// CHECK: #pragma clang attribute supports 60 attributes:
// CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
// CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
@ -42,6 +42,7 @@
// CHECK-NEXT: ObjCRuntimeName (SubjectMatchRule_objc_interface, SubjectMatchRule_objc_protocol)
// CHECK-NEXT: ObjCRuntimeVisible (SubjectMatchRule_objc_interface)
// CHECK-NEXT: ObjCSubclassingRestricted (SubjectMatchRule_objc_interface)
// CHECK-NEXT: OpenCLIntelReqdSubGroupSize (SubjectMatchRule_function)
// CHECK-NEXT: OpenCLNoSVM (SubjectMatchRule_variable)
// CHECK-NEXT: OptimizeNone (SubjectMatchRule_function, SubjectMatchRule_objc_method)
// CHECK-NEXT: Overloadable (SubjectMatchRule_function)

View File

@ -33,3 +33,7 @@ void f_kernel_image2d_t( kernel image2d_t image ) { // expected-error {{'kernel'
kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}
kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {} //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}