mirror of https://github.com/microsoft/clang.git
[OpenCL] Add intel_reqd_sub_group_size attribute support
Summary: Add intel_reqd_sub_group_size attribute support as intel extension cl_intel_required_subgroup_size from https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt Reviewers: Anastasia, bader, hfinkel, pxli168 Reviewed By: Anastasia, bader, pxli168 Subscribers: cfe-commits, yaxunl Differential Revision: https://reviews.llvm.org/D30805 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@302125 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c7f067120d
commit
a7d03b5e20
|
@ -864,6 +864,13 @@ def OpenCLUnrollHint : InheritableAttr {
|
|||
let Documentation = [OpenCLUnrollHintDocs];
|
||||
}
|
||||
|
||||
// Declares the OpenCL attribute spelled
// __attribute__((intel_reqd_sub_group_size(N))). It carries one unsigned
// argument (SubGroupSize), lists Function as its only subject (with ErrorDiag),
// and is documented by OpenCLIntelReqdSubGroupSizeDocs.
def OpenCLIntelReqdSubGroupSize: InheritableAttr {
|
||||
let Spellings = [GNU<"intel_reqd_sub_group_size">];
|
||||
let Args = [UnsignedArgument<"SubGroupSize">];
|
||||
let Subjects = SubjectList<[Function], ErrorDiag>;
|
||||
let Documentation = [OpenCLIntelReqdSubGroupSizeDocs];
|
||||
}
|
||||
|
||||
// This attribute is both a type attribute, and a declaration attribute (for
|
||||
// parameter variables).
|
||||
def OpenCLAccess : Attr {
|
||||
|
|
|
@ -2216,6 +2216,21 @@ s6.11.5 for details.
|
|||
}];
|
||||
}
|
||||
|
||||
// User-facing documentation for the intel_reqd_sub_group_size attribute.
// The attribute's only subject is Function, so it is filed under the
// function-attribute category rather than the statement-attribute one.
def OpenCLIntelReqdSubGroupSizeDocs : Documentation {
  let Category = DocCatFunction;
  let Heading = "__attribute__((intel_reqd_sub_group_size))";
  // Content is reStructuredText; the trailing underscore after the closing
  // backquote is what turns the reference below into a hyperlink.
  let Content = [{
The optional attribute intel_reqd_sub_group_size can be used to indicate that
the kernel must be compiled and executed with the specified subgroup size. When
this attribute is present, get_max_sub_group_size() is guaranteed to return the
specified integer value. This is important for the correctness of many subgroup
algorithms, and in some cases may be used by the compiler to generate more optimal
code. See `cl_intel_required_subgroup_size
<https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_required_subgroup_size.txt>`_
for details.
  }];
}
|
||||
|
||||
def OpenCLAccessDocs : Documentation {
|
||||
let Category = DocCatStmt;
|
||||
let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)";
|
||||
|
|
|
@ -658,34 +658,42 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
|
|||
GenOpenCLArgMetadata(FD, Fn, CGM, Context, Builder, getContext());
|
||||
|
||||
if (const VecTypeHintAttr *A = FD->getAttr<VecTypeHintAttr>()) {
|
||||
QualType hintQTy = A->getTypeHint();
|
||||
const ExtVectorType *hintEltQTy = hintQTy->getAs<ExtVectorType>();
|
||||
bool isSignedInteger =
|
||||
hintQTy->isSignedIntegerType() ||
|
||||
(hintEltQTy && hintEltQTy->getElementType()->isSignedIntegerType());
|
||||
llvm::Metadata *attrMDArgs[] = {
|
||||
QualType HintQTy = A->getTypeHint();
|
||||
const ExtVectorType *HintEltQTy = HintQTy->getAs<ExtVectorType>();
|
||||
bool IsSignedInteger =
|
||||
HintQTy->isSignedIntegerType() ||
|
||||
(HintEltQTy && HintEltQTy->getElementType()->isSignedIntegerType());
|
||||
llvm::Metadata *AttrMDArgs[] = {
|
||||
llvm::ConstantAsMetadata::get(llvm::UndefValue::get(
|
||||
CGM.getTypes().ConvertType(A->getTypeHint()))),
|
||||
llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
|
||||
llvm::IntegerType::get(Context, 32),
|
||||
llvm::APInt(32, (uint64_t)(isSignedInteger ? 1 : 0))))};
|
||||
Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, attrMDArgs));
|
||||
llvm::APInt(32, (uint64_t)(IsSignedInteger ? 1 : 0))))};
|
||||
Fn->setMetadata("vec_type_hint", llvm::MDNode::get(Context, AttrMDArgs));
|
||||
}
|
||||
|
||||
if (const WorkGroupSizeHintAttr *A = FD->getAttr<WorkGroupSizeHintAttr>()) {
|
||||
llvm::Metadata *attrMDArgs[] = {
|
||||
llvm::Metadata *AttrMDArgs[] = {
|
||||
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
|
||||
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
|
||||
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
|
||||
Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, attrMDArgs));
|
||||
Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs));
|
||||
}
|
||||
|
||||
if (const ReqdWorkGroupSizeAttr *A = FD->getAttr<ReqdWorkGroupSizeAttr>()) {
|
||||
llvm::Metadata *attrMDArgs[] = {
|
||||
llvm::Metadata *AttrMDArgs[] = {
|
||||
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())),
|
||||
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())),
|
||||
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))};
|
||||
Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, attrMDArgs));
|
||||
Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs));
|
||||
}
|
||||
|
||||
if (const OpenCLIntelReqdSubGroupSizeAttr *A =
|
||||
FD->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
|
||||
llvm::Metadata *AttrMDArgs[] = {
|
||||
llvm::ConstantAsMetadata::get(Builder.getInt32(A->getSubGroupSize()))};
|
||||
Fn->setMetadata("intel_reqd_sub_group_size",
|
||||
llvm::MDNode::get(Context, AttrMDArgs));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1413,16 +1413,8 @@ private:
|
|||
/// True if we need to emit the lifetime markers.
|
||||
const bool ShouldEmitLifetimeMarkers;
|
||||
|
||||
/// Add a kernel metadata node to the named metadata node 'opencl.kernels'.
|
||||
/// In the kernel metadata node, reference the kernel function and metadata
|
||||
/// nodes for its optional attribute qualifiers (OpenCL 1.1 6.7.2):
|
||||
/// - A node for the vec_type_hint(<type>) qualifier contains string
|
||||
/// "vec_type_hint", an undefined value of the <type> data type,
|
||||
/// and a Boolean that is true if the <type> is integer and signed.
|
||||
/// - A node for the work_group_size_hint(X,Y,Z) qualifier contains string
|
||||
/// "work_group_size_hint", and three 32-bit integers X, Y and Z.
|
||||
/// - A node for the reqd_work_group_size(X,Y,Z) qualifier contains string
|
||||
/// "reqd_work_group_size", and three 32-bit integers X, Y and Z.
|
||||
/// Add OpenCL kernel arg metadata and the kernel attribute metadata to
|
||||
/// the function metadata.
|
||||
void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
|
||||
llvm::Function *Fn);
|
||||
|
||||
|
|
|
@ -2891,6 +2891,28 @@ static void handleWorkGroupSize(Sema &S, Decl *D,
|
|||
Attr.getAttributeSpellingListIndex()));
|
||||
}
|
||||
|
||||
// Handles intel_reqd_sub_group_size.
|
||||
static void handleSubGroupSize(Sema &S, Decl *D, const AttributeList &Attr) {
|
||||
uint32_t SGSize;
|
||||
const Expr *E = Attr.getArgAsExpr(0);
|
||||
if (!checkUInt32Argument(S, Attr, E, SGSize))
|
||||
return;
|
||||
if (SGSize == 0) {
|
||||
S.Diag(Attr.getLoc(), diag::err_attribute_argument_is_zero)
|
||||
<< Attr.getName() << E->getSourceRange();
|
||||
return;
|
||||
}
|
||||
|
||||
OpenCLIntelReqdSubGroupSizeAttr *Existing =
|
||||
D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>();
|
||||
if (Existing && Existing->getSubGroupSize() != SGSize)
|
||||
S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute) << Attr.getName();
|
||||
|
||||
D->addAttr(::new (S.Context) OpenCLIntelReqdSubGroupSizeAttr(
|
||||
Attr.getRange(), S.Context, SGSize,
|
||||
Attr.getAttributeSpellingListIndex()));
|
||||
}
|
||||
|
||||
static void handleVecTypeHint(Sema &S, Decl *D, const AttributeList &Attr) {
|
||||
if (!Attr.hasParsedType()) {
|
||||
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
|
||||
|
@ -6157,6 +6179,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
|
|||
case AttributeList::AT_ReqdWorkGroupSize:
|
||||
handleWorkGroupSize<ReqdWorkGroupSizeAttr>(S, D, Attr);
|
||||
break;
|
||||
case AttributeList::AT_OpenCLIntelReqdSubGroupSize:
|
||||
handleSubGroupSize(S, D, Attr);
|
||||
break;
|
||||
case AttributeList::AT_VecTypeHint:
|
||||
handleVecTypeHint(S, D, Attr);
|
||||
break;
|
||||
|
@ -6521,6 +6546,9 @@ void Sema::ProcessDeclAttributeList(Scope *S, Decl *D,
|
|||
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
|
||||
<< A << ExpectedKernelFunction;
|
||||
D->setInvalidDecl();
|
||||
} else if (Attr *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
|
||||
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
|
||||
D->setInvalidDecl();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,7 +8,11 @@ kernel __attribute__((vec_type_hint(int))) __attribute__((reqd_work_group_size(
|
|||
kernel __attribute__((vec_type_hint(uint4))) __attribute__((work_group_size_hint(8,16,32))) void kernel2(int a) {}
|
||||
// CHECK: define void @kernel2(i32 {{[^%]*}}%a) {{[^{]+}} !vec_type_hint ![[MD3:[0-9]+]] !work_group_size_hint ![[MD4:[0-9]+]]
|
||||
|
||||
kernel __attribute__((intel_reqd_sub_group_size(8))) void kernel3(int a) {}
|
||||
// CHECK: define void @kernel3(i32 {{[^%]*}}%a) {{[^{]+}} !intel_reqd_sub_group_size ![[MD5:[0-9]+]]
|
||||
|
||||
// CHECK: [[MD1]] = !{i32 undef, i32 1}
|
||||
// CHECK: [[MD2]] = !{i32 1, i32 2, i32 4}
|
||||
// CHECK: [[MD3]] = !{<4 x i32> undef, i32 0}
|
||||
// CHECK: [[MD4]] = !{i32 8, i32 16, i32 32}
|
||||
// CHECK: [[MD5]] = !{i32 8}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
// The number of supported attributes should never go down!
|
||||
|
||||
// CHECK: #pragma clang attribute supports 59 attributes:
|
||||
// CHECK: #pragma clang attribute supports 60 attributes:
|
||||
// CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
|
||||
// CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
|
||||
// CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
|
||||
|
@ -42,6 +42,7 @@
|
|||
// CHECK-NEXT: ObjCRuntimeName (SubjectMatchRule_objc_interface, SubjectMatchRule_objc_protocol)
|
||||
// CHECK-NEXT: ObjCRuntimeVisible (SubjectMatchRule_objc_interface)
|
||||
// CHECK-NEXT: ObjCSubclassingRestricted (SubjectMatchRule_objc_interface)
|
||||
// CHECK-NEXT: OpenCLIntelReqdSubGroupSize (SubjectMatchRule_function)
|
||||
// CHECK-NEXT: OpenCLNoSVM (SubjectMatchRule_variable)
|
||||
// CHECK-NEXT: OptimizeNone (SubjectMatchRule_function, SubjectMatchRule_objc_method)
|
||||
// CHECK-NEXT: Overloadable (SubjectMatchRule_function)
|
||||
|
|
|
@ -33,3 +33,7 @@ void f_kernel_image2d_t( kernel image2d_t image ) { // expected-error {{'kernel'
|
|||
kernel __attribute__((reqd_work_group_size(1,2,0))) void kernel11(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
|
||||
kernel __attribute__((reqd_work_group_size(1,0,2))) void kernel12(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
|
||||
kernel __attribute__((reqd_work_group_size(0,1,2))) void kernel13(){} // expected-error {{'reqd_work_group_size' attribute must be greater than 0}}
|
||||
|
||||
__attribute__((intel_reqd_sub_group_size(8))) void kernel14(){} // expected-error {{attribute 'intel_reqd_sub_group_size' can only be applied to a kernel}}
|
||||
kernel __attribute__((intel_reqd_sub_group_size(0))) void kernel15(){} // expected-error {{'intel_reqd_sub_group_size' attribute must be greater than 0}}
|
||||
kernel __attribute__((intel_reqd_sub_group_size(8))) __attribute__((intel_reqd_sub_group_size(16))) void kernel16() {} //expected-warning{{attribute 'intel_reqd_sub_group_size' is already applied with different parameters}}
|
||||
|
|
Loading…
Reference in New Issue