AMDGPU: Use attributor to propagate amdgpu-flat-work-group-size
This merges the acceptable flat-work-group-size ranges across the call graph, rather than simply applying each function's attribute in isolation. The equivalent handling is removed from the old propagation pass.
This commit is contained in:
parent
8d4b74ac3f
commit
ec57b37551
|
@ -128,6 +128,17 @@ public:
|
|||
return ST.hasApertureRegs();
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
|
||||
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
|
||||
return ST.getFlatWorkGroupSizes(F);
|
||||
}
|
||||
|
||||
std::pair<unsigned, unsigned>
|
||||
getMaximumFlatWorkGroupRange(const Function &F) {
|
||||
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
|
||||
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
|
||||
}
|
||||
|
||||
private:
|
||||
/// Check if the ConstantExpr \p CE requires queue ptr attribute.
|
||||
static bool visitConstExpr(const ConstantExpr *CE) {
|
||||
|
@ -470,6 +481,118 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
|
|||
llvm_unreachable("AAAMDAttributes is only valid for function position");
|
||||
}
|
||||
|
||||
/// Propagate amdgpu-flat-work-group-size attribute.
|
||||
struct AAAMDFlatWorkGroupSize
|
||||
: public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
|
||||
using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
|
||||
AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
|
||||
: Base(IRP, 32) {}
|
||||
|
||||
/// See AbstractAttribute::getState(...).
|
||||
IntegerRangeState &getState() override { return *this; }
|
||||
const IntegerRangeState &getState() const override { return *this; }
|
||||
|
||||
void initialize(Attributor &A) override {
|
||||
Function *F = getAssociatedFunction();
|
||||
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
|
||||
unsigned MinGroupSize, MaxGroupSize;
|
||||
std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
|
||||
intersectKnown(
|
||||
ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
|
||||
}
|
||||
|
||||
ChangeStatus updateImpl(Attributor &A) override {
|
||||
ChangeStatus Change = ChangeStatus::UNCHANGED;
|
||||
|
||||
auto CheckCallSite = [&](AbstractCallSite CS) {
|
||||
Function *Caller = CS.getInstruction()->getFunction();
|
||||
LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
|
||||
<< "->" << getAssociatedFunction()->getName() << '\n');
|
||||
|
||||
const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
|
||||
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
|
||||
|
||||
Change |=
|
||||
clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
bool AllCallSitesKnown = true;
|
||||
if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
|
||||
return indicatePessimisticFixpoint();
|
||||
|
||||
return Change;
|
||||
}
|
||||
|
||||
ChangeStatus manifest(Attributor &A) override {
|
||||
SmallVector<Attribute, 8> AttrList;
|
||||
Function *F = getAssociatedFunction();
|
||||
LLVMContext &Ctx = F->getContext();
|
||||
|
||||
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
|
||||
unsigned Min, Max;
|
||||
std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
|
||||
|
||||
// Don't add the attribute if it's the implied default.
|
||||
if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
|
||||
return ChangeStatus::UNCHANGED;
|
||||
|
||||
SmallString<10> Buffer;
|
||||
raw_svector_ostream OS(Buffer);
|
||||
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
|
||||
|
||||
AttrList.push_back(
|
||||
Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
|
||||
return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
|
||||
/* ForceReplace */ true);
|
||||
}
|
||||
|
||||
const std::string getAsStr() const override {
|
||||
std::string Str;
|
||||
raw_string_ostream OS(Str);
|
||||
OS << "AMDFlatWorkGroupSize[";
|
||||
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
|
||||
OS << ']';
|
||||
return OS.str();
|
||||
}
|
||||
|
||||
/// See AbstractAttribute::trackStatistics()
|
||||
void trackStatistics() const override {}
|
||||
|
||||
/// Create an abstract attribute view for the position \p IRP.
|
||||
static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
|
||||
Attributor &A);
|
||||
|
||||
/// See AbstractAttribute::getName()
|
||||
const std::string getName() const override {
|
||||
return "AAAMDFlatWorkGroupSize";
|
||||
}
|
||||
|
||||
/// See AbstractAttribute::getIdAddr()
|
||||
const char *getIdAddr() const override { return &ID; }
|
||||
|
||||
/// This function should return true if the type of the \p AA is
|
||||
/// AAAMDFlatWorkGroupSize
|
||||
static bool classof(const AbstractAttribute *AA) {
|
||||
return (AA->getIdAddr() == &ID);
|
||||
}
|
||||
|
||||
/// Unique ID (due to the unique address)
|
||||
static const char ID;
|
||||
};
|
||||
|
||||
const char AAAMDFlatWorkGroupSize::ID = 0;
|
||||
|
||||
AAAMDFlatWorkGroupSize &
|
||||
AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
|
||||
Attributor &A) {
|
||||
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
|
||||
return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
|
||||
llvm_unreachable(
|
||||
"AAAMDFlatWorkGroupSize is only valid for function position");
|
||||
}
|
||||
|
||||
class AMDGPUAttributor : public ModulePass {
|
||||
public:
|
||||
AMDGPUAttributor() : ModulePass(ID) {}
|
||||
|
@ -497,7 +620,8 @@ public:
|
|||
BumpPtrAllocator Allocator;
|
||||
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
|
||||
DenseSet<const char *> Allowed(
|
||||
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID, &AACallEdges::ID});
|
||||
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
|
||||
&AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
|
||||
|
||||
Attributor A(Functions, InfoCache, CGUpdater, &Allowed);
|
||||
|
||||
|
@ -505,6 +629,9 @@ public:
|
|||
if (!F.isIntrinsic()) {
|
||||
A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
|
||||
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
|
||||
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
|
||||
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,10 +55,7 @@ static constexpr const FeatureBitset TargetFeatures = {
|
|||
|
||||
// Attributes to propagate.
|
||||
// TODO: Support conservative min/max merging instead of cloning.
|
||||
static constexpr const char* AttributeNames[] = {
|
||||
"amdgpu-waves-per-eu",
|
||||
"amdgpu-flat-work-group-size"
|
||||
};
|
||||
static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
|
||||
|
||||
static constexpr unsigned NumAttr =
|
||||
sizeof(AttributeNames) / sizeof(AttributeNames[0]);
|
||||
|
|
|
@ -1,49 +0,0 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-propagate-attributes-late %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-propagate-attributes-late %s | FileCheck %s
|
||||
|
||||
; CHECK: define internal void @max_flat_1_1024() #0 {
|
||||
define internal void @max_flat_1_1024() #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define internal void @max_flat_1_256() #1 {
|
||||
define internal void @max_flat_1_256() #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel_1_256_call_default() #1 {
|
||||
define amdgpu_kernel void @kernel_1_256_call_default() #1 {
|
||||
call void @default()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel_1_256_call_1_256() #1 {
|
||||
define amdgpu_kernel void @kernel_1_256_call_1_256() #1 {
|
||||
call void @max_flat_1_256()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define amdgpu_kernel void @kernel_1_256_call_64_64() #1 {
|
||||
define amdgpu_kernel void @kernel_1_256_call_64_64() #1 {
|
||||
call void @max_flat_64_64()
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define internal void @max_flat_64_64() #2 {
|
||||
define internal void @max_flat_64_64() #2 {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: define internal void @default() #2 {
|
||||
define internal void @default() #3 {
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024" }
|
||||
attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256" }
|
||||
attributes #2 = { noinline "amdgpu-flat-work-group-size"="64,64" }
|
||||
attributes #3 = { noinline }
|
||||
|
||||
; CHECK: attributes #0 = { noinline "amdgpu-flat-work-group-size"="1,1024"
|
||||
; CHECK-NEXT: attributes #1 = { noinline "amdgpu-flat-work-group-size"="1,256"
|
||||
; CHECK-NEXT: attributes #2 = { noinline "amdgpu-flat-work-group-size"="1,256"
|
|
@ -0,0 +1,214 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck %s
|
||||
|
||||
; Check propagation of amdgpu-flat-work-group-size attribute.
|
||||
|
||||
; Called from a single kernel with 1,256
|
||||
define internal void @default_to_1_256() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@default_to_1_256
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel_1_256() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel_1_256
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @default_to_1_256()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @default_to_1_256()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Called from a single kernel with 64,128
|
||||
define internal void @default_to_64_128() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@default_to_64_128
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel_64_128() #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel_64_128
|
||||
; CHECK-SAME: () #[[ATTR1]] {
|
||||
; CHECK-NEXT: call void @default_to_64_128()
|
||||
; CHECK-NEXT: call void @flat_group_64_64()
|
||||
; CHECK-NEXT: call void @default_to_64_256()
|
||||
; CHECK-NEXT: call void @flat_group_128_256()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @default_to_64_128()
|
||||
call void @flat_group_64_64()
|
||||
call void @default_to_64_256()
|
||||
call void @flat_group_128_256()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Called from kernels with 128,512 and 512,512
|
||||
define internal void @default_to_128_512() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@default_to_128_512
|
||||
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
; This already has a strict bounds, but called from kernels with wider
|
||||
; bounds, and should not be changed.
|
||||
define internal void @flat_group_64_64() #2 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@flat_group_64_64
|
||||
; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
; 128,256 -> 128,128
|
||||
define internal void @flat_group_128_256() #3 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@flat_group_128_256
|
||||
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @flat_group_512_1024() #4 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@flat_group_512_1024
|
||||
; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel_128_512() #5 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel_128_512
|
||||
; CHECK-SAME: () #[[ATTR2]] {
|
||||
; CHECK-NEXT: call void @default_to_128_512()
|
||||
; CHECK-NEXT: call void @flat_group_64_64()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @default_to_128_512()
|
||||
call void @flat_group_64_64()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel_512_512() #6 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel_512_512
|
||||
; CHECK-SAME: () #[[ATTR5]] {
|
||||
; CHECK-NEXT: call void @default_to_128_512()
|
||||
; CHECK-NEXT: call void @flat_group_512_1024()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @default_to_128_512()
|
||||
call void @flat_group_512_1024()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Called from kernels with 128,256 and 64,128 => 64,256
|
||||
define internal void @default_to_64_256() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@default_to_64_256
|
||||
; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
; The kernel's lower bound is higher than the callee's lower bound, so
|
||||
; this should probably be illegal.
|
||||
define amdgpu_kernel void @kernel_128_256() #3 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel_128_256
|
||||
; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @default_to_64_256()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @default_to_64_256()
|
||||
ret void
|
||||
}
|
||||
|
||||
; 64,128 -> 64,128
|
||||
define internal void @merge_cycle_0() #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@merge_cycle_0
|
||||
; CHECK-SAME: () #[[ATTR1]] {
|
||||
; CHECK-NEXT: call void @merge_cycle_1()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @merge_cycle_1()
|
||||
ret void
|
||||
}
|
||||
|
||||
; 128,256 -> 128,128
|
||||
define internal void @merge_cycle_1() #3 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@merge_cycle_1
|
||||
; CHECK-SAME: () #[[ATTR4]] {
|
||||
; CHECK-NEXT: call void @merge_cycle_0()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @merge_cycle_0()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel_64_256() #7 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel_64_256
|
||||
; CHECK-SAME: () #[[ATTR6]] {
|
||||
; CHECK-NEXT: call void @merge_cycle_0()
|
||||
; CHECK-NEXT: call void @default_captured_address()
|
||||
; CHECK-NEXT: call void @externally_visible_default()
|
||||
; CHECK-NEXT: [[F32:%.*]] = call float bitcast (i32 ()* @bitcasted_function to float ()*)()
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
call void @merge_cycle_0()
|
||||
call void @default_captured_address()
|
||||
call void @externally_visible_default()
|
||||
%f32 = call float bitcast (i32 ()* @bitcasted_function to float ()*)()
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @default_captured_address() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@default_captured_address
|
||||
; CHECK-SAME: () #[[ATTR8:[0-9]+]] {
|
||||
; CHECK-NEXT: store volatile void ()* @default_captured_address, void ()** undef, align 8
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store volatile void ()* @default_captured_address, void ()** undef, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @externally_visible_default() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@externally_visible_default
|
||||
; CHECK-SAME: () #[[ATTR8]] {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
; 1,1024 -> 64,256
|
||||
define internal i32 @bitcasted_function() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@bitcasted_function
|
||||
; CHECK-SAME: () #[[ATTR6]] {
|
||||
; CHECK-NEXT: ret i32 0
|
||||
;
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
|
||||
attributes #1 = { "amdgpu-flat-work-group-size"="64,128" }
|
||||
attributes #2 = { "amdgpu-flat-work-group-size"="64,64" }
|
||||
attributes #3 = { "amdgpu-flat-work-group-size"="128,256" }
|
||||
attributes #4 = { "amdgpu-flat-work-group-size"="512,1024" }
|
||||
attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
|
||||
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
|
||||
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,128" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
Loading…
Reference in New Issue