AMDGPU: Add occupancy to serialized MachineFunctionInfo
Not sure about the default value handling, but also not sure about defaulting to a theoretically subtarget-dependent value.
This commit is contained in:
parent
37510f69b4
commit
20566a2ed8
|
@ -1225,6 +1225,12 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
|
|||
|
||||
MFI->initializeBaseYamlFields(YamlMFI);
|
||||
|
||||
if (MFI->Occupancy == 0) {
|
||||
// Fixup the subtarget dependent default value.
|
||||
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||||
MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
|
||||
}
|
||||
|
||||
auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
|
||||
Register TempReg;
|
||||
if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
|
||||
|
|
|
@ -538,6 +538,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
|
|||
HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
|
||||
HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
|
||||
HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
|
||||
Occupancy(MFI.getOccupancy()),
|
||||
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
|
||||
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
|
||||
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
|
||||
|
@ -555,6 +556,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
|
|||
LDSSize = YamlMFI.LDSSize;
|
||||
DynLDSAlign = YamlMFI.DynLDSAlign;
|
||||
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
|
||||
Occupancy = YamlMFI.Occupancy;
|
||||
IsEntryFunction = YamlMFI.IsEntryFunction;
|
||||
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
|
||||
MemoryBound = YamlMFI.MemoryBound;
|
||||
|
|
|
@ -275,6 +275,9 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
|
|||
bool HasSpilledVGPRs = false;
|
||||
uint32_t HighBitsOf32BitAddress = 0;
|
||||
|
||||
// TODO: 10 may be a better default since it's the maximum.
|
||||
unsigned Occupancy = 0;
|
||||
|
||||
StringValue ScratchRSrcReg = "$private_rsrc_reg";
|
||||
StringValue FrameOffsetReg = "$fp_reg";
|
||||
StringValue StackPtrOffsetReg = "$sp_reg";
|
||||
|
@ -313,6 +316,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
|
|||
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
|
||||
YamlIO.mapOptional("highBitsOf32BitAddress",
|
||||
MFI.HighBitsOf32BitAddress, 0u);
|
||||
YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@
|
|||
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||
# FULL-NEXT: occupancy: 10
|
||||
# FULL-NEXT: body:
|
||||
|
||||
# SIMPLE: machineFunctionInfo:
|
||||
|
@ -50,6 +51,7 @@
|
|||
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
|
||||
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
|
||||
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
|
||||
# SIMPLE-NEXT: occupancy: 10
|
||||
# SIMPLE-NEXT: body:
|
||||
name: kernel0
|
||||
machineFunctionInfo:
|
||||
|
@ -102,12 +104,14 @@ body: |
|
|||
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||
# FULL-NEXT: occupancy: 10
|
||||
# FULL-NEXT: body:
|
||||
|
||||
# SIMPLE: machineFunctionInfo:
|
||||
# SIMPLE-NEXT: maxKernArgAlign: 1
|
||||
# SIMPLE-NEXT: argumentInfo:
|
||||
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
# SIMPLE-NEXT: occupancy: 10
|
||||
# SIMPLE-NEXT: body:
|
||||
|
||||
name: no_mfi
|
||||
|
@ -143,12 +147,14 @@ body: |
|
|||
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||
# FULL-NEXT: occupancy: 10
|
||||
# FULL-NEXT: body:
|
||||
|
||||
# SIMPLE: machineFunctionInfo:
|
||||
# SIMPLE-NEXT: maxKernArgAlign: 1
|
||||
# SIMPLE-NEXT: argumentInfo:
|
||||
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
# SIMPLE-NEXT: occupancy: 10
|
||||
# SIMPLE-NEXT: body:
|
||||
|
||||
name: empty_mfi
|
||||
|
@ -185,6 +191,7 @@ body: |
|
|||
# FULL-NEXT: fp64-fp16-input-denormals: true
|
||||
# FULL-NEXT: fp64-fp16-output-denormals: true
|
||||
# FULL-NEXT: highBitsOf32BitAddress: 0
|
||||
# FULL-NEXT: occupancy: 10
|
||||
# FULL-NEXT: body:
|
||||
|
||||
# SIMPLE: machineFunctionInfo:
|
||||
|
@ -192,6 +199,7 @@ body: |
|
|||
# SIMPLE-NEXT: isEntryFunction: true
|
||||
# SIMPLE-NEXT: argumentInfo:
|
||||
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
|
||||
# SIMPLE-NEXT: occupancy: 10
|
||||
# SIMPLE-NEXT: body:
|
||||
|
||||
name: empty_mfi_entry_func
|
||||
|
@ -306,3 +314,29 @@ body: |
|
|||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
# ALL-LABEL: name: occupancy_0
|
||||
# ALL: occupancy: 10
|
||||
name: occupancy_0
|
||||
machineFunctionInfo:
|
||||
occupancy: 0
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
# ALL-LABEL: name: occupancy_3
|
||||
# ALL: occupancy: 3
|
||||
name: occupancy_3
|
||||
machineFunctionInfo:
|
||||
occupancy: 3
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||
; CHECK-NEXT: occupancy: 10
|
||||
; CHECK-NEXT: body:
|
||||
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
|
||||
%gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0
|
||||
|
@ -68,6 +69,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
|
|||
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||
; CHECK-NEXT: occupancy: 10
|
||||
; CHECK-NEXT: body:
|
||||
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
|
||||
ret void
|
||||
|
@ -98,6 +100,7 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
|
|||
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||
; CHECK-NEXT: occupancy: 10
|
||||
; CHECK-NEXT: body:
|
||||
define void @function() {
|
||||
ret void
|
||||
|
@ -128,6 +131,7 @@ define void @function() {
|
|||
; CHECK-NEXT: fp64-fp16-input-denormals: true
|
||||
; CHECK-NEXT: fp64-fp16-output-denormals: true
|
||||
; CHECK-NEXT: highBitsOf32BitAddress: 0
|
||||
; CHECK-NEXT: occupancy: 10
|
||||
; CHECK-NEXT: body:
|
||||
define void @function_nsz() #0 {
|
||||
ret void
|
||||
|
|
Loading…
Reference in New Issue