AMDGPU: Add occupancy to serialized MachineFunctionInfo

Not sure about the default value handling, but also not sure about
defaulting to a theoretically subtarget-dependent value.
This commit is contained in:
Matt Arsenault 2021-01-15 16:07:37 -05:00
parent 37510f69b4
commit 20566a2ed8
5 changed files with 50 additions and 0 deletions

View File

@ -1225,6 +1225,12 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
MFI->initializeBaseYamlFields(YamlMFI);
if (MFI->Occupancy == 0) {
// Fixup the subtarget dependent default value.
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
}
auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
Register TempReg;
if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {

View File

@ -538,6 +538,7 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
Occupancy(MFI.getOccupancy()),
ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
@ -555,6 +556,7 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
LDSSize = YamlMFI.LDSSize;
DynLDSAlign = YamlMFI.DynLDSAlign;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
Occupancy = YamlMFI.Occupancy;
IsEntryFunction = YamlMFI.IsEntryFunction;
NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
MemoryBound = YamlMFI.MemoryBound;

View File

@ -275,6 +275,9 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
bool HasSpilledVGPRs = false;
uint32_t HighBitsOf32BitAddress = 0;
// TODO: 10 may be a better default since it's the maximum.
unsigned Occupancy = 0;
StringValue ScratchRSrcReg = "$private_rsrc_reg";
StringValue FrameOffsetReg = "$fp_reg";
StringValue StackPtrOffsetReg = "$sp_reg";
@ -313,6 +316,7 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
YamlIO.mapOptional("highBitsOf32BitAddress",
MFI.HighBitsOf32BitAddress, 0u);
YamlIO.mapOptional("occupancy", MFI.Occupancy, 0);
}
};

View File

@ -32,6 +32,7 @@
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@ -50,6 +51,7 @@
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
@ -102,12 +104,14 @@ body: |
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: no_mfi
@ -143,12 +147,14 @@ body: |
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi
@ -185,6 +191,7 @@ body: |
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:
# SIMPLE: machineFunctionInfo:
@ -192,6 +199,7 @@ body: |
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
@ -306,3 +314,29 @@ body: |
S_ENDPGM 0
...
---
# An explicit occupancy of 0 is treated as "unset": when parsing the
# machineFunctionInfo, a zero value is replaced by the occupancy computed
# from the subtarget, so the printed value is expected to be 10, not 0.
# ALL-LABEL: name: occupancy_0
# ALL: occupancy: 10
name: occupancy_0
machineFunctionInfo:
occupancy: 0
body: |
bb.0:
S_ENDPGM 0
...
---
# A nonzero occupancy given in the input is copied into the function info
# unchanged and must be printed back as the same value.
# ALL-LABEL: name: occupancy_3
# ALL: occupancy: 3
name: occupancy_3
machineFunctionInfo:
occupancy: 3
body: |
bb.0:
S_ENDPGM 0
...

View File

@ -35,6 +35,7 @@
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
%gep = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %arg0
@ -68,6 +69,7 @@ define amdgpu_kernel void @kernel(i32 %arg0, i64 %arg1, <16 x i32> %arg2) {
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
ret void
@ -98,6 +100,7 @@ define amdgpu_ps void @ps_shader(i32 %arg0, i32 inreg %arg1) {
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define void @function() {
ret void
@ -128,6 +131,7 @@ define void @function() {
; CHECK-NEXT: fp64-fp16-input-denormals: true
; CHECK-NEXT: fp64-fp16-output-denormals: true
; CHECK-NEXT: highBitsOf32BitAddress: 0
; CHECK-NEXT: occupancy: 10
; CHECK-NEXT: body:
define void @function_nsz() #0 {
ret void