AMDGPU: Add second emergency slot for SGPR to vmem for large frames

In a future change, we will sometimes use a VGPR offset when spilling
to memory, in which case we need 2 free VGPRs to do the SGPR spill.
In most cases we could spill the VGPR along with the SGPR being
spilled, but we don't have any free lanes for SGPR_1024 in wave32, so
we could still potentially need a second scavenging slot.
This commit is contained in:
Matt Arsenault 2021-11-23 19:47:40 -05:00
parent 85628ce75b
commit d6fdbbcace
5 changed files with 91 additions and 10 deletions

View File

@ -1229,7 +1229,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
}
}
FuncInfo->removeDeadFrameIndices(MFI);
// At this point we've already allocated all spilled SGPRs to VGPRs if we
// can. Any remaining SGPR spills will go to memory, so move them back to the
// default stack.
bool HaveSGPRToVMemSpill =
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
assert(allSGPRSpillsAreDead(MF) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
@ -1241,6 +1245,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// Add an emergency spill slot
RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
// If we are spilling SGPRs to memory with a large frame, we may need a
// second VGPR emergency frame index.
if (HaveSGPRToVMemSpill &&
allocateScavengingFrameIndexesNearIncomingSP(MF)) {
RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
}
}
}

View File

@ -321,7 +321,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// free frame index ids by the later pass(es) like "stack slot coloring"
// which in turn could mess-up with the book keeping of "frame index to VGPR
// lane".
FuncInfo->removeDeadFrameIndices(MFI);
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
MadeChange = true;
}

View File

@ -402,7 +402,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
return Spill.FullyAllocated;
}
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
bool SIMachineFunctionInfo::removeDeadFrameIndices(
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
// Remove dead frame indices from function frame, however keep FP & BP since
// spills for them haven't been inserted yet. And also make sure to remove the
// frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
@ -415,17 +416,28 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
}
}
// All other SPGRs must be allocated on the default stack, so reset the stack
// ID.
for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
++i)
if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
MFI.setStackID(i, TargetStackID::Default);
bool HaveSGPRToMemory = false;
if (ResetSGPRSpillStackIDs) {
// All other SGPRs must be allocated on the default stack, so reset the
// stack ID.
for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
++i) {
if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
MFI.setStackID(i, TargetStackID::Default);
HaveSGPRToMemory = true;
}
}
}
}
for (auto &R : VGPRToAGPRSpills) {
if (R.second.IsDead)
MFI.RemoveStackObject(R.first);
}
return HaveSGPRToMemory;
}
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,

View File

@ -555,7 +555,11 @@ public:
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
void removeDeadFrameIndices(MachineFrameInfo &MFI);
/// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
/// to the default stack.
bool removeDeadFrameIndices(MachineFrameInfo &MFI,
bool ResetSGPRSpillStackIDs);
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

View File

@ -0,0 +1,54 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s
# Check that we allocate 2 emergency stack slots if we're spilling
# SGPRs to memory and potentially have an offset larger than what fits
# in the addressing mode of the memory instructions.
# CHECK-LABEL: name: test
# CHECK: stack:
# CHECK-NEXT: - { id: 0, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4,
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 12, size: 4096, alignment: 4,
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4,
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 4, size: 4, alignment: 4,
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
# CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr0
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
# CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr0, 0
# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
---
name: test
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
- { id: 1, size: 4096, alignment: 4 }
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
hasSpilledSGPRs: true
body: |
bb.0:
liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11
S_CMP_EQ_U32 0, 0, implicit-def $scc
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
...