AMDGPU: Add second emergency slot for SGPR to vmem for large frames
In a future change, we will sometimes use a VGPR offset when spilling to memory, in which case we need two free VGPRs to perform an SGPR spill. In most cases we could spill the VGPR along with the SGPR being spilled, but there are no free lanes for SGPR_1024 in wave32, so we could still potentially need a second scavenging slot.
This commit is contained in:
parent
85628ce75b
commit
d6fdbbcace
|
@ -1229,7 +1229,11 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
|||
}
|
||||
}
|
||||
|
||||
FuncInfo->removeDeadFrameIndices(MFI);
|
||||
// At this point we've already allocated all spilled SGPRs to VGPRs if we
|
||||
// can. Any remaining SGPR spills will go to memory, so move them back to the
|
||||
// default stack.
|
||||
bool HaveSGPRToVMemSpill =
|
||||
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
|
||||
assert(allSGPRSpillsAreDead(MF) &&
|
||||
"SGPR spill should have been removed in SILowerSGPRSpills");
|
||||
|
||||
|
@ -1241,6 +1245,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
|
|||
|
||||
// Add an emergency spill slot
|
||||
RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
|
||||
|
||||
// If we are spilling SGPRs to memory with a large frame, we may need a
|
||||
// second VGPR emergency frame index.
|
||||
if (HaveSGPRToVMemSpill &&
|
||||
allocateScavengingFrameIndexesNearIncomingSP(MF)) {
|
||||
RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -321,7 +321,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
|
|||
// free frame index ids by the later pass(es) like "stack slot coloring"
|
||||
// which in turn could mess up the bookkeeping of "frame index to VGPR
|
||||
// lane".
|
||||
FuncInfo->removeDeadFrameIndices(MFI);
|
||||
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
|
||||
|
||||
MadeChange = true;
|
||||
}
|
||||
|
|
|
@ -402,7 +402,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
|
|||
return Spill.FullyAllocated;
|
||||
}
|
||||
|
||||
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
|
||||
bool SIMachineFunctionInfo::removeDeadFrameIndices(
|
||||
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
|
||||
// Remove dead frame indices from function frame, however keep FP & BP since
|
||||
// spills for them haven't been inserted yet. And also make sure to remove the
|
||||
// frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could
|
||||
|
@ -415,17 +416,28 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
|
|||
}
|
||||
}
|
||||
|
||||
// All other SGPRs must be allocated on the default stack, so reset the stack
|
||||
// ID.
|
||||
for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
|
||||
++i)
|
||||
if (i != FramePointerSaveIndex && i != BasePointerSaveIndex)
|
||||
MFI.setStackID(i, TargetStackID::Default);
|
||||
bool HaveSGPRToMemory = false;
|
||||
|
||||
if (ResetSGPRSpillStackIDs) {
|
||||
// All other SGPRs must be allocated on the default stack, so reset the
|
||||
// stack ID.
|
||||
for (int i = MFI.getObjectIndexBegin(), e = MFI.getObjectIndexEnd(); i != e;
|
||||
++i) {
|
||||
if (i != FramePointerSaveIndex && i != BasePointerSaveIndex) {
|
||||
if (MFI.getStackID(i) == TargetStackID::SGPRSpill) {
|
||||
MFI.setStackID(i, TargetStackID::Default);
|
||||
HaveSGPRToMemory = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &R : VGPRToAGPRSpills) {
|
||||
if (R.second.IsDead)
|
||||
MFI.RemoveStackObject(R.first);
|
||||
}
|
||||
|
||||
return HaveSGPRToMemory;
|
||||
}
|
||||
|
||||
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
|
||||
|
|
|
@ -555,7 +555,11 @@ public:
|
|||
unsigned NumLane) const;
|
||||
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
|
||||
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
|
||||
void removeDeadFrameIndices(MachineFrameInfo &MFI);
|
||||
|
||||
/// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
|
||||
/// to the default stack. Returns true if any sgpr-spill stack ID was reset.
|
||||
bool removeDeadFrameIndices(MachineFrameInfo &MFI,
|
||||
bool ResetSGPRSpillStackIDs);
|
||||
|
||||
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
|
||||
Optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s
|
||||
|
||||
# Check that we allocate 2 emergency stack slots if we're spilling
|
||||
# SGPRs to memory and potentially have an offset larger than fits in
|
||||
# the addressing mode of the memory instructions.
|
||||
|
||||
# CHECK-LABEL: name: test
|
||||
# CHECK: stack:
|
||||
# CHECK-NEXT: - { id: 0, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4,
|
||||
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 12, size: 4096, alignment: 4,
|
||||
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4,
|
||||
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 4, size: 4, alignment: 4,
|
||||
# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
|
||||
# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
|
||||
|
||||
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
# CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr0
|
||||
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
|
||||
# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
|
||||
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
|
||||
# CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr0, 0
|
||||
# CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
---
|
||||
name: test
|
||||
tracksRegLiveness: true
|
||||
frameInfo:
|
||||
maxAlignment: 4
|
||||
stack:
|
||||
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
|
||||
- { id: 1, size: 4096, alignment: 4 }
|
||||
machineFunctionInfo:
|
||||
isEntryFunction: false
|
||||
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||||
stackPtrOffsetReg: '$sgpr32'
|
||||
frameOffsetReg: '$sgpr33'
|
||||
hasSpilledSGPRs: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11
|
||||
S_CMP_EQ_U32 0, 0, implicit-def $scc
|
||||
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||
renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
|
||||
S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
|
||||
...
|
Loading…
Reference in New Issue