//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Pass to temporarily raise the wave priority beginning at the start of
/// the shader function until its last VMEM instructions to allow younger
/// waves to issue their VMEM instructions as well.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Allocator.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-set-wave-priority"

static cl::opt<unsigned> DefaultVALUInstsThreshold(
    "amdgpu-set-wave-priority-valu-insts-threshold",
    cl::desc("VALU instruction count threshold for adjusting wave priority"),
    cl::init(100), cl::Hidden);

namespace {
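// Per-basic-block summary used by the analysis in runOnMachineFunction: the
// number of VALU instructions on the path from the top of the block up to the
// first VMEM load or DS instruction, whether a VMEM load followed by a long
// enough VALU run is still reachable from the block, and the last VMEM load
// contained in the block, if any.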
struct MBBInfo {
  MBBInfo() = default;
  unsigned NumVALUInstsAtStart = 0;
  bool MayReachVMEMLoad = false;
  MachineInstr *LastVMEMLoad = nullptr;
};

using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;

class AMDGPUSetWavePriority : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "Set wave priority"; }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               unsigned priority) const;

  const SIInstrInfo *TII;
};

} // End anonymous namespace.

INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
                false)

char AMDGPUSetWavePriority::ID = 0;

FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
  return new AMDGPUSetWavePriority();
}
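// Insert an S_SETPRIO instruction with the given priority immediately
// before I.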
MachineInstr *
AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I,
                                      unsigned priority) const {
  return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO))
      .addImm(priority);
}

// Checks that for every predecessor Pred that can reach a VMEM load,
// none of Pred's successors can reach a VMEM load.
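// When this holds, the priority can be lowered at the end of each such Pred
// without affecting a path that still leads to a VMEM load.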
static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
                                                   MBBInfoSet &MBBInfos) {
  for (const MachineBasicBlock *Pred : MBB.predecessors()) {
    if (!MBBInfos[Pred].MayReachVMEMLoad)
      continue;
    for (const MachineBasicBlock *Succ : Pred->successors()) {
      if (MBBInfos[Succ].MayReachVMEMLoad)
        return false;
    }
  }
  return true;
}

static bool isVMEMLoad(const MachineInstr &MI) {
  return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
}

bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
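  // S_SETPRIO sets the 2-bit user priority of the wave: 3 is the highest
  // value, 0 the default.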
  const unsigned HighPriority = 3;
  const unsigned LowPriority = 0;

  Function &F = MF.getFunction();
  if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
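  // The "amdgpu-wave-priority-threshold" function attribute, when present,
  // overrides the -amdgpu-set-wave-priority-valu-insts-threshold default.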
  unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
  Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");
  if (A.isValid())
    A.getValueAsString().getAsInteger(0, VALUInstsThreshold);

  // Find VMEM loads that may be executed before long-enough sequences of
  // VALU instructions. We currently assume that backedges/loops, branch
  // probabilities and other details can be ignored, so we essentially
  // determine the largest number of VALU instructions along every
  // possible path from the start of the function that may potentially be
  // executed provided no backedge is ever taken.
  MBBInfoSet MBBInfos;
  for (MachineBasicBlock *MBB : post_order(&MF)) {
    bool AtStart = true;
    unsigned MaxNumVALUInstsInMiddle = 0;
    unsigned NumVALUInstsAtEnd = 0;
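    // Scan the block: a VMEM load restarts all counters and is remembered as
    // the (so far) last load, a DS instruction ends the current VALU run, and
    // a VALU instruction extends both the run at the start of the block
    // (until the first VMEM/DS instruction) and the current trailing run.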
    for (MachineInstr &MI : *MBB) {
      if (isVMEMLoad(MI)) {
        AtStart = false;
        MBBInfo &Info = MBBInfos[MBB];
        Info.NumVALUInstsAtStart = 0;
        MaxNumVALUInstsInMiddle = 0;
        NumVALUInstsAtEnd = 0;
        Info.LastVMEMLoad = &MI;
      } else if (SIInstrInfo::isDS(MI)) {
        AtStart = false;
        MaxNumVALUInstsInMiddle =
            std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
        NumVALUInstsAtEnd = 0;
      } else if (SIInstrInfo::isVALU(MI)) {
        if (AtStart)
          ++MBBInfos[MBB].NumVALUInstsAtStart;
        ++NumVALUInstsAtEnd;
      }
    }
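    // Successors have already been visited by the post-order traversal, so
    // fold in their results: whether any of them may still reach a VMEM load,
    // and the longest VALU run that begins immediately after this block.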
    bool SuccsMayReachVMEMLoad = false;
    unsigned NumFollowingVALUInsts = 0;
    for (const MachineBasicBlock *Succ : MBB->successors()) {
      SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;
      NumFollowingVALUInsts =
          std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart);
    }
    MBBInfo &Info = MBBInfos[MBB];
    if (AtStart)
      Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
    NumVALUInstsAtEnd += NumFollowingVALUInsts;
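    // Mark the block as reaching a VMEM load if some successor does, or if
    // the block itself contains a VMEM load followed by a VALU run of at
    // least VALUInstsThreshold instructions.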
    unsigned MaxNumVALUInsts =
        std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
    Info.MayReachVMEMLoad =
        SuccsMayReachVMEMLoad ||
        (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
  }

  MachineBasicBlock &Entry = MF.front();
  if (!MBBInfos[&Entry].MayReachVMEMLoad)
    return false;

  // Raise the priority at the beginning of the shader.
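  // The S_SETPRIO is inserted immediately before the first VALU instruction
  // in the entry block, or at the first terminator (or block end) if there is
  // no VALU instruction.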
  MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
  while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
    ++I;
  BuildSetprioMI(Entry, I, HighPriority);

  // Lower the priority on edges where control leaves blocks from which
  // the VMEM loads are reachable.
  SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
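  // Blocks that may still reach a VMEM load get the priority lowered only if
  // they exit the function. Otherwise, for each block that can no longer
  // reach a load, lower the priority in its predecessors when that is safe,
  // and inside the block itself when it is not.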
  for (MachineBasicBlock &MBB : MF) {
    if (MBBInfos[&MBB].MayReachVMEMLoad) {
      if (MBB.succ_empty())
        PriorityLoweringBlocks.insert(&MBB);
      continue;
    }

    if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
      for (MachineBasicBlock *Pred : MBB.predecessors()) {
        if (MBBInfos[Pred].MayReachVMEMLoad)
          PriorityLoweringBlocks.insert(Pred);
      }
      continue;
    }

    // If lowering the priority in the predecessors is not possible, then
    // either this block was never part of a loop in the first place, or the
    // loop simplification/canonicalization passes should already have tried
    // to split the edge and insert a preheader. If that failed for whatever
    // reason, the only remaining option is to lower the priority within the
    // loop.
    PriorityLoweringBlocks.insert(&MBB);
  }
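  // Lower the priority immediately after the last VMEM load in each selected
  // block, or at the top of the block if it contains no VMEM load.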
  for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
    BuildSetprioMI(
        *MBB,
        MBBInfos[MBB].LastVMEMLoad
            ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))
            : MBB->begin(),
        LowPriority);
  }

  return true;
}