forked from OSchip/llvm-project
493 lines
18 KiB
C++
493 lines
18 KiB
C++
//=== lib/CodeGen/GlobalISel/AMDGPURegBankCombiner.cpp ---------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass does combining of machine instructions at the generic MI level,
|
|
// after register banks are known.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPU.h"
|
|
#include "AMDGPULegalizerInfo.h"
|
|
#include "AMDGPURegisterBankInfo.h"
|
|
#include "GCNSubtarget.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIMachineFunctionInfo.h"
|
|
#include "llvm/CodeGen/GlobalISel/Combiner.h"
|
|
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
|
|
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
|
|
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
|
|
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
|
|
#include "llvm/CodeGen/MachineDominators.h"
|
|
#include "llvm/CodeGen/TargetPassConfig.h"
|
|
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
#define DEBUG_TYPE "amdgpu-regbank-combiner"
|
|
|
|
using namespace llvm;
|
|
using namespace MIPatternMatch;
|
|
|
|
class AMDGPURegBankCombinerHelper {
|
|
protected:
|
|
MachineIRBuilder &B;
|
|
MachineFunction &MF;
|
|
MachineRegisterInfo &MRI;
|
|
const GCNSubtarget &Subtarget;
|
|
const RegisterBankInfo &RBI;
|
|
const TargetRegisterInfo &TRI;
|
|
const SIInstrInfo &TII;
|
|
CombinerHelper &Helper;
|
|
|
|
public:
|
|
AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
|
|
: B(B), MF(B.getMF()), MRI(*B.getMRI()),
|
|
Subtarget(MF.getSubtarget<GCNSubtarget>()),
|
|
RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()),
|
|
TII(*Subtarget.getInstrInfo()), Helper(Helper){};
|
|
|
|
bool isVgprRegBank(Register Reg);
|
|
Register getAsVgpr(Register Reg);
|
|
|
|
struct MinMaxMedOpc {
|
|
unsigned Min, Max, Med;
|
|
};
|
|
|
|
struct Med3MatchInfo {
|
|
unsigned Opc;
|
|
Register Val0, Val1, Val2;
|
|
};
|
|
|
|
MinMaxMedOpc getMinMaxPair(unsigned Opc);
|
|
|
|
template <class m_Cst, typename CstTy>
|
|
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
|
|
Register &Val, CstTy &K0, CstTy &K1);
|
|
|
|
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
|
|
bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
|
|
bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg);
|
|
bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg);
|
|
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
|
|
void applyClamp(MachineInstr &MI, Register &Reg);
|
|
|
|
private:
|
|
AMDGPU::SIModeRegisterDefaults getMode();
|
|
bool getIEEE();
|
|
bool getDX10Clamp();
|
|
bool isFminnumIeee(const MachineInstr &MI);
|
|
bool isFCst(MachineInstr *MI);
|
|
bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1);
|
|
};
|
|
|
|
/// Returns true when the register bank looked up for \p Reg is the VGPR bank.
bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
  const RegisterBank *Bank = RBI.getRegBank(Reg, MRI, TRI);
  return Bank->getID() == AMDGPU::VGPRRegBankID;
}
|
|
|
|
/// Returns a register in the VGPR bank that holds the value of \p Reg.
///
/// If \p Reg is already a VGPR it is returned unchanged. Otherwise an existing
/// COPY of \p Reg into a VGPR is reused when one is found among its users, and
/// as a last resort a new COPY is built at the builder's current insertion
/// point and assigned to the VGPR bank.
Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
  if (isVgprRegBank(Reg))
    return Reg;

  // Search for existing copy of Reg to vgpr.
  for (MachineInstr &Use : MRI.use_instructions(Reg)) {
    // Check the opcode before touching operand 0: for an arbitrary user,
    // operand 0 is not guaranteed to be a register, and getReg() must only be
    // called on register operands.
    if (Use.getOpcode() != AMDGPU::COPY)
      continue;

    Register Def = Use.getOperand(0).getReg();
    if (isVgprRegBank(Def))
      return Def;
  }
  // NOTE(review): the reused copy is not checked for dominance over the
  // insertion point — confirm callers cannot observe a non-dominating copy.

  // Copy Reg to vgpr.
  Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
  MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
  return VgprReg;
}
|
|
|
|
/// Maps a min or max opcode onto the {Min, Max, Med} opcode triple of its
/// family (signed, unsigned, FP, FP-IEEE). Any other opcode is a programming
/// error and hits llvm_unreachable.
AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
  switch (Opc) {
  // Signed integer.
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
    return {AMDGPU::G_SMIN, AMDGPU::G_SMAX, AMDGPU::G_AMDGPU_SMED3};
  // Unsigned integer.
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX:
    return {AMDGPU::G_UMIN, AMDGPU::G_UMAX, AMDGPU::G_AMDGPU_UMED3};
  // Floating point, non-IEEE opcodes.
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
    return {AMDGPU::G_FMINNUM, AMDGPU::G_FMAXNUM, AMDGPU::G_AMDGPU_FMED3};
  // Floating point, IEEE opcodes; both FP families share the same med3.
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
    return {AMDGPU::G_FMINNUM_IEEE, AMDGPU::G_FMAXNUM_IEEE,
            AMDGPU::G_AMDGPU_FMED3};
  default:
    break;
  }
  llvm_unreachable("Unsupported opcode");
}
|
|
|
|
template <class m_Cst, typename CstTy>
|
|
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
|
|
MachineRegisterInfo &MRI,
|
|
MinMaxMedOpc MMMOpc, Register &Val,
|
|
CstTy &K0, CstTy &K1) {
|
|
// 4 operand commutes of: min(max(Val, K0), K1).
|
|
// Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
|
|
// Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
|
|
// 4 operand commutes of: max(min(Val, K1), K0).
|
|
// Find K0 from outer instr: max(min(...), K0) or max(K0, min(...)).
|
|
// Find K1 and Val from inner instr: min(K1, Val) or min(Val, K1).
|
|
return mi_match(
|
|
MI, MRI,
|
|
m_any_of(
|
|
m_CommutativeBinOp(
|
|
MMMOpc.Min, m_CommutativeBinOp(MMMOpc.Max, m_Reg(Val), m_Cst(K0)),
|
|
m_Cst(K1)),
|
|
m_CommutativeBinOp(
|
|
MMMOpc.Max, m_CommutativeBinOp(MMMOpc.Min, m_Reg(Val), m_Cst(K1)),
|
|
m_Cst(K0))));
|
|
}
|
|
|
|
/// Matches an integer min(max(Val, K0), K1) / max(min(Val, K1), K0) with
/// constant K0 <= K1 and, on success, fills \p MatchInfo with the med3 opcode
/// and its three operands (Val, K0, K1).
bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
  const Register DstReg = MI.getOperand(0).getReg();
  if (!isVgprRegBank(DstReg))
    return false;

  // med3 for i16 is only available on gfx9+, and not available for v2i16.
  const LLT DstTy = MRI.getType(DstReg);
  const bool IsS32 = DstTy == LLT::scalar(32);
  const bool IsS16WithMed3 =
      DstTy == LLT::scalar(16) && Subtarget.hasMed3_16();
  if (!IsS32 && !IsS16WithMed3)
    return false;

  const MinMaxMedOpc Opcodes = getMinMaxPair(MI.getOpcode());
  Register Src;
  Optional<ValueAndVReg> Lo, Hi;
  if (!matchMed<GCstAndRegMatch>(MI, MRI, Opcodes, Src, Lo, Hi))
    return false;

  // The fold is only taken when the bounds are ordered (Lo <= Hi) under the
  // signedness that corresponds to the med3 opcode.
  const bool BoundsInverted =
      (Opcodes.Med == AMDGPU::G_AMDGPU_SMED3 && Lo->Value.sgt(Hi->Value)) ||
      (Opcodes.Med == AMDGPU::G_AMDGPU_UMED3 && Lo->Value.ugt(Hi->Value));
  if (BoundsInverted)
    return false;

  MatchInfo = {Opcodes.Med, Src, Lo->VReg, Hi->VReg};
  return true;
}
|
|
|
|
// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1)
|
|
// ieee = true : min/max(SNaN, K) = QNaN, min/max(QNaN, K) = K
|
|
// ieee = false : min/max(NaN, K) = K
|
|
// clamp(NaN) = dx10_clamp ? 0.0 : NaN
|
|
// Consider values of min(max(Val, K0), K1) and max(min(Val, K1), K0) as input.
|
|
// Other operand commutes (see matchMed) give same result since min and max are
|
|
// commutative.
|
|
|
|
// Try to replace fp min(max(Val, K0), K1) or max(min(Val, K1), K0), K0<=K1
|
|
// with fmed3(Val, K0, K1) or clamp(Val). Clamp requires K0 = 0.0 and K1 = 1.0.
|
|
// Val = SNaN only for ieee = true
|
|
// fmed3(SNaN, K0, K1) = min(min(SNaN, K0), K1) = min(QNaN, K1) = K1
|
|
// min(max(SNaN, K0), K1) = min(QNaN, K1) = K1
|
|
// max(min(SNaN, K1), K0) = max(K1, K0) = K1
|
|
// Val = NaN,ieee = false or Val = QNaN,ieee = true
|
|
// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
|
|
// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
|
|
// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
|
|
/// Matches a floating-point min(max(Val, K0), K1) / max(min(Val, K1), K0) with
/// constant K0 <= K1 and, when the NaN behaviour allows it, fills \p MatchInfo
/// with the fmed3 opcode and operands (Val, K0, K1).
bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
    MachineInstr &MI, Med3MatchInfo &MatchInfo) {
  const Register DstReg = MI.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);

  // med3 for f16 is only available on gfx9+, and not available for v2f16.
  const bool IsS32 = DstTy == LLT::scalar(32);
  const bool IsS16WithMed3 =
      DstTy == LLT::scalar(16) && Subtarget.hasMed3_16();
  if (!IsS32 && !IsS16WithMed3)
    return false;

  const auto Opcodes = getMinMaxPair(MI.getOpcode());

  Register Src;
  Optional<FPValueAndVReg> Lo, Hi;
  if (!matchMed<GFCstAndRegMatch>(MI, MRI, Opcodes, Src, Lo, Hi))
    return false;

  // Bounds must be ordered.
  if (Lo->Value > Hi->Value)
    return false;

  // IEEE=false: only combine when NaN inputs can be ruled out (typically MI
  // carries the nnan fast-math flag).
  // IEEE=true: NaN inputs are considered. fmed3(NaN, K0, K1) behaves like
  // min(min(NaN, K0), K1), which agrees with min(max(Val, K0), K1) because the
  // inner min/max act the same when exactly one input is NaN. The
  // max(min(SNaN, K1), K0) form is not handled: there is no isKnownNeverQNaN,
  // and post-legalizer min/max inputs are fcanonicalized (never SNaN).
  const bool NaNBehaviourOk =
      (getIEEE() && isFminnumIeee(MI)) || isKnownNeverNaN(DstReg, MRI);
  if (!NaNBehaviourOk)
    return false;

  // Don't fold single use constant that can't be inlined.
  auto WorthFolding = [&](const FPValueAndVReg &K) {
    return !MRI.hasOneNonDBGUse(K.VReg) || TII.isInlineConstant(K.Value);
  };
  if (!WorthFolding(*Lo) || !WorthFolding(*Hi))
    return false;

  MatchInfo = {Opcodes.Med, Src, Lo->VReg, Hi->VReg};
  return true;
}
|
|
|
|
/// Matches min(max(Val, 0.0), 1.0) / max(min(Val, 1.0), 0.0) (scalar or splat
/// constants) and, when the NaN behaviour allows it, returns Val in \p Reg so
/// the instruction can be replaced by a clamp.
bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI,
                                                       Register &Reg) {
  // Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
  const auto Opcodes = getMinMaxPair(MI.getOpcode());
  Register Src;
  Optional<FPValueAndVReg> Lo, Hi;
  if (!matchMed<GFCstOrSplatGFCstMatch>(MI, MRI, Opcodes, Src, Lo, Hi))
    return false;

  // Only the exact [0.0, 1.0] range is a clamp.
  const bool IsZeroToOne =
      Lo->Value.isExactlyValue(0.0) && Hi->Value.isExactlyValue(1.0);
  if (!IsZeroToOne)
    return false;

  // IEEE=true: NaN inputs are considered. Only the min(max(QNaN, 0.0), 1.0)
  // form evaluates to 0.0, which matches clamp only when dx10_clamp is set, so
  // the outer instruction must be the IEEE min and Val must not be an SNaN.
  const bool IeeeFoldOk = getIEEE() && getDX10Clamp() && isFminnumIeee(MI) &&
                          isKnownNeverSNaN(Src, MRI);

  // IEEE=false (or the check above failed): combine only when NaNs can be
  // ruled out for the result, e.g. MI carries the nnan fast-math flag.
  if (!IeeeFoldOk && !isKnownNeverNaN(MI.getOperand(0).getReg(), MRI))
    return false;

  Reg = Src;
  return true;
}
|
|
|
|
// Replacing fmed3(NaN, 0.0, 1.0) with clamp. Requires dx10_clamp = true.
|
|
// Val = SNaN only for ieee = true. It is important which operand is NaN.
|
|
// min(min(SNaN, 0.0), 1.0) = min(QNaN, 1.0) = 1.0
|
|
// min(min(SNaN, 1.0), 0.0) = min(QNaN, 0.0) = 0.0
|
|
// min(min(0.0, 1.0), SNaN) = min(0.0, SNaN) = QNaN
|
|
// Val = NaN,ieee = false or Val = QNaN,ieee = true
|
|
// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
|
|
// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
|
|
// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
|
|
/// Matches an amdgcn.fmed3 intrinsic whose operands are one value plus the
/// constants 0.0 and 1.0 (in any order) and, when the NaN behaviour allows it,
/// returns the value in \p Reg so the intrinsic can be replaced by a clamp.
bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI,
                                                     Register &Reg) {
  if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
    return false;

  // In llvm-ir, clamp is often represented as an intrinsic call to
  // @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
  MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
  MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
  MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);

  // Three compare-and-swap steps that push FP constants towards the back, so
  // a lone non-constant operand ends up in Src0.
  auto SinkConstant = [&](MachineInstr *&Front, MachineInstr *&Back) {
    if (isFCst(Front) && !isFCst(Back))
      std::swap(Front, Back);
  };
  SinkConstant(Src0, Src1);
  SinkConstant(Src1, Src2);
  SinkConstant(Src0, Src1);

  if (!isClampZeroToOne(Src1, Src2))
    return false;

  const Register Src = Src0->getOperand(0).getReg();

  // Checks whether the intrinsic's third source (operand 4, in its original
  // position) is the FP constant 0.0.
  auto ThirdSrcIsZero = [&]() -> bool {
    MachineInstr *Def = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
    return Def->getOpcode() == TargetOpcode::G_FCONSTANT &&
           Def->getOperand(1).getFPImm()->isExactlyValue(0.0);
  };

  // IEEE=false: combine only when NaN inputs can be ruled out (typically MI
  // carries the nnan fast-math flag).
  if (isKnownNeverNaN(MI.getOperand(0).getReg(), MRI)) {
    Reg = Src;
    return true;
  }

  // IEEE=true: NaN inputs are considered and dx10_clamp must be set. The fold
  // is safe when Val may be a QNaN; if Val may also be an SNaN, the third
  // input has to be 0.0.
  if (!getIEEE() || !getDX10Clamp())
    return false;
  if (!isKnownNeverSNaN(Src, MRI) && !ThirdSrcIsZero())
    return false;

  Reg = Src;
  return true;
}
|
|
|
|
/// Replaces \p MI with G_AMDGPU_CLAMP of \p Reg, writing to MI's original
/// destination and carrying over MI's flags. \p MI is erased.
void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) {
  const auto Flags = MI.getFlags();

  // Build the replacement at MI's position, then drop MI.
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg}, Flags);
  MI.eraseFromParent();
}
|
|
|
|
/// Replaces \p MI with the med3 instruction described by \p MatchInfo, writing
/// to MI's original destination and carrying over MI's flags. \p MI is erased.
void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
                                            Med3MatchInfo &MatchInfo) {
  B.setInstrAndDebugLoc(MI);

  // Every source goes through getAsVgpr, which may emit a COPY at the
  // insertion point; resolve them in operand order.
  const Register Src0 = getAsVgpr(MatchInfo.Val0);
  const Register Src1 = getAsVgpr(MatchInfo.Val1);
  const Register Src2 = getAsVgpr(MatchInfo.Val2);

  B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)}, {Src0, Src1, Src2},
               MI.getFlags());
  MI.eraseFromParent();
}
|
|
|
|
/// Returns the mode stored in the current function's SIMachineFunctionInfo.
AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
  const auto *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  return FuncInfo->getMode();
}
|
|
|
|
bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
|
|
|
|
bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
|
|
|
|
bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
|
|
return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
|
|
}
|
|
|
|
/// True when \p MI is a G_FCONSTANT instruction. \p MI is dereferenced
/// unconditionally.
bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
  const unsigned Opc = MI->getOpcode();
  return Opc == AMDGPU::G_FCONSTANT;
}
|
|
|
|
/// True when \p K0 and \p K1 are both G_FCONSTANTs and, taken together, are
/// exactly the values 0.0 and 1.0 in either order.
bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
                                                   MachineInstr *K1) {
  if (!isFCst(K0) || !isFCst(K1))
    return false;

  const ConstantFP *First = K0->getOperand(1).getFPImm();
  const ConstantFP *Second = K1->getOperand(1).getFPImm();

  // (0.0, 1.0) ...
  if (First->isExactlyValue(0.0) && Second->isExactlyValue(1.0))
    return true;
  // ... or (1.0, 0.0).
  return First->isExactlyValue(1.0) && Second->isExactlyValue(0.0);
}
|
|
|
|
class AMDGPURegBankCombinerHelperState {
|
|
protected:
|
|
CombinerHelper &Helper;
|
|
AMDGPURegBankCombinerHelper &RegBankHelper;
|
|
|
|
public:
|
|
AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
|
|
AMDGPURegBankCombinerHelper &RegBankHelper)
|
|
: Helper(Helper), RegBankHelper(RegBankHelper) {}
|
|
};
|
|
|
|
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
|
#include "AMDGPUGenRegBankGICombiner.inc"
|
|
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
|
|
|
|
namespace {
|
|
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
|
|
#include "AMDGPUGenRegBankGICombiner.inc"
|
|
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
|
|
|
|
class AMDGPURegBankCombinerInfo final : public CombinerInfo {
|
|
GISelKnownBits *KB;
|
|
MachineDominatorTree *MDT;
|
|
|
|
public:
|
|
AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
|
|
|
|
AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
|
|
const AMDGPULegalizerInfo *LI,
|
|
GISelKnownBits *KB, MachineDominatorTree *MDT)
|
|
: CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
|
|
/*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
|
|
KB(KB), MDT(MDT) {
|
|
if (!GeneratedRuleCfg.parseCommandLineOption())
|
|
report_fatal_error("Invalid rule identifier");
|
|
}
|
|
|
|
bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
|
|
MachineIRBuilder &B) const override;
|
|
};
|
|
|
|
/// Runs every generated combine rule on \p MI.
/// \returns true if any rule applied.
bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
                                        MachineInstr &MI,
                                        MachineIRBuilder &B) const {
  // Fresh helpers are built for each instruction from the shared analyses.
  CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT);
  AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
  AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                           RegBankHelper);

  return Generated.tryCombineAll(Observer, MI, B);
}
|
|
|
|
#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
|
|
#include "AMDGPUGenRegBankGICombiner.inc"
|
|
#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
|
|
|
|
// Pass boilerplate
|
|
// ================
|
|
|
|
class AMDGPURegBankCombiner : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
AMDGPURegBankCombiner(bool IsOptNone = false);
|
|
|
|
StringRef getPassName() const override {
|
|
return "AMDGPURegBankCombiner";
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
|
private:
|
|
bool IsOptNone;
|
|
};
|
|
} // end anonymous namespace
|
|
|
|
/// Declares the analyses this pass needs and preserves. The dominator tree is
/// requested (and preserved) only when the pass was not created as opt-none.
void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);

  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();

  const bool WantsDomTree = !IsOptNone;
  if (WantsDomTree) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  MachineFunctionPass::getAnalysisUsage(AU);
}
|
|
|
|
/// Records \p IsOptNone and calls the pass's initialize function on the
/// global pass registry.
AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeAMDGPURegBankCombinerPass(Registry);
}
|
|
|
|
/// Runs the combiner over \p MF.
/// \returns false without touching the function if instruction selection has
/// already failed for it; otherwise the result of the combiner run.
bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
  using Property = MachineFunctionProperties::Property;
  if (MF.getProperties().hasProperty(Property::FailedISel))
    return false;

  auto *PassConfig = &getAnalysis<TargetPassConfig>();
  const Function &Fn = MF.getFunction();

  // Optimizations are on only when the target is not at -O0 and the function
  // is not skipped; skipFunction is queried only in the non-O0 case.
  const bool TargetOptimizes =
      MF.getTarget().getOptLevel() != CodeGenOpt::None;
  const bool EnableOpt = TargetOptimizes && !skipFunction(Fn);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const auto *LegalizerInfo =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KnownBits = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);

  // The dominator tree is only available when it was requested in
  // getAnalysisUsage, i.e. when the pass is not opt-none.
  MachineDominatorTree *DomTree = nullptr;
  if (!IsOptNone)
    DomTree = &getAnalysis<MachineDominatorTree>();

  AMDGPURegBankCombinerInfo PCInfo(EnableOpt, Fn.hasOptSize(), Fn.hasMinSize(),
                                   LegalizerInfo, KnownBits, DomTree);
  Combiner C(PCInfo, PassConfig);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}
|
|
|
|
char AMDGPURegBankCombiner::ID = 0;
|
|
INITIALIZE_PASS_BEGIN(AMDGPURegBankCombiner, DEBUG_TYPE,
|
|
"Combine AMDGPU machine instrs after regbankselect",
|
|
false, false)
|
|
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
|
|
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
|
|
INITIALIZE_PASS_END(AMDGPURegBankCombiner, DEBUG_TYPE,
|
|
"Combine AMDGPU machine instrs after regbankselect", false,
|
|
false)
|
|
|
|
namespace llvm {
|
|
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone) {
|
|
return new AMDGPURegBankCombiner(IsOptNone);
|
|
}
|
|
} // end namespace llvm
|