//===-- RISCVRegisterInfo.cpp - RISCV Register Information ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the RISCV implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "RISCVRegisterInfo.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/ErrorHandling.h"

#define GET_REGINFO_TARGET_DESC
#include "RISCVGenRegisterInfo.inc"

using namespace llvm;

static cl::opt<bool>
    DisableRegAllocHints("riscv-disable-regalloc-hints", cl::Hidden,
                         cl::init(false),
                         cl::desc("Disable two address hints for register "
                                  "allocation"));

static_assert(RISCV::X1 == RISCV::X0 + 1, "Register list not consecutive");
static_assert(RISCV::X31 == RISCV::X0 + 31, "Register list not consecutive");
static_assert(RISCV::F1_H == RISCV::F0_H + 1, "Register list not consecutive");
static_assert(RISCV::F31_H == RISCV::F0_H + 31,
              "Register list not consecutive");
static_assert(RISCV::F1_F == RISCV::F0_F + 1, "Register list not consecutive");
static_assert(RISCV::F31_F == RISCV::F0_F + 31,
              "Register list not consecutive");
static_assert(RISCV::F1_D == RISCV::F0_D + 1, "Register list not consecutive");
static_assert(RISCV::F31_D == RISCV::F0_D + 31,
              "Register list not consecutive");
static_assert(RISCV::V1 == RISCV::V0 + 1, "Register list not consecutive");
static_assert(RISCV::V31 == RISCV::V0 + 31, "Register list not consecutive");

RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode)
    : RISCVGenRegisterInfo(RISCV::X1, /*DwarfFlavour*/ 0, /*EHFlavor*/ 0,
                           /*PC*/ 0, HwMode) {}

const MCPhysReg *
RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  assert(!MF->getFunction().hasFnAttribute("interrupt") &&
         "Ventus GPGPU doesn't support interrupt!");
  auto &Subtarget = MF->getSubtarget<RISCVSubtarget>();

  switch (Subtarget.getTargetABI()) {
  default:
    llvm_unreachable("Unrecognized ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    return CSR_ILP32_LP64_SaveList;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    return CSR_ILP32F_LP64F_SaveList;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    return CSR_ILP32D_LP64D_SaveList;
  }
}

BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  const RISCVFrameLowering *TFI = getFrameLowering(MF);
  BitVector Reserved(getNumRegs());

  // Mark any registers requested to be reserved as such.
  for (size_t Reg = 0; Reg < getNumRegs(); Reg++) {
    if (MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(Reg))
      markSuperRegs(Reserved, Reg);
  }

  // Use markSuperRegs to ensure any register aliases are also reserved.
  markSuperRegs(Reserved, RISCV::X0); // zero
  markSuperRegs(Reserved, RISCV::X2); // sp
  markSuperRegs(Reserved, RISCV::X3); // gp
  markSuperRegs(Reserved, RISCV::X4); // tp
  if (TFI->hasFP(MF))
    markSuperRegs(Reserved, RISCV::X8); // fp
  // Reserve the base register if we need to realign the stack and allocate
  // variable-sized objects at runtime.
  if (TFI->hasBP(MF))
    markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp
  // Floating point environment registers.
  markSuperRegs(Reserved, RISCV::FRM);
  markSuperRegs(Reserved, RISCV::FFLAGS);
  markSuperRegs(Reserved, getPrivateMemoryBaseRegister(MF));
  assert(checkAllSuperRegsMarked(Reserved));
  return Reserved;
}

bool RISCVRegisterInfo::isAsmClobberable(const MachineFunction &MF,
                                         MCRegister PhysReg) const {
  return !MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(PhysReg);
}

const uint32_t *RISCVRegisterInfo::getNoPreservedMask() const {
  return CSR_NoRegs_RegMask;
}

// Frame indexes representing locations of CSRs which are given a fixed location
// by save/restore libcalls.
static const std::pair<unsigned, int> FixedCSRFIMap[] = {
    {/*ra*/ RISCV::X1, -1},
    {/*s0*/ RISCV::X8, -2},
    {/*s1*/ RISCV::X9, -3},
    {/*s2*/ RISCV::X18, -4},
    {/*s3*/ RISCV::X19, -5},
    {/*s4*/ RISCV::X20, -6},
    {/*s5*/ RISCV::X21, -7},
    {/*s6*/ RISCV::X22, -8},
    {/*s7*/ RISCV::X23, -9},
    {/*s8*/ RISCV::X24, -10},
    {/*s9*/ RISCV::X25, -11},
    {/*s10*/ RISCV::X26, -12},
    {/*s11*/ RISCV::X27, -13}};

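/// Return true if \p Reg is a callee-saved register that the save/restore
/// libcalls place at a fixed frame index, and set \p FrameIdx to that index.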
bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
                                             Register Reg,
                                             int &FrameIdx) const {
  const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
  if (!RVFI->useSaveRestoreLibCalls(MF))
    return false;

  const auto *FII =
      llvm::find_if(FixedCSRFIMap, [&](auto P) { return P.first == Reg; });
  if (FII == std::end(FixedCSRFIMap))
    return false;

  FrameIdx = FII->second;
  return true;
}

/// Returns the lowest register that is not used at any point in the function.
/// If all registers are used, returns RISCV::NoRegister. If
/// \p ReserveHighestVGPR is true, the highest unused register is returned
/// instead.
MCRegister
RISCVRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
                                      const TargetRegisterClass *RC,
                                      const MachineFunction &MF,
                                      bool ReserveHighestVGPR) const {
  if (ReserveHighestVGPR) {
    for (MCRegister Reg : reverse(*RC))
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
        return Reg;
  } else {
    for (MCRegister Reg : *RC)
      if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
        return Reg;
  }
  return MCRegister();
}

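/// Accumulate into the subtarget's VentusProgramInfo how many of the
/// rewritten registers are scalar (SGPR) and how many are vector (VGPR).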
void RISCVRegisterInfo::analyzeRegisterUsage(DenseSet<Register> RewriteRegs,
                                             MachineFunction *MF) const {
  auto *CurrentProgramInfo = const_cast<VentusProgramInfo *>(
      MF->getSubtarget<RISCVSubtarget>().getVentusProgramInfo());
  MachineRegisterInfo &MRI = MF->getRegInfo();

  for (auto Reg : RewriteRegs) {
    if (!isSGPRReg(MRI, Reg))
      CurrentProgramInfo->VGPRUsage++;
    else
      CurrentProgramInfo->SGPRUsage++;
  }
  // FIXME: ra and sp are always used, so count two more SGPRs. How can this
  // be simplified?
  CurrentProgramInfo->SGPRUsage += 2;
}

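/// Return true if \p Reg, virtual or physical, is allocated from a scalar
/// (SGPR) register class.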
bool RISCVRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
                                  Register Reg) const {
  const TargetRegisterClass *RC;
  if (Reg.isVirtual())
    RC = MRI.getRegClass(Reg);
  else
    RC = getPhysRegClass(Reg);
  return RC ? isSGPRClass(RC) : false;
}

const Register RISCVRegisterInfo::getPrivateMemoryBaseRegister(
    const MachineFunction &MF) const {
  // FIXME: V0-V31 are used as argument registers, so we use V32 as the
  // private memory base register. V32 is beyond the 5-bit encoding range,
  // so one extra instruction is needed whenever this register is used.
  // Since V0-V7 are used for variadic function arguments, variadic functions
  // use V8 instead.
  return MF.getFunction().isVarArg() ? RISCV::V8 : RISCV::V32;
}

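/// Return the base register class a physical register belongs to; only the
/// VGPR and GPR classes are handled for now.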
const TargetRegisterClass *
RISCVRegisterInfo::getPhysRegClass(MCRegister Reg) const {
  static const TargetRegisterClass *const BaseClasses[] = {
      /*
      &RISCV::VGPR_LO16RegClass,
      &RISCV::VGPR_HI16RegClass,
      &RISCV::SReg_LO16RegClass,
      &RISCV::SReg_HI16RegClass,
      &RISCV::SReg_32RegClass,
      */
      &RISCV::VGPRRegClass,
      &RISCV::GPRRegClass,
  };

  for (const TargetRegisterClass *BaseClass : BaseClasses) {
    if (BaseClass->contains(Reg)) {
      return BaseClass;
    }
  }
  assert(0 && "TODO: Add sub/super registers");
  return nullptr;
}

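/// Materialize DestReg = SrcReg + Offset. The offset is emitted as a single
/// ADDI when it fits in 12 bits, split across two ADDIs when the intermediate
/// result can keep \p RequiredAlign, and otherwise built with movImm into a
/// scratch register followed by an ADD/SUB.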
void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator II,
                                  const DebugLoc &DL, Register DestReg,
                                  Register SrcReg, StackOffset Offset,
                                  MachineInstr::MIFlag Flag,
                                  MaybeAlign RequiredAlign) const {
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = ST.getInstrInfo();
  bool KillSrcReg = false;
  int64_t Val = Offset.getFixed();

  if (DestReg == SrcReg && Val == 0)
    return;

  const uint64_t Align = RequiredAlign.valueOrOne().value();

  if (isInt<12>(Val)) {
    BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrcReg))
        .addImm(Val)
        .setMIFlag(Flag);
    return;
  }

  // Try to split the offset across two ADDIs. We need to keep the intermediate
  // result aligned after each ADDI. We need to determine the maximum value we
  // can put in each ADDI. In the negative direction, we can use -2048 which is
  // always sufficiently aligned. In the positive direction, we need to find the
  // largest 12-bit immediate that is aligned. Exclude -4096 since it can be
  // created with LUI.
  assert(Align < 2048 && "Required alignment too large");
  int64_t MaxPosAdjStep = 2048 - Align;
  if (Val > -4096 && Val <= (2 * MaxPosAdjStep)) {
    int64_t FirstAdj = Val < 0 ? -2048 : MaxPosAdjStep;
    Val -= FirstAdj;
    // Keep the intermediate result aligned after each ADDI, whether it is
    // relative to SP or TP.
    BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrcReg))
        .addImm(FirstAdj)
        .setMIFlag(Flag);
    BuildMI(MBB, II, DL, TII->get(RISCV::ADDI), DestReg)
        .addReg(DestReg, RegState::Kill)
        .addImm(Val)
        .setMIFlag(Flag);
    return;
  }

  unsigned Opc = RISCV::ADD;
  if (Val < 0) {
    Val = -Val;
    Opc = RISCV::SUB;
  }

  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  TII->movImm(MBB, II, DL, ScratchReg, Val, Flag);
  BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrcReg))
      .addReg(ScratchReg, RegState::Kill)
      .setMIFlag(Flag);
}

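/// Rebase a private-memory access whose frame offset does not fit in simm11:
/// fold the 1024-byte-aligned part of the offset into a fresh VGPR base
/// register with VADD_VI and leave only the remainder on the instruction.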
void RISCVRegisterInfo::adjustPriMemRegOffset(MachineFunction &MF,
                                              MachineBasicBlock &MBB,
                                              MachineInstr &MI, int64_t Offset,
                                              Register PriMemReg,
                                              unsigned FIOperandNum) const {
  auto &MRI = MF.getRegInfo();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *TII = ST.getInstrInfo();
  assert(!isSGPRReg(MRI, PriMemReg) && "Private memory base address in VGPR");

  // Split the offset so that BaseAdj + Remainder == Offset and the remainder
  // fits in simm11. With C++ truncating division this holds for either sign
  // of Offset. Note that BaseAdj itself must still fit VADD_VI's simm11
  // field, so this only extends the addressable range by one simm11 step.
  int64_t BaseAdj = (Offset / 1024) * 1024;
  int64_t Remainder = Offset - BaseAdj;

  Register ScratchReg = MRI.createVirtualRegister(&RISCV::VGPRRegClass);
  // FIXME: maybe it is better to change the offset once rather than insert a
  // new machine instruction?
  BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(RISCV::VADD_VI))
      .addReg(ScratchReg, RegState::Define)
      .addReg(PriMemReg)
      .addImm(BaseAdj);
  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Remainder);
  MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg,
                                               /*IsDef*/ false,
                                               /*IsImp*/ false,
                                               /*IsKill*/ true);
}

/// Eliminate the frame index in machine instructions created along the
/// StoreRegToSlot/LoadRegFromSlot paths.
bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                            int SPAdj, unsigned FIOperandNum,
                                            RegScavenger *RS) const {
  assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");

  MachineInstr &MI = *II;
  MachineFunction &MF = *MI.getParent()->getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  const RISCVInstrInfo *RII = ST.getInstrInfo();

  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
  auto FrameIndexID = MF.getFrameInfo().getStackID(FrameIndex);
  Register FrameReg;
  // FIXME: FrameReg and Offset should depend on the divergence route.
  StackOffset Offset =
      getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
  // TODO: finish
  // if (!RII->isVGPRMemoryAccess(MI))
  //   Offset -= StackOffset::getFixed(
  //       MF.getInfo<RISCVMachineFunctionInfo>()->getVarArgsSaveSize() - 4);
  int64_t Lo11 = Offset.getFixed();
  Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());

  if (!isInt<32>(Offset.getFixed())) {
    report_fatal_error(
        "Frame offsets outside of the signed 32-bit range not supported");
  }

  if (MI.getOpcode() == RISCV::ADDI && !isInt<11>(Offset.getFixed())) {
    // We chose to emit the canonical immediate sequence rather than folding
    // the offset into the using add under the theory that doing so doesn't
    // save dynamic instruction count and some target may fuse the canonical
    // 32 bit immediate sequence. We still need to clear the portion of the
    // offset encoded in the immediate.
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(0);
  } else {
    // We can encode an add with an 11-bit signed immediate in the immediate
    // operand of our user instruction. As a result, the remaining offset can,
    // by construction, be materialized with at worst a LUI and an ADD.
    int64_t Val = Offset.getFixed();
    Lo11 = SignExtend64<11>(Val);
    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Lo11);
    Offset = StackOffset::get((uint64_t)Val - (uint64_t)Lo11,
                              Offset.getScalable());
  }

  if (MI.getOpcode() == RISCV::ADDI &&
      static_cast<unsigned>(FrameIndexID) == RISCVStackID::VGPRSpill) {
    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg,
                                                 /*IsDef*/ false,
                                                 /*IsImp*/ false,
                                                 /*IsKill*/ false);
  }

  if (RII->isVGPRMemoryAccess(MI)) {
    MI.getOperand(FIOperandNum).ChangeToRegister(
        getPrivateMemoryBaseRegister(MF),
        /*IsDef*/ false, /*IsImp*/ false, /*IsKill*/ false);
    // simm11 covers [-1024, 1023]; if the offset is outside this range, we
    // legalize it by adjusting the base register.
    if (!isInt<11>(Lo11))
      adjustPriMemRegOffset(MF, *MI.getParent(), MI, Lo11,
                            getPrivateMemoryBaseRegister(MF), FIOperandNum);
  } else
    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*IsDef*/ false,
                                                 /*IsImp*/ false,
                                                 /*IsKill*/ false);

  // If, after materializing the adjustment, we have a pointless ADDI, remove
  // it.
  if (MI.getOpcode() == RISCV::ADDI &&
      MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
      MI.getOperand(2).getImm() == 0) {
    MI.eraseFromParent();
    return true;
  }

  return false;
}

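// Entry functions address their frame through sp (X2); other functions use
// tp (X4), which points at the per-thread private stack.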
Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  return MF.getInfo<RISCVMachineFunctionInfo>()->isEntryFunction() ? RISCV::X2
                                                                   : RISCV::X4;
}

const uint32_t *
RISCVRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                        CallingConv::ID CC) const {
  auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();

  if (CC == CallingConv::GHC)
    return CSR_NoRegs_RegMask;
  switch (Subtarget.getTargetABI()) {
  default:
    llvm_unreachable("Unrecognized ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_LP64:
    return CSR_ILP32_LP64_RegMask;
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_LP64F:
    return CSR_ILP32F_LP64F_RegMask;
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64D:
    return CSR_ILP32D_LP64D_RegMask;
  }
}

const TargetRegisterClass *
RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                             const MachineFunction &) const {
  return RC;
}

void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset,
                                         SmallVectorImpl<uint64_t> &Ops) const {
  // VLENB is the length of a vector register in bytes. We use <vscale x 8 x
  // i8> to represent one vector register. The dwarf offset is
  // VLENB * scalable_offset / 8.
  assert(Offset.getScalable() % 8 == 0 && "Invalid frame offset");

  // Add fixed-sized offset using existing DIExpression interface.
  DIExpression::appendOffset(Ops, Offset.getFixed());

  unsigned VLENB = getDwarfRegNum(RISCV::VLENB, true);
  int64_t VLENBSized = Offset.getScalable() / 8;
  if (VLENBSized > 0) {
    Ops.push_back(dwarf::DW_OP_constu);
    Ops.push_back(VLENBSized);
    Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL});
    Ops.push_back(dwarf::DW_OP_mul);
    Ops.push_back(dwarf::DW_OP_plus);
  } else if (VLENBSized < 0) {
    Ops.push_back(dwarf::DW_OP_constu);
    Ops.push_back(-VLENBSized);
    Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL});
    Ops.push_back(dwarf::DW_OP_mul);
    Ops.push_back(dwarf::DW_OP_minus);
  }
}

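// With the C extension enabled, use cost table index 1 so that registers
// with compressed encodings are preferred during allocation.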
unsigned
RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const {
  return MF.getSubtarget<RISCVSubtarget>().hasStdExtC() ? 1 : 0;
}

// Add two address hints to improve chances of being able to use a compressed
// instruction.
bool RISCVRegisterInfo::getRegAllocationHints(
    Register VirtReg, ArrayRef<MCPhysReg> Order,
    SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
    const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);

  if (!VRM || DisableRegAllocHints)
    return BaseImplRetVal;

  // Add any two address hints after any copy hints.
  SmallSet<Register, 4> TwoAddrHints;

  auto tryAddHint = [&](const MachineOperand &VRRegMO, const MachineOperand &MO,
                        bool NeedGPRC) -> void {
    Register Reg = MO.getReg();
    Register PhysReg =
        Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
    if (PhysReg && (!NeedGPRC || RISCV::GPRCRegClass.contains(PhysReg))) {
      assert(!MO.getSubReg() && !VRRegMO.getSubReg() && "Unexpected subreg!");
      if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
        TwoAddrHints.insert(PhysReg);
    }
  };

  // This is all of the compressible binary instructions. If an instruction
  // needs GPRC register class operands \p NeedGPRC will be set to true.
  auto isCompressible = [](const MachineInstr &MI, bool &NeedGPRC) {
    NeedGPRC = false;
    switch (MI.getOpcode()) {
    default:
      return false;
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::SUB:
    case RISCV::ADDW:
    case RISCV::SUBW:
      NeedGPRC = true;
      return true;
    case RISCV::ANDI:
      NeedGPRC = true;
      return MI.getOperand(2).isImm() && isInt<6>(MI.getOperand(2).getImm());
    case RISCV::SRAI:
    case RISCV::SRLI:
      NeedGPRC = true;
      return true;
    case RISCV::ADD:
    case RISCV::SLLI:
      return true;
    case RISCV::ADDI:
    case RISCV::ADDIW:
      return MI.getOperand(2).isImm() && isInt<6>(MI.getOperand(2).getImm());
    }
  };

  // Returns true if this operand is compressible. For non-registers it always
  // returns true. Immediate range was already checked in isCompressible.
  // For registers, it checks if the register is a GPRC register. reg-reg
  // instructions that require GPRC need all register operands to be GPRC.
  auto isCompressibleOpnd = [&](const MachineOperand &MO) {
    if (!MO.isReg())
      return true;
    Register Reg = MO.getReg();
    Register PhysReg =
        Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
    return PhysReg && RISCV::GPRCRegClass.contains(PhysReg);
  };

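  // Walk every non-debug use of VirtReg; for operands of potentially
  // compressible instructions, hint the matching source/destination register
  // so the allocator can enable the compressed encoding.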
  for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
    const MachineInstr &MI = *MO.getParent();
    unsigned OpIdx = MI.getOperandNo(&MO);
    bool NeedGPRC;
    if (isCompressible(MI, NeedGPRC)) {
      if (OpIdx == 0 && MI.getOperand(1).isReg()) {
        if (!NeedGPRC || isCompressibleOpnd(MI.getOperand(2)))
          tryAddHint(MO, MI.getOperand(1), NeedGPRC);
        if (MI.isCommutable() && MI.getOperand(2).isReg() &&
            (!NeedGPRC || isCompressibleOpnd(MI.getOperand(1))))
          tryAddHint(MO, MI.getOperand(2), NeedGPRC);
      } else if (OpIdx == 1 &&
                 (!NeedGPRC || isCompressibleOpnd(MI.getOperand(2)))) {
        tryAddHint(MO, MI.getOperand(0), NeedGPRC);
      } else if (MI.isCommutable() && OpIdx == 2 &&
                 (!NeedGPRC || isCompressibleOpnd(MI.getOperand(1)))) {
        tryAddHint(MO, MI.getOperand(0), NeedGPRC);
      }
    }
  }

  for (MCPhysReg OrderReg : Order)
    if (TwoAddrHints.count(OrderReg))
      Hints.push_back(OrderReg);

  return BaseImplRetVal;
}