[AMDGPU] Make vector superclasses allocatable

The combined vector register classes with both
VGPRs and AGPRs are currently unallocatable.
This patch makes them allocatable, as a
prerequisite for enabling copies between VGPR
and AGPR registers during regalloc.

Also, add the missing AV register classes from
192b to 1024b.
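
As a quick illustration of what "allocatable" means here, below is a minimal
before/after sketch based on the AV_32 definition from the TableGen hunk in
this commit (the wider AV classes follow the same pattern via the new
AVRegClass multiclass). Once the explicit isAllocatable = 0 override is
removed, the class keeps TableGen's default of isAllocatable = 1, so the
register allocator may assign registers from it directly:

// Before: the vector superclass is excluded from register allocation.
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
                            (add AGPR_32, VGPR_32)> {
  let isAllocatable = 0;
  let HasVGPR = 1;
  let HasAGPR = 1;
}

// After: no isAllocatable override, so the default (allocatable) applies.
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
                            (add VGPR_32, AGPR_32)> {
  let HasVGPR = 1;
  let HasAGPR = 1;
}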

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D109300
Christudasan Devadasan 2021-09-05 23:40:10 -04:00
parent 7051aeef7a
commit 654c89d85a
13 changed files with 165 additions and 93 deletions


@@ -74,11 +74,11 @@ unsigned GCNRegPressure::getRegKind(Register Reg,
assert(Reg.isVirtual());
const auto RC = MRI.getRegClass(Reg);
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
return STI->isSGPRClass(RC) ?
(STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
STI->hasAGPRs(RC) ?
(STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
(STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
return STI->isSGPRClass(RC)
? (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE)
: STI->isAGPRClass(RC)
? (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE)
: (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
}
void GCNRegPressure::inc(unsigned Reg,


@@ -259,7 +259,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
// VGPRz = REG_SEQUENCE VGPRx, sub0
MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
bool IsAGPR = TRI->hasAGPRs(DstRC);
bool IsAGPR = TRI->isAGPRClass(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
Register SrcReg = MI.getOperand(I).getReg();
@@ -853,7 +853,7 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
Register PHIRes = MI.getOperand(0).getReg();
const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
if (AllAGPRUses && numVGPRUses && !TRI->isAGPRClass(RC0)) {
LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {


@@ -1586,17 +1586,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
unsigned OpIdx = Op - &UseMI->getOperand(0);
const MCInstrDesc &InstDesc = UseMI->getDesc();
const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
switch (OpInfo.RegClass) {
case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH;
case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH;
case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH;
case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
case AMDGPU::AV_160RegClassID:
break;
default:
if (!TRI->isVectorSuperClass(
TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
return false;
}
const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
auto Dst = MRI->createVirtualRegister(NewDstRC);


@@ -11483,15 +11483,15 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (I == -1)
break;
MachineOperand &Op = MI.getOperand(I);
if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
!Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg()))
if (!Op.isReg() || !Op.getReg().isVirtual())
continue;
auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
if (!TRI->hasAGPRs(RC))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
if (!Src || !Src->isCopy() ||
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
continue;
auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
// All uses of agpr64 and agpr32 can also accept vgpr except for
// v_accvgpr_read, but we do not produce agpr reads during selection,


@@ -898,10 +898,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.hasAGPRs(RC)) {
if (RI.isAGPRClass(RC)) {
Opcode = (RI.hasVGPRs(SrcRC)) ?
AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
} else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
} else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
} else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
(RI.isProperlyAlignedRC(*RC) &&
@@ -1205,7 +1205,7 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
if (RI.hasAGPRs(DstRC))
if (RI.isAGPRClass(DstRC))
return AMDGPU::COPY;
if (RI.getRegSizeInBits(*DstRC) == 32) {
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
@@ -1463,8 +1463,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
: getVGPRSpillSaveOpcode(SpillSize);
unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
: getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
@@ -1598,8 +1598,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
: getVGPRSpillRestoreOpcode(SpillSize);
unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
: getVGPRSpillRestoreOpcode(SpillSize);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
@@ -2802,12 +2802,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
if (Is16Bit) {
if (isVGPRCopy)
return false; // Do not clobber vgpr_hi16
if (isVGPRCopy)
return false; // Do not clobber vgpr_hi16
if (DstReg.isVirtual() &&
UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
return false;
if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
return false;
UseMI.getOperand(0).setSubReg(0);
if (DstReg.isPhysical()) {
@@ -3896,9 +3895,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// verification is broken anyway
if (ST.needsAlignedVGPRs()) {
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
const bool IsVGPR = RI.hasVGPRs(RC);
const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC);
if ((IsVGPR || IsAGPR) && MO.getSubReg()) {
if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
const TargetRegisterClass *SubRC =
RI.getSubRegClass(RC, MO.getSubReg());
RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
@@ -5522,13 +5519,13 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
VRC = &AMDGPU::VReg_1RegClass;
} else
VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(SRC)
: RI.getEquivalentVGPRClass(SRC);
} else {
VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(VRC)
: RI.getEquivalentVGPRClass(VRC);
VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(VRC)
: RI.getEquivalentVGPRClass(VRC);
}
RC = VRC;
} else {
@@ -7065,8 +7062,8 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::STRICT_WWM:
case AMDGPU::STRICT_WQM: {
const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
if (RI.hasAGPRs(SrcRC)) {
if (RI.hasAGPRs(NewDstRC))
if (RI.isAGPRClass(SrcRC)) {
if (RI.isAGPRClass(NewDstRC))
return nullptr;
switch (Inst.getOpcode()) {
@@ -7082,7 +7079,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
if (!NewDstRC)
return nullptr;
} else {
if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);


@@ -1609,7 +1609,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
}
unsigned BitWidth = 32 * (CI.Width + Paired.Width);
return TRI->hasAGPRs(getDataRegClass(*CI.I))
return TRI->isAGPRClass(getDataRegClass(*CI.I))
? TRI->getAGPRClassForBitWidth(BitWidth)
: TRI->getVGPRClassForBitWidth(BitWidth);
}


@@ -1170,7 +1170,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
unsigned I = MI.getOperandNo(&Op);
if (Desc.OpInfo[I].RegClass == -1 ||
!TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
!TRI->isVGPRClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
continue;
if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&


@@ -1099,7 +1099,7 @@ void SIRegisterInfo::buildSpillLoadStore(
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
// On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
// Always use 4 byte operations for AGPRs because we need to scavenge
@@ -2163,6 +2163,65 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
: getAnyAGPRClassForBitWidth(BitWidth);
}
static const TargetRegisterClass *
getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 64)
return &AMDGPU::AV_64RegClass;
if (BitWidth <= 96)
return &AMDGPU::AV_96RegClass;
if (BitWidth <= 128)
return &AMDGPU::AV_128RegClass;
if (BitWidth <= 160)
return &AMDGPU::AV_160RegClass;
if (BitWidth <= 192)
return &AMDGPU::AV_192RegClass;
if (BitWidth <= 224)
return &AMDGPU::AV_224RegClass;
if (BitWidth <= 256)
return &AMDGPU::AV_256RegClass;
if (BitWidth <= 512)
return &AMDGPU::AV_512RegClass;
if (BitWidth <= 1024)
return &AMDGPU::AV_1024RegClass;
return nullptr;
}
static const TargetRegisterClass *
getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 64)
return &AMDGPU::AV_64_Align2RegClass;
if (BitWidth <= 96)
return &AMDGPU::AV_96_Align2RegClass;
if (BitWidth <= 128)
return &AMDGPU::AV_128_Align2RegClass;
if (BitWidth <= 160)
return &AMDGPU::AV_160_Align2RegClass;
if (BitWidth <= 192)
return &AMDGPU::AV_192_Align2RegClass;
if (BitWidth <= 224)
return &AMDGPU::AV_224_Align2RegClass;
if (BitWidth <= 256)
return &AMDGPU::AV_256_Align2RegClass;
if (BitWidth <= 512)
return &AMDGPU::AV_512_Align2RegClass;
if (BitWidth <= 1024)
return &AMDGPU::AV_1024_Align2RegClass;
return nullptr;
}
const TargetRegisterClass *
SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
if (BitWidth <= 16)
return &AMDGPU::VGPR_LO16RegClass;
if (BitWidth <= 32)
return &AMDGPU::AV_32RegClass;
return ST.needsAlignedVGPRs()
? getAlignedVectorSuperClassForBitWidth(BitWidth)
: getAnyVectorSuperClassForBitWidth(BitWidth);
}
const TargetRegisterClass *
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 16)
@@ -2305,15 +2364,14 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
// We can assume that each lane corresponds to one 32-bit register.
unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
if (isSGPRClass(RC)) {
if (Size == 32)
RC = &AMDGPU::SGPR_32RegClass;
else
RC = getSGPRClassForBitWidth(Size);
} else if (hasAGPRs(RC)) {
if (isAGPRClass(RC)) {
RC = getAGPRClassForBitWidth(Size);
} else {
} else if (isVGPRClass(RC)) {
RC = getVGPRClassForBitWidth(Size);
} else if (isVectorSuperClass(RC)) {
RC = getVectorSuperClassForBitWidth(Size);
} else {
RC = getSGPRClassForBitWidth(Size);
}
assert(RC && "Invalid sub-register class size");
return RC;
@@ -2626,10 +2684,13 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
if (!ST.needsAlignedVGPRs())
return true;
if (hasVGPRs(&RC))
if (isVGPRClass(&RC))
return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
if (hasAGPRs(&RC))
if (isAGPRClass(&RC))
return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
if (isVectorSuperClass(&RC))
return RC.hasSuperClassEq(
getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
return true;
}


@@ -155,6 +155,10 @@ public:
LLVM_READONLY
const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
LLVM_READONLY
const TargetRegisterClass *
getVectorSuperClassForBitWidth(unsigned BitWidth) const;
LLVM_READONLY
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
@@ -184,6 +188,11 @@ public:
return hasAGPRs(RC) && !hasVGPRs(RC);
}
/// \returns true only if this class contains both VGPR and AGPR registers
bool isVectorSuperClass(const TargetRegisterClass *RC) const {
return hasVGPRs(RC) && hasAGPRs(RC);
}
/// \returns true if this class contains VGPR registers.
static bool hasVGPRs(const TargetRegisterClass *RC) {
return RC->TSFlags & SIRCFlags::HasVGPR;


@@ -862,37 +862,36 @@ def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
let HasVGPR = 1;
}
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
(add AGPR_32, VGPR_32)> {
let isAllocatable = 0;
let HasVGPR = 1;
let HasAGPR = 1;
}
def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
(add AReg_64, VReg_64)> {
let isAllocatable = 0;
def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
let HasVGPR = 1;
let HasAGPR = 1;
}
} // End GeneratePressureSet = 0
let HasVGPR = 1, HasAGPR = 1 in {
def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
(add AReg_96, VReg_96)> {
let isAllocatable = 0;
// Define a register tuple class, along with one requiring an even
// aligned base register.
multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
dag vregList, dag aregList> {
let HasVGPR = 1, HasAGPR = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
// Define 2-aligned variant
def _Align2 : VRegClassBase<numRegs, regTypes,
(add (decimate vregList, 2),
(decimate aregList, 2))>;
}
}
def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
(add AReg_128, VReg_128)> {
let isAllocatable = 0;
}
def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
(add AReg_160, VReg_160)> {
let isAllocatable = 0;
}
} // End HasVGPR = 1, HasAGPR = 1
defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>;
defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>;
defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>;
defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>;
defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
//===----------------------------------------------------------------------===//
// Register operands


@@ -1626,13 +1626,14 @@ unsigned getRegBitWidth(unsigned RCID) {
return 32;
case AMDGPU::SGPR_64RegClassID:
case AMDGPU::VS_64RegClassID:
case AMDGPU::AV_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
case AMDGPU::AReg_64RegClassID:
case AMDGPU::SReg_64_XEXECRegClassID:
case AMDGPU::VReg_64_Align2RegClassID:
case AMDGPU::AReg_64_Align2RegClassID:
case AMDGPU::AV_64RegClassID:
case AMDGPU::AV_64_Align2RegClassID:
return 64;
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
@@ -1641,6 +1642,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_96_Align2RegClassID:
case AMDGPU::AReg_96_Align2RegClassID:
case AMDGPU::AV_96RegClassID:
case AMDGPU::AV_96_Align2RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
@@ -1649,6 +1651,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_128_Align2RegClassID:
case AMDGPU::AReg_128_Align2RegClassID:
case AMDGPU::AV_128RegClassID:
case AMDGPU::AV_128_Align2RegClassID:
return 128;
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
@@ -1657,6 +1660,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_160_Align2RegClassID:
case AMDGPU::AReg_160_Align2RegClassID:
case AMDGPU::AV_160RegClassID:
case AMDGPU::AV_160_Align2RegClassID:
return 160;
case AMDGPU::SGPR_192RegClassID:
case AMDGPU::SReg_192RegClassID:
@@ -1664,6 +1668,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_192RegClassID:
case AMDGPU::VReg_192_Align2RegClassID:
case AMDGPU::AReg_192_Align2RegClassID:
case AMDGPU::AV_192RegClassID:
case AMDGPU::AV_192_Align2RegClassID:
return 192;
case AMDGPU::SGPR_224RegClassID:
case AMDGPU::SReg_224RegClassID:
@@ -1671,6 +1677,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_224RegClassID:
case AMDGPU::VReg_224_Align2RegClassID:
case AMDGPU::AReg_224_Align2RegClassID:
case AMDGPU::AV_224RegClassID:
case AMDGPU::AV_224_Align2RegClassID:
return 224;
case AMDGPU::SGPR_256RegClassID:
case AMDGPU::SReg_256RegClassID:
@@ -1678,6 +1686,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_256RegClassID:
case AMDGPU::VReg_256_Align2RegClassID:
case AMDGPU::AReg_256_Align2RegClassID:
case AMDGPU::AV_256RegClassID:
case AMDGPU::AV_256_Align2RegClassID:
return 256;
case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:
@@ -1685,6 +1695,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_512RegClassID:
case AMDGPU::VReg_512_Align2RegClassID:
case AMDGPU::AReg_512_Align2RegClassID:
case AMDGPU::AV_512RegClassID:
case AMDGPU::AV_512_Align2RegClassID:
return 512;
case AMDGPU::SGPR_1024RegClassID:
case AMDGPU::SReg_1024RegClassID:
@@ -1692,6 +1704,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_1024RegClassID:
case AMDGPU::VReg_1024_Align2RegClassID:
case AMDGPU::AReg_1024_Align2RegClassID:
case AMDGPU::AV_1024RegClassID:
case AMDGPU::AV_1024_Align2RegClassID:
return 1024;
default:
llvm_unreachable("Unexpected register class");


@@ -144,7 +144,7 @@ define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-NEXT: liveins: $sgpr30_sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2883594 /* regdef:VReg_64 */, def %2
; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)


@@ -8,15 +8,15 @@
define amdgpu_kernel void @s_input_output_i128() {
; GFX908-LABEL: name: s_input_output_i128
; GFX908: bb.0 (%ir-block.0):
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: s_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=s"()
call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
define amdgpu_kernel void @v_input_output_i128() {
; GFX908-LABEL: name: v_input_output_i128
; GFX908: bb.0 (%ir-block.0):
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5046282 /* regdef:VReg_128 */, def %4
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5308426 /* regdef:VReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5046281 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: v_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5177354 /* regdef:VReg_128_Align2 */, def %4
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5505034 /* regdef:VReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5177353 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5505033 /* reguse:VReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = tail call i128 asm sideeffect "; def $0", "=v"()
call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
define amdgpu_kernel void @a_input_output_i128() {
; GFX908-LABEL: name: a_input_output_i128
; GFX908: bb.0 (%ir-block.0):
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:AReg_128 */, def %4
; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:AReg_128 */, def %4
; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:AReg_128 */, [[COPY]]
; GFX908-NEXT: S_ENDPGM 0
; GFX90A-LABEL: name: a_input_output_i128
; GFX90A: bb.0 (%ir-block.0):
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:AReg_128_Align2 */, def %4
; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:AReg_128_Align2 */, def %4
; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:AReg_128_Align2 */, [[COPY]]
; GFX90A-NEXT: S_ENDPGM 0
%val = call i128 asm sideeffect "; def $0", "=a"()
call void asm sideeffect "; use $0", "a"(i128 %val)