[AMDGPU] Make vector superclasses allocatable
The combined vector register classes, which contain both VGPRs and AGPRs, are currently unallocatable. This patch makes them allocatable, as a prerequisite for enabling copies between VGPR and AGPR registers during register allocation. It also adds the missing AV register classes from 192b to 1024b.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D109300
parent 7051aeef7a
commit 654c89d85a
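The predicates this change leans on are easiest to see in isolation. Below is a small, self-contained C++ sketch (hypothetical code, not the LLVM sources) of the class-query semantics the diff switches to: hasVGPRs/hasAGPRs test the HasVGPR/HasAGPR TSFlags bits and are therefore also true for the combined AV superclasses, while isVGPRClass/isAGPRClass mean "pure" classes and the new isVectorSuperClass means "both". Most of the mechanical hasAGPRs -> isAGPRClass changes below exist because, once the AV classes become allocatable, those two questions no longer coincide.

    #include <cassert>

    // Hypothetical stand-in for a register class: just the two flag bits
    // (SIRCFlags::HasVGPR / SIRCFlags::HasAGPR) that the real queries test.
    struct RegClassFlags {
      bool HasVGPR;
      bool HasAGPR;
    };

    static bool hasVGPRs(const RegClassFlags &RC) { return RC.HasVGPR; }
    static bool hasAGPRs(const RegClassFlags &RC) { return RC.HasAGPR; }
    // "Pure" VGPR / AGPR classes exclude the combined superclasses.
    static bool isVGPRClass(const RegClassFlags &RC) { return hasVGPRs(RC) && !hasAGPRs(RC); }
    static bool isAGPRClass(const RegClassFlags &RC) { return hasAGPRs(RC) && !hasVGPRs(RC); }
    // The vector superclasses (AV_*) contain both kinds of registers.
    static bool isVectorSuperClass(const RegClassFlags &RC) { return hasVGPRs(RC) && hasAGPRs(RC); }

    int main() {
      RegClassFlags VGPR_32{true, false}, AGPR_32{false, true}, AV_32{true, true};
      assert(isAGPRClass(AGPR_32) && !isAGPRClass(AV_32)); // AV is not a pure AGPR class
      assert(isVectorSuperClass(AV_32) && !isVectorSuperClass(VGPR_32));
      assert(hasAGPRs(AV_32)); // but hasAGPRs still matches the superclass
      return 0;
    }
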
@@ -74,11 +74,11 @@ unsigned GCNRegPressure::getRegKind(Register Reg,
   assert(Reg.isVirtual());
   const auto RC = MRI.getRegClass(Reg);
   auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
-  return STI->isSGPRClass(RC) ?
-    (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
-    STI->hasAGPRs(RC) ?
-      (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
-      (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
+  return STI->isSGPRClass(RC)
+             ? (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE)
+             : STI->isAGPRClass(RC)
+                   ? (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE)
+                   : (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
 }
 
 void GCNRegPressure::inc(unsigned Reg,
@@ -259,7 +259,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   // VGPRz = REG_SEQUENCE VGPRx, sub0
 
   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
-  bool IsAGPR = TRI->hasAGPRs(DstRC);
+  bool IsAGPR = TRI->isAGPRClass(DstRC);
 
   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
     Register SrcReg = MI.getOperand(I).getReg();
@@ -853,7 +853,7 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
 
   Register PHIRes = MI.getOperand(0).getReg();
   const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
-  if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
+  if (AllAGPRUses && numVGPRUses && !TRI->isAGPRClass(RC0)) {
     LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
     MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
     for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
@@ -1586,17 +1586,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
 
   unsigned OpIdx = Op - &UseMI->getOperand(0);
   const MCInstrDesc &InstDesc = UseMI->getDesc();
-  const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
-  switch (OpInfo.RegClass) {
-  case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH;
-  case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH;
-  case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH;
-  case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
-  case AMDGPU::AV_160RegClassID:
-    break;
-  default:
-    return false;
-  }
+  if (!TRI->isVectorSuperClass(
+          TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
+    return false;
 
   const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
   auto Dst = MRI->createVirtualRegister(NewDstRC);
@@ -11483,15 +11483,15 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
     if (I == -1)
       break;
     MachineOperand &Op = MI.getOperand(I);
-    if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
-         OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
-        !Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg()))
+    if (!Op.isReg() || !Op.getReg().isVirtual())
+      continue;
+    auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
+    if (!TRI->hasAGPRs(RC))
       continue;
     auto *Src = MRI.getUniqueVRegDef(Op.getReg());
     if (!Src || !Src->isCopy() ||
         !TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
       continue;
-    auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
     auto *NewRC = TRI->getEquivalentVGPRClass(RC);
     // All uses of agpr64 and agpr32 can also accept vgpr except for
     // v_accvgpr_read, but we do not produce agpr reads during selection,
@@ -898,10 +898,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
   unsigned EltSize = 4;
   unsigned Opcode = AMDGPU::V_MOV_B32_e32;
-  if (RI.hasAGPRs(RC)) {
+  if (RI.isAGPRClass(RC)) {
     Opcode = (RI.hasVGPRs(SrcRC)) ?
       AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
-  } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
+  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
     Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
   } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
              (RI.isProperlyAlignedRC(*RC) &&
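For context, the opcode choice above boils down to which side of the copy lives in AGPRs. A rough standalone sketch, illustrative only and simplified from the logic in the hunk above:

    #include <iostream>
    #include <string>

    enum class Kind { VGPR, AGPR };

    // Mnemonic used for one 32-bit element of the copy; an empty string means no
    // single-instruction copy exists here (e.g. AGPR <- AGPR on targets without a
    // dedicated accvgpr move), so the copy has to go through a temporary.
    std::string copyOpcodeFor(Kind Dst, Kind Src) {
      if (Dst == Kind::AGPR)
        return Src == Kind::VGPR ? "v_accvgpr_write_b32" : "";
      if (Dst == Kind::VGPR && Src == Kind::AGPR)
        return "v_accvgpr_read_b32";
      return "v_mov_b32";
    }

    int main() {
      std::cout << copyOpcodeFor(Kind::AGPR, Kind::VGPR) << "\n"; // v_accvgpr_write_b32
      std::cout << copyOpcodeFor(Kind::VGPR, Kind::AGPR) << "\n"; // v_accvgpr_read_b32
      std::cout << copyOpcodeFor(Kind::VGPR, Kind::VGPR) << "\n"; // v_mov_b32
    }
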
@@ -1205,7 +1205,7 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
 
 unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
 
-  if (RI.hasAGPRs(DstRC))
+  if (RI.isAGPRClass(DstRC))
     return AMDGPU::COPY;
   if (RI.getRegSizeInBits(*DstRC) == 32) {
     return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
@@ -1463,8 +1463,8 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     return;
   }
 
-  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
-                                    : getVGPRSpillSaveOpcode(SpillSize);
+  unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+                                       : getVGPRSpillSaveOpcode(SpillSize);
   MFI->setHasSpilledVGPRs();
 
   BuildMI(MBB, MI, DL, get(Opcode))
@@ -1598,8 +1598,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     return;
   }
 
-  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
-                                    : getVGPRSpillRestoreOpcode(SpillSize);
+  unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+                                       : getVGPRSpillRestoreOpcode(SpillSize);
   BuildMI(MBB, MI, DL, get(Opcode), DestReg)
       .addFrameIndex(FrameIndex)           // vaddr
       .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
@@ -2802,12 +2802,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     }
 
     if (Is16Bit) {
-       if (isVGPRCopy)
-         return false; // Do not clobber vgpr_hi16
+      if (isVGPRCopy)
+        return false; // Do not clobber vgpr_hi16
 
-       if (DstReg.isVirtual() &&
-          UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
-        return false;
+      if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+        return false;
 
       UseMI.getOperand(0).setSubReg(0);
       if (DstReg.isPhysical()) {
@@ -3896,9 +3895,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
       // verification is broken anyway
       if (ST.needsAlignedVGPRs()) {
         const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
-        const bool IsVGPR = RI.hasVGPRs(RC);
-        const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC);
-        if ((IsVGPR || IsAGPR) && MO.getSubReg()) {
+        if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
           const TargetRegisterClass *SubRC =
               RI.getSubRegClass(RC, MO.getSubReg());
           RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
@@ -5522,13 +5519,13 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
       if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
         VRC = &AMDGPU::VReg_1RegClass;
       } else
-        VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+        VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
                   ? RI.getEquivalentAGPRClass(SRC)
                   : RI.getEquivalentVGPRClass(SRC);
     } else {
-      VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
-                ? RI.getEquivalentAGPRClass(VRC)
-                : RI.getEquivalentVGPRClass(VRC);
+      VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
+                ? RI.getEquivalentAGPRClass(VRC)
+                : RI.getEquivalentVGPRClass(VRC);
     }
     RC = VRC;
   } else {
@@ -7065,8 +7062,8 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
   case AMDGPU::STRICT_WWM:
   case AMDGPU::STRICT_WQM: {
     const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
-    if (RI.hasAGPRs(SrcRC)) {
-      if (RI.hasAGPRs(NewDstRC))
+    if (RI.isAGPRClass(SrcRC)) {
+      if (RI.isAGPRClass(NewDstRC))
         return nullptr;
 
       switch (Inst.getOpcode()) {
@@ -7082,7 +7079,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
       if (!NewDstRC)
         return nullptr;
     } else {
-      if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+      if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
         return nullptr;
 
       NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
@@ -1609,7 +1609,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
   }
 
   unsigned BitWidth = 32 * (CI.Width + Paired.Width);
-  return TRI->hasAGPRs(getDataRegClass(*CI.I))
+  return TRI->isAGPRClass(getDataRegClass(*CI.I))
              ? TRI->getAGPRClassForBitWidth(BitWidth)
              : TRI->getVGPRClassForBitWidth(BitWidth);
 }
@@ -1170,7 +1170,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
 
     unsigned I = MI.getOperandNo(&Op);
     if (Desc.OpInfo[I].RegClass == -1 ||
-        !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+        !TRI->isVGPRClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
       continue;
 
     if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
@@ -1099,7 +1099,7 @@ void SIRegisterInfo::buildSpillLoadStore(
 
   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
   // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
-  const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
+  const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
   const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
 
   // Always use 4 byte operations for AGPRs because we need to scavenge
@@ -2163,6 +2163,65 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
                              : getAnyAGPRClassForBitWidth(BitWidth);
 }
 
+static const TargetRegisterClass *
+getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
+  if (BitWidth <= 64)
+    return &AMDGPU::AV_64RegClass;
+  if (BitWidth <= 96)
+    return &AMDGPU::AV_96RegClass;
+  if (BitWidth <= 128)
+    return &AMDGPU::AV_128RegClass;
+  if (BitWidth <= 160)
+    return &AMDGPU::AV_160RegClass;
+  if (BitWidth <= 192)
+    return &AMDGPU::AV_192RegClass;
+  if (BitWidth <= 224)
+    return &AMDGPU::AV_224RegClass;
+  if (BitWidth <= 256)
+    return &AMDGPU::AV_256RegClass;
+  if (BitWidth <= 512)
+    return &AMDGPU::AV_512RegClass;
+  if (BitWidth <= 1024)
+    return &AMDGPU::AV_1024RegClass;
+
+  return nullptr;
+}
+
+static const TargetRegisterClass *
+getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
+  if (BitWidth <= 64)
+    return &AMDGPU::AV_64_Align2RegClass;
+  if (BitWidth <= 96)
+    return &AMDGPU::AV_96_Align2RegClass;
+  if (BitWidth <= 128)
+    return &AMDGPU::AV_128_Align2RegClass;
+  if (BitWidth <= 160)
+    return &AMDGPU::AV_160_Align2RegClass;
+  if (BitWidth <= 192)
+    return &AMDGPU::AV_192_Align2RegClass;
+  if (BitWidth <= 224)
+    return &AMDGPU::AV_224_Align2RegClass;
+  if (BitWidth <= 256)
+    return &AMDGPU::AV_256_Align2RegClass;
+  if (BitWidth <= 512)
+    return &AMDGPU::AV_512_Align2RegClass;
+  if (BitWidth <= 1024)
+    return &AMDGPU::AV_1024_Align2RegClass;
+
+  return nullptr;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
+  if (BitWidth <= 16)
+    return &AMDGPU::VGPR_LO16RegClass;
+  if (BitWidth <= 32)
+    return &AMDGPU::AV_32RegClass;
+  return ST.needsAlignedVGPRs()
+             ? getAlignedVectorSuperClassForBitWidth(BitWidth)
+             : getAnyVectorSuperClassForBitWidth(BitWidth);
+}
+
 const TargetRegisterClass *
 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
   if (BitWidth <= 16)
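The new helpers follow the same shape as the existing VGPR/AGPR ones: round the requested size up to the next available AV tuple width and, on subtargets that require aligned VGPRs, hand back the _Align2 variant. A minimal standalone C++ sketch of that selection (names are illustrative; the real helper also special-cases widths of 16 bits and below):

    #include <iostream>
    #include <string>

    std::string vectorSuperClassFor(unsigned BitWidth, bool NeedsAlignedVGPRs) {
      // Available AV tuple widths, mirroring the ladder of if-statements above.
      static const unsigned Widths[] = {32, 64, 96, 128, 160, 192, 224, 256, 512, 1024};
      for (unsigned W : Widths) {
        if (BitWidth <= W) {
          std::string Name = "AV_" + std::to_string(W);
          if (NeedsAlignedVGPRs && W > 32) // AV_32 has no aligned variant
            Name += "_Align2";
          return Name;
        }
      }
      return ""; // no superclass is wide enough
    }

    int main() {
      std::cout << vectorSuperClassFor(96, false) << "\n"; // AV_96
      std::cout << vectorSuperClassFor(97, true) << "\n";  // AV_128_Align2
      std::cout << vectorSuperClassFor(300, true) << "\n"; // AV_512_Align2 (no classes between 256 and 512)
    }
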
@@ -2305,15 +2364,14 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
 
   // We can assume that each lane corresponds to one 32-bit register.
   unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
-  if (isSGPRClass(RC)) {
-    if (Size == 32)
-      RC = &AMDGPU::SGPR_32RegClass;
-    else
-      RC = getSGPRClassForBitWidth(Size);
-  } else if (hasAGPRs(RC)) {
+  if (isAGPRClass(RC)) {
     RC = getAGPRClassForBitWidth(Size);
-  } else {
+  } else if (isVGPRClass(RC)) {
     RC = getVGPRClassForBitWidth(Size);
+  } else if (isVectorSuperClass(RC)) {
+    RC = getVectorSuperClassForBitWidth(Size);
+  } else {
+    RC = getSGPRClassForBitWidth(Size);
   }
   assert(RC && "Invalid sub-register class size");
   return RC;
@@ -2626,10 +2684,13 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
   if (!ST.needsAlignedVGPRs())
     return true;
 
-  if (hasVGPRs(&RC))
+  if (isVGPRClass(&RC))
     return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
-  if (hasAGPRs(&RC))
+  if (isAGPRClass(&RC))
     return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
+  if (isVectorSuperClass(&RC))
+    return RC.hasSuperClassEq(
+        getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
 
   return true;
 }
@@ -155,6 +155,10 @@ public:
   LLVM_READONLY
   const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
 
+  LLVM_READONLY
+  const TargetRegisterClass *
+  getVectorSuperClassForBitWidth(unsigned BitWidth) const;
+
   LLVM_READONLY
   static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
 
@@ -184,6 +188,11 @@ public:
     return hasAGPRs(RC) && !hasVGPRs(RC);
   }
 
+  /// \returns true only if this class contains both VGPR and AGPR registers
+  bool isVectorSuperClass(const TargetRegisterClass *RC) const {
+    return hasVGPRs(RC) && hasAGPRs(RC);
+  }
+
   /// \returns true if this class contains VGPR registers.
   static bool hasVGPRs(const TargetRegisterClass *RC) {
     return RC->TSFlags & SIRCFlags::HasVGPR;
@@ -862,37 +862,36 @@ def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_
   let HasVGPR = 1;
 }
 
-def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
-                            (add AGPR_32, VGPR_32)> {
-  let isAllocatable = 0;
+def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
   let HasVGPR = 1;
   let HasAGPR = 1;
 }
-
-def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
-                            (add AReg_64, VReg_64)> {
-  let isAllocatable = 0;
-  let HasVGPR = 1;
-  let HasAGPR = 1;
-}
 } // End GeneratePressureSet = 0
 
-let HasVGPR = 1, HasAGPR = 1 in {
-def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
-                            (add AReg_96, VReg_96)> {
-  let isAllocatable = 0;
-}
-
-def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
-                             (add AReg_128, VReg_128)> {
-  let isAllocatable = 0;
-}
-
-def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
-                             (add AReg_160, VReg_160)> {
-  let isAllocatable = 0;
-}
-} // End HasVGPR = 1, HasAGPR = 1
+// Define a register tuple class, along with one requiring an even
+// aligned base register.
+multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
+                      dag vregList, dag aregList> {
+  let HasVGPR = 1, HasAGPR = 1 in {
+    // Define the regular class.
+    def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
+
+    // Define 2-aligned variant
+    def _Align2 : VRegClassBase<numRegs, regTypes,
+                                (add (decimate vregList, 2),
+                                     (decimate aregList, 2))>;
+  }
+}
+
+defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
+defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
+defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
+defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
+defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>;
+defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>;
+defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>;
+defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>;
+defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
 
 //===----------------------------------------------------------------------===//
 // Register operands
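The _Align2 variants produced by the AVRegClass multiclass above rely on TableGen's decimate operator: keeping every second tuple from the full sequence leaves exactly the tuples whose base register index is even. A rough C++ illustration of that effect (not TableGen, purely to show the arithmetic):

    #include <iostream>
    #include <vector>

    struct Tuple { unsigned Base; unsigned NumRegs; };

    // All NumRegs-wide tuples over a 256-register file, one starting at each index.
    std::vector<Tuple> allTuples(unsigned NumRegs, unsigned FileSize = 256) {
      std::vector<Tuple> Out;
      for (unsigned B = 0; B + NumRegs <= FileSize; ++B)
        Out.push_back({B, NumRegs});
      return Out;
    }

    // Equivalent of (decimate list, 2): keep elements 0, 2, 4, ...
    std::vector<Tuple> decimate2(const std::vector<Tuple> &In) {
      std::vector<Tuple> Out;
      for (unsigned I = 0; I < In.size(); I += 2)
        Out.push_back(In[I]);
      return Out;
    }

    int main() {
      auto Aligned = decimate2(allTuples(/*NumRegs=*/2));
      for (unsigned I = 0; I < 4; ++I)
        std::cout << "v[" << Aligned[I].Base << ":" << Aligned[I].Base + 1 << "]\n";
      // Prints v[0:1], v[2:3], v[4:5], v[6:7] -- only even base registers remain.
    }
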
@@ -1626,13 +1626,14 @@ unsigned getRegBitWidth(unsigned RCID) {
     return 32;
   case AMDGPU::SGPR_64RegClassID:
   case AMDGPU::VS_64RegClassID:
-  case AMDGPU::AV_64RegClassID:
   case AMDGPU::SReg_64RegClassID:
   case AMDGPU::VReg_64RegClassID:
   case AMDGPU::AReg_64RegClassID:
   case AMDGPU::SReg_64_XEXECRegClassID:
   case AMDGPU::VReg_64_Align2RegClassID:
   case AMDGPU::AReg_64_Align2RegClassID:
+  case AMDGPU::AV_64RegClassID:
+  case AMDGPU::AV_64_Align2RegClassID:
     return 64;
   case AMDGPU::SGPR_96RegClassID:
   case AMDGPU::SReg_96RegClassID:
@@ -1641,6 +1642,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::VReg_96_Align2RegClassID:
   case AMDGPU::AReg_96_Align2RegClassID:
   case AMDGPU::AV_96RegClassID:
+  case AMDGPU::AV_96_Align2RegClassID:
     return 96;
   case AMDGPU::SGPR_128RegClassID:
   case AMDGPU::SReg_128RegClassID:
@@ -1649,6 +1651,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::VReg_128_Align2RegClassID:
   case AMDGPU::AReg_128_Align2RegClassID:
   case AMDGPU::AV_128RegClassID:
+  case AMDGPU::AV_128_Align2RegClassID:
     return 128;
   case AMDGPU::SGPR_160RegClassID:
   case AMDGPU::SReg_160RegClassID:
@@ -1657,6 +1660,7 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::VReg_160_Align2RegClassID:
   case AMDGPU::AReg_160_Align2RegClassID:
   case AMDGPU::AV_160RegClassID:
+  case AMDGPU::AV_160_Align2RegClassID:
     return 160;
   case AMDGPU::SGPR_192RegClassID:
   case AMDGPU::SReg_192RegClassID:
@@ -1664,6 +1668,8 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::AReg_192RegClassID:
   case AMDGPU::VReg_192_Align2RegClassID:
   case AMDGPU::AReg_192_Align2RegClassID:
+  case AMDGPU::AV_192RegClassID:
+  case AMDGPU::AV_192_Align2RegClassID:
     return 192;
   case AMDGPU::SGPR_224RegClassID:
   case AMDGPU::SReg_224RegClassID:
@@ -1671,6 +1677,8 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::AReg_224RegClassID:
   case AMDGPU::VReg_224_Align2RegClassID:
   case AMDGPU::AReg_224_Align2RegClassID:
+  case AMDGPU::AV_224RegClassID:
+  case AMDGPU::AV_224_Align2RegClassID:
     return 224;
   case AMDGPU::SGPR_256RegClassID:
   case AMDGPU::SReg_256RegClassID:
@@ -1678,6 +1686,8 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::AReg_256RegClassID:
   case AMDGPU::VReg_256_Align2RegClassID:
   case AMDGPU::AReg_256_Align2RegClassID:
+  case AMDGPU::AV_256RegClassID:
+  case AMDGPU::AV_256_Align2RegClassID:
     return 256;
   case AMDGPU::SGPR_512RegClassID:
   case AMDGPU::SReg_512RegClassID:
@@ -1685,6 +1695,8 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::AReg_512RegClassID:
   case AMDGPU::VReg_512_Align2RegClassID:
   case AMDGPU::AReg_512_Align2RegClassID:
+  case AMDGPU::AV_512RegClassID:
+  case AMDGPU::AV_512_Align2RegClassID:
     return 512;
   case AMDGPU::SGPR_1024RegClassID:
   case AMDGPU::SReg_1024RegClassID:
@@ -1692,6 +1704,8 @@ unsigned getRegBitWidth(unsigned RCID) {
   case AMDGPU::AReg_1024RegClassID:
   case AMDGPU::VReg_1024_Align2RegClassID:
   case AMDGPU::AReg_1024_Align2RegClassID:
+  case AMDGPU::AV_1024RegClassID:
+  case AMDGPU::AV_1024_Align2RegClassID:
     return 1024;
   default:
     llvm_unreachable("Unexpected register class");
@@ -144,7 +144,7 @@ define double @test_multiple_register_outputs_mixed() #0 {
   ; CHECK-NEXT: liveins: $sgpr30_sgpr31
   ; CHECK-NEXT: {{ $}}
   ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
-  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2883594 /* regdef:VReg_64 */, def %2
+  ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2
   ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
   ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2
   ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64)
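The bare integers in these INLINEASM lines are operand flag words, and they shift whenever register class IDs are renumbered, which adding the new allocatable AV classes does. A hedged decoding sketch, assuming the flag layout LLVM used at the time (operand kind in the low 3 bits, operand count next, register class ID plus one in bits 16 and up):

    #include <cstdio>

    void decodeFlagWord(unsigned Flag) {
      unsigned Kind = Flag & 0x7;          // 1 = reguse, 2 = regdef (assumed layout)
      unsigned NumOps = (Flag >> 3) & 0x1FFF;
      unsigned RCID = (Flag >> 16) - 1;    // stored biased by one (assumed layout)
      std::printf("kind=%u numops=%u rc-id=%u\n", Kind, NumOps, RCID);
    }

    int main() {
      decodeFlagWord(2883594); // old regdef:VReg_64 -> rc-id 43
      decodeFlagWord(2949130); // new regdef:VReg_64 -> rc-id 44, shifted by the added AV classes
    }
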
@@ -8,15 +8,15 @@
 define amdgpu_kernel void @s_input_output_i128() {
   ; GFX908-LABEL: name: s_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
+  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4
   ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
-  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
+  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]]
   ; GFX908-NEXT: S_ENDPGM 0
   ; GFX90A-LABEL: name: s_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4
+  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4
   ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4
-  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]]
+  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]]
   ; GFX90A-NEXT: S_ENDPGM 0
   %val = tail call i128 asm sideeffect "; def $0", "=s"()
   call void asm sideeffect "; use $0", "s"(i128 %val)
@@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() {
 define amdgpu_kernel void @v_input_output_i128() {
   ; GFX908-LABEL: name: v_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5046282 /* regdef:VReg_128 */, def %4
+  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5308426 /* regdef:VReg_128 */, def %4
   ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4
-  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5046281 /* reguse:VReg_128 */, [[COPY]]
+  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, [[COPY]]
   ; GFX908-NEXT: S_ENDPGM 0
   ; GFX90A-LABEL: name: v_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5177354 /* regdef:VReg_128_Align2 */, def %4
+  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5505034 /* regdef:VReg_128_Align2 */, def %4
   ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4
-  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5177353 /* reguse:VReg_128_Align2 */, [[COPY]]
+  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5505033 /* reguse:VReg_128_Align2 */, [[COPY]]
   ; GFX90A-NEXT: S_ENDPGM 0
   %val = tail call i128 asm sideeffect "; def $0", "=v"()
   call void asm sideeffect "; use $0", "v"(i128 %val)
@@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() {
 define amdgpu_kernel void @a_input_output_i128() {
   ; GFX908-LABEL: name: a_input_output_i128
   ; GFX908: bb.0 (%ir-block.0):
-  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:AReg_128 */, def %4
+  ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:AReg_128 */, def %4
   ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4
-  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:AReg_128 */, [[COPY]]
+  ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:AReg_128 */, [[COPY]]
   ; GFX908-NEXT: S_ENDPGM 0
   ; GFX90A-LABEL: name: a_input_output_i128
   ; GFX90A: bb.0 (%ir-block.0):
-  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:AReg_128_Align2 */, def %4
+  ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:AReg_128_Align2 */, def %4
   ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4
-  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:AReg_128_Align2 */, [[COPY]]
+  ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:AReg_128_Align2 */, [[COPY]]
   ; GFX90A-NEXT: S_ENDPGM 0
   %val = call i128 asm sideeffect "; def $0", "=a"()
   call void asm sideeffect "; use $0", "a"(i128 %val)