forked from OSchip/llvm-project
AMDGPU: Add G_AMDGPU_MAD_64_32 instructions
These generic instructions are trivially selected to V_MAD_[IU]64_[IU]32 instructions when run on the VALU. When both factors are scalar (regardless of whether the addend is), it is usually better to execute some or all of the instruction on the SALU. To this end, we lower the instruction to simpler instructions that are supported on the SALU when applying the register bank mapping. Differential Revision: https://reviews.llvm.org/D124843
This commit is contained in:
parent
8e724ad965
commit
5df2893a9a
|
@ -458,6 +458,19 @@ bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Select G_AMDGPU_MAD_U64_U32 / G_AMDGPU_MAD_I64_I32.
///
/// The generic instruction is rewritten in place: its descriptor is switched
/// to V_MAD_U64_U32_e64 (unsigned opcode) or V_MAD_I64_I32_e64 (signed
/// opcode), one immediate operand and the implicit operands of the new
/// descriptor are appended, and the register operands are constrained.
///
/// \returns the result of constrainSelectedInstRegOperands().
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  MachineFunction &Fn = *I.getParent()->getParent();

  unsigned NewOpc = AMDGPU::V_MAD_I64_I32_e64;
  if (I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32)
    NewOpc = AMDGPU::V_MAD_U64_U32_e64;
  I.setDesc(TII.get(NewOpc));

  // The VALU instruction carries one more explicit operand than the generic
  // one; it is always emitted as 0.
  // NOTE(review): presumably this is the clamp bit -- confirm against the
  // V_MAD_*64_*32 operand list.
  I.addOperand(Fn, MachineOperand::CreateImm(0));
  I.addImplicitDefUseOperands(Fn);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
|
||||||
|
|
||||||
// TODO: We should probably legalize these to only using 32-bit results.
|
// TODO: We should probably legalize these to only using 32-bit results.
|
||||||
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
|
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
|
||||||
MachineBasicBlock *BB = I.getParent();
|
MachineBasicBlock *BB = I.getParent();
|
||||||
|
@ -3335,6 +3348,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
|
||||||
case TargetOpcode::G_UADDE:
|
case TargetOpcode::G_UADDE:
|
||||||
case TargetOpcode::G_USUBE:
|
case TargetOpcode::G_USUBE:
|
||||||
return selectG_UADDO_USUBO_UADDE_USUBE(I);
|
return selectG_UADDO_USUBO_UADDE_USUBE(I);
|
||||||
|
case AMDGPU::G_AMDGPU_MAD_U64_U32:
|
||||||
|
case AMDGPU::G_AMDGPU_MAD_I64_I32:
|
||||||
|
return selectG_AMDGPU_MAD_64_32(I);
|
||||||
case TargetOpcode::G_INTTOPTR:
|
case TargetOpcode::G_INTTOPTR:
|
||||||
case TargetOpcode::G_BITCAST:
|
case TargetOpcode::G_BITCAST:
|
||||||
case TargetOpcode::G_PTRTOINT:
|
case TargetOpcode::G_PTRTOINT:
|
||||||
|
|
|
@ -97,6 +97,7 @@ private:
|
||||||
bool selectG_AND_OR_XOR(MachineInstr &I) const;
|
bool selectG_AND_OR_XOR(MachineInstr &I) const;
|
||||||
bool selectG_ADD_SUB(MachineInstr &I) const;
|
bool selectG_ADD_SUB(MachineInstr &I) const;
|
||||||
bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
|
bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
|
||||||
|
// Selects G_AMDGPU_MAD_U64_U32 and G_AMDGPU_MAD_I64_I32; select() dispatches
// both opcodes here.
bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const;
|
||||||
bool selectG_EXTRACT(MachineInstr &I) const;
|
bool selectG_EXTRACT(MachineInstr &I) const;
|
||||||
bool selectG_MERGE_VALUES(MachineInstr &I) const;
|
bool selectG_MERGE_VALUES(MachineInstr &I) const;
|
||||||
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
|
bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
|
||||||
|
|
|
@ -1555,6 +1555,157 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool AMDGPURegisterBankInfo::applyMappingMAD_64_32(
|
||||||
|
const OperandsMapper &OpdMapper) const {
|
||||||
|
MachineInstr &MI = OpdMapper.getMI();
|
||||||
|
MachineRegisterInfo &MRI = OpdMapper.getMRI();
|
||||||
|
|
||||||
|
// Insert basic copies.
|
||||||
|
applyDefaultMapping(OpdMapper);
|
||||||
|
|
||||||
|
Register Dst0 = MI.getOperand(0).getReg();
|
||||||
|
Register Dst1 = MI.getOperand(1).getReg();
|
||||||
|
Register Src0 = MI.getOperand(2).getReg();
|
||||||
|
Register Src1 = MI.getOperand(3).getReg();
|
||||||
|
Register Src2 = MI.getOperand(4).getReg();
|
||||||
|
|
||||||
|
if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
|
||||||
|
LLT S1 = LLT::scalar(1);
|
||||||
|
LLT S32 = LLT::scalar(32);
|
||||||
|
|
||||||
|
bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
|
||||||
|
bool Accumulate = true;
|
||||||
|
|
||||||
|
if (!DstOnValu) {
|
||||||
|
if (mi_match(Src2, MRI, m_ZeroInt()))
|
||||||
|
Accumulate = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keep the multiplication on the SALU.
|
||||||
|
MachineIRBuilder B(MI);
|
||||||
|
|
||||||
|
Register DstHi;
|
||||||
|
Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
|
||||||
|
bool MulHiInVgpr = false;
|
||||||
|
|
||||||
|
MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
|
||||||
|
|
||||||
|
if (Subtarget.hasSMulHi()) {
|
||||||
|
DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0)
|
||||||
|
: B.buildSMulH(S32, Src0, Src1).getReg(0);
|
||||||
|
MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
|
||||||
|
} else {
|
||||||
|
Register VSrc0 = B.buildCopy(S32, Src0).getReg(0);
|
||||||
|
Register VSrc1 = B.buildCopy(S32, Src1).getReg(0);
|
||||||
|
|
||||||
|
MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
|
||||||
|
MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
|
||||||
|
|
||||||
|
DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
|
||||||
|
: B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
|
||||||
|
MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
|
||||||
|
|
||||||
|
if (!DstOnValu) {
|
||||||
|
DstHi = buildReadFirstLane(B, MRI, DstHi);
|
||||||
|
} else {
|
||||||
|
MulHiInVgpr = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accumulate and produce the "carry-out" bit.
|
||||||
|
//
|
||||||
|
// The "carry-out" is defined as bit 64 of the result when computed as a
|
||||||
|
// big integer. For unsigned multiply-add, this matches the usual definition
|
||||||
|
// of carry-out. For signed multiply-add, bit 64 is the sign bit of the
|
||||||
|
// result, which is determined as:
|
||||||
|
// sign(Src0 * Src1) + sign(Src2) + carry-out from unsigned 64-bit add
|
||||||
|
LLT CarryType = DstOnValu ? S1 : S32;
|
||||||
|
const RegisterBank &CarryBank =
|
||||||
|
DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
|
||||||
|
const RegisterBank &DstBank =
|
||||||
|
DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
|
||||||
|
Register Carry;
|
||||||
|
Register Zero;
|
||||||
|
|
||||||
|
if (!IsUnsigned) {
|
||||||
|
Zero = B.buildConstant(S32, 0).getReg(0);
|
||||||
|
MRI.setRegBank(Zero,
|
||||||
|
MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
|
||||||
|
|
||||||
|
Carry = B.buildICmp(CmpInst::ICMP_SLT, MulHiInVgpr ? S1 : S32, DstHi, Zero)
|
||||||
|
.getReg(0);
|
||||||
|
MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
|
||||||
|
: AMDGPU::SGPRRegBank);
|
||||||
|
|
||||||
|
if (DstOnValu && !MulHiInVgpr) {
|
||||||
|
Carry = B.buildTrunc(S1, Carry).getReg(0);
|
||||||
|
MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Accumulate) {
|
||||||
|
if (DstOnValu) {
|
||||||
|
DstLo = B.buildCopy(S32, DstLo).getReg(0);
|
||||||
|
DstHi = B.buildCopy(S32, DstHi).getReg(0);
|
||||||
|
MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
|
||||||
|
MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto Unmerge = B.buildUnmerge(S32, Src2);
|
||||||
|
Register Src2Lo = Unmerge.getReg(0);
|
||||||
|
Register Src2Hi = Unmerge.getReg(1);
|
||||||
|
MRI.setRegBank(Src2Lo, DstBank);
|
||||||
|
MRI.setRegBank(Src2Hi, DstBank);
|
||||||
|
|
||||||
|
if (!IsUnsigned) {
|
||||||
|
auto Src2Sign = B.buildICmp(CmpInst::ICMP_SLT, CarryType, Src2Hi, Zero);
|
||||||
|
MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
|
||||||
|
|
||||||
|
Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
|
||||||
|
MRI.setRegBank(Carry, CarryBank);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
|
||||||
|
DstLo = AddLo.getReg(0);
|
||||||
|
Register CarryLo = AddLo.getReg(1);
|
||||||
|
MRI.setRegBank(DstLo, DstBank);
|
||||||
|
MRI.setRegBank(CarryLo, CarryBank);
|
||||||
|
|
||||||
|
auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
|
||||||
|
DstHi = AddHi.getReg(0);
|
||||||
|
MRI.setRegBank(DstHi, DstBank);
|
||||||
|
|
||||||
|
Register CarryHi = AddHi.getReg(1);
|
||||||
|
MRI.setRegBank(CarryHi, CarryBank);
|
||||||
|
|
||||||
|
if (IsUnsigned) {
|
||||||
|
Carry = CarryHi;
|
||||||
|
} else {
|
||||||
|
Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0);
|
||||||
|
MRI.setRegBank(Carry, CarryBank);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (IsUnsigned) {
|
||||||
|
Carry = B.buildConstant(CarryType, 0).getReg(0);
|
||||||
|
MRI.setRegBank(Carry, CarryBank);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
B.buildMerge(Dst0, {DstLo, DstHi});
|
||||||
|
|
||||||
|
if (DstOnValu) {
|
||||||
|
B.buildCopy(Dst1, Carry);
|
||||||
|
} else {
|
||||||
|
B.buildTrunc(Dst1, Carry);
|
||||||
|
}
|
||||||
|
|
||||||
|
MI.eraseFromParent();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Return a suitable opcode for extending the operands of Opc when widening.
|
// Return a suitable opcode for extending the operands of Opc when widening.
|
||||||
static unsigned getExtendOp(unsigned Opc) {
|
static unsigned getExtendOp(unsigned Opc) {
|
||||||
switch (Opc) {
|
switch (Opc) {
|
||||||
|
@ -3093,6 +3244,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||||
case AMDGPU::G_UBFX:
|
case AMDGPU::G_UBFX:
|
||||||
applyMappingBFE(OpdMapper, /*Signed*/ false);
|
applyMappingBFE(OpdMapper, /*Signed*/ false);
|
||||||
return;
|
return;
|
||||||
|
case AMDGPU::G_AMDGPU_MAD_U64_U32:
|
||||||
|
case AMDGPU::G_AMDGPU_MAD_I64_I32:
|
||||||
|
applyMappingMAD_64_32(OpdMapper);
|
||||||
|
return;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -3618,6 +3773,48 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||||
return getDefaultMappingSOP(MI);
|
return getDefaultMappingSOP(MI);
|
||||||
return getDefaultMappingVOP(MI);
|
return getDefaultMappingVOP(MI);
|
||||||
}
|
}
|
||||||
|
case AMDGPU::G_AMDGPU_MAD_U64_U32:
|
||||||
|
case AMDGPU::G_AMDGPU_MAD_I64_I32: {
|
||||||
|
// Three possible mappings:
|
||||||
|
//
|
||||||
|
// - Default SOP
|
||||||
|
// - Default VOP
|
||||||
|
// - Scalar multiply: src0 and src1 are SGPRs, the rest is VOP.
|
||||||
|
//
|
||||||
|
// This allows instruction selection to keep the multiplication part of the
|
||||||
|
// instruction on the SALU.
|
||||||
|
bool AllSalu = true;
|
||||||
|
bool MulSalu = true;
|
||||||
|
for (unsigned i = 0; i < 5; ++i) {
|
||||||
|
Register Reg = MI.getOperand(i).getReg();
|
||||||
|
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
|
||||||
|
if (Bank->getID() != AMDGPU::SGPRRegBankID) {
|
||||||
|
AllSalu = false;
|
||||||
|
if (i == 2 || i == 3) {
|
||||||
|
MulSalu = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (AllSalu)
|
||||||
|
return getDefaultMappingSOP(MI);
|
||||||
|
|
||||||
|
// If the multiply-add is full-rate in VALU, use that even if the
|
||||||
|
// multiplication part is scalar. Accumulating separately on the VALU would
|
||||||
|
// take two instructions.
|
||||||
|
if (!MulSalu || Subtarget.hasFullRate64Ops())
|
||||||
|
return getDefaultMappingVOP(MI);
|
||||||
|
|
||||||
|
// Keep the multiplication on the SALU, then accumulate on the VALU.
|
||||||
|
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
|
||||||
|
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
|
||||||
|
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||||
|
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
|
||||||
|
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case AMDGPU::G_IMPLICIT_DEF: {
|
case AMDGPU::G_IMPLICIT_DEF: {
|
||||||
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
|
||||||
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
|
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
|
||||||
|
|
|
@ -86,6 +86,8 @@ public:
|
||||||
|
|
||||||
bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const;
|
bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const;
|
||||||
|
|
||||||
|
// Applies the register bank mapping for G_AMDGPU_MAD_U64_U32 and
// G_AMDGPU_MAD_I64_I32; applyMappingImpl() dispatches both opcodes here.
bool applyMappingMAD_64_32(const OperandsMapper &OpdMapper) const;
|
||||||
|
|
||||||
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
|
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
|
||||||
Register Reg) const;
|
Register Reg) const;
|
||||||
|
|
||||||
|
|
|
@ -3110,6 +3110,19 @@ def G_AMDGPU_CLAMP : AMDGPUGenericInstruction {
|
||||||
let hasSideEffects = 0;
|
let hasSideEffects = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Integer multiply-add with carry-out:
//   $dst, $carry_out = $arg0 * $arg1 + $arg2
//
// $arg0 and $arg1 share type2: 32-bit integers, interpreted as signed or
// unsigned depending on the concrete opcode. $arg2 and $dst share type0: a
// 64-bit integer. $carry_out (type1) is a 1-bit carry-out.
class G_AMDGPU_MAD_64_32 : AMDGPUGenericInstruction {
  let hasSideEffects = 0;
  let InOperandList = (ins type2:$arg0, type2:$arg1, type0:$arg2);
  let OutOperandList = (outs type0:$dst, type1:$carry_out);
}
|
||||||
|
|
||||||
|
def G_AMDGPU_MAD_U64_U32 : G_AMDGPU_MAD_64_32;
|
||||||
|
def G_AMDGPU_MAD_I64_I32 : G_AMDGPU_MAD_64_32;
|
||||||
|
|
||||||
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
|
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
|
||||||
// operand. Expects a MachineMemOperand in addition to explicit
|
// operand. Expects a MachineMemOperand in addition to explicit
|
||||||
// operands.
|
// operands.
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs %s -o - 2>%t | FileCheck -check-prefix=GCN %s
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_vvv
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||||
|
; GCN-LABEL: name: mad_u64_u32_vvv
|
||||||
|
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||||
|
; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
|
||||||
|
; GCN-NEXT: [[V_MAD_U64_U32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_U64_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_U64_U32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||||
|
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAD_U64_U32_e64_]], implicit [[V_MAD_U64_U32_e64_1]]
|
||||||
|
%0:vgpr(s32) = COPY $vgpr0
|
||||||
|
%1:vgpr(s32) = COPY $vgpr1
|
||||||
|
%2:vgpr(s32) = COPY $vgpr2
|
||||||
|
%3:vgpr(s32) = COPY $vgpr3
|
||||||
|
%4:vgpr(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:vgpr(s64), %6:vcc(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %3
|
||||||
|
S_ENDPGM 0, implicit %5, implicit %6
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_i64_i32_vvv
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||||
|
; GCN-LABEL: name: mad_i64_i32_vvv
|
||||||
|
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||||
|
; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3
|
||||||
|
; GCN-NEXT: [[V_MAD_I64_I32_e64_:%[0-9]+]]:vreg_64, [[V_MAD_I64_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_MAD_I64_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||||
|
; GCN-NEXT: S_ENDPGM 0, implicit [[V_MAD_I64_I32_e64_]], implicit [[V_MAD_I64_I32_e64_1]]
|
||||||
|
%0:vgpr(s32) = COPY $vgpr0
|
||||||
|
%1:vgpr(s32) = COPY $vgpr1
|
||||||
|
%2:vgpr(s32) = COPY $vgpr2
|
||||||
|
%3:vgpr(s32) = COPY $vgpr3
|
||||||
|
%4:vgpr(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:vgpr(s64), %6:vcc(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %3
|
||||||
|
S_ENDPGM 0, implicit %5, implicit %6
|
||||||
|
...
|
|
@ -0,0 +1,550 @@
|
||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX8 %s
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX9MI %s
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck --check-prefixes=CHECK,GFX10 %s
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_sss
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; GFX8-LABEL: name: mad_u64_u32_sss
|
||||||
|
; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||||
|
; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY4]], [[COPY5]]
|
||||||
|
; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec
|
||||||
|
; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
|
||||||
|
; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
|
||||||
|
; GFX9MI-LABEL: name: mad_u64_u32_sss
|
||||||
|
; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||||
|
; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
|
||||||
|
; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
|
||||||
|
; GFX10-LABEL: name: mad_u64_u32_sss
|
||||||
|
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||||
|
; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
|
||||||
|
; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[UMULH]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[UADDE1]](s32)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s32) = COPY $sgpr2
|
||||||
|
%3:_(s32) = COPY $sgpr3
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_ssv
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; GFX8-LABEL: name: mad_u64_u32_ssv
|
||||||
|
; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY4]], [[COPY5]]
|
||||||
|
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32)
|
||||||
|
; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]]
|
||||||
|
; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1)
|
||||||
|
; GFX9MI-LABEL: name: mad_u64_u32_ssv
|
||||||
|
; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX9MI-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY5]], [[MV]]
|
||||||
|
; GFX10-LABEL: name: mad_u64_u32_ssv
|
||||||
|
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
|
||||||
|
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UMULH]](s32)
|
||||||
|
; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]]
|
||||||
|
; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[UADDE1]](s1)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s32) = COPY $vgpr0
|
||||||
|
%3:_(s32) = COPY $vgpr1
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_svs
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $vgpr0, $sgpr1, $sgpr2
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_u64_u32_svs
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[COPY5]]
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr0
|
||||||
|
%2:_(s32) = COPY $sgpr1
|
||||||
|
%3:_(s32) = COPY $sgpr2
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_svv
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_u64_u32_svv
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||||
|
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY4]](s32), [[COPY1]], [[MV]]
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr0
|
||||||
|
%2:_(s32) = COPY $vgpr1
|
||||||
|
%3:_(s32) = COPY $vgpr2
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_vss
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_u64_u32_vss
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[COPY5]]
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr0
|
||||||
|
%2:_(s32) = COPY $sgpr1
|
||||||
|
%3:_(s32) = COPY $sgpr2
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_vsv
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $sgpr0, $vgpr1, $vgpr2
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_u64_u32_vsv
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||||
|
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY4]], [[MV]]
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s32) = COPY $vgpr1
|
||||||
|
%3:_(s32) = COPY $vgpr2
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_vvs
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_u64_u32_vvs
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV]](s64)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY4]]
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr1
|
||||||
|
%2:_(s32) = COPY $sgpr1
|
||||||
|
%3:_(s32) = COPY $sgpr2
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_vvv
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_u64_u32_vvv
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
|
||||||
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
|
||||||
|
; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[MV]]
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr1
|
||||||
|
%2:_(s32) = COPY $vgpr2
|
||||||
|
%3:_(s32) = COPY $vgpr3
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_i64_i32_sss
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; GFX8-LABEL: name: mad_i64_i32_sss
|
||||||
|
; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; GFX8-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||||
|
; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY4]], [[COPY5]]
|
||||||
|
; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec
|
||||||
|
; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C]]
|
||||||
|
; GFX8-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
|
||||||
|
; GFX8-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]]
|
||||||
|
; GFX8-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
|
||||||
|
; GFX8-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[V_READFIRSTLANE_B32_]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX8-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
|
||||||
|
; GFX8-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
|
||||||
|
; GFX9MI-LABEL: name: mad_i64_i32_sss
|
||||||
|
; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||||
|
; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
|
||||||
|
; GFX9MI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX9MI-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
|
||||||
|
; GFX9MI-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]]
|
||||||
|
; GFX9MI-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
|
||||||
|
; GFX9MI-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX9MI-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
|
||||||
|
; GFX9MI-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
|
||||||
|
; GFX10-LABEL: name: mad_i64_i32_sss
|
||||||
|
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
|
||||||
|
; GFX10-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
|
||||||
|
; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
|
||||||
|
; GFX10-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
|
||||||
|
; GFX10-NEXT: [[XOR:%[0-9]+]]:sgpr(s32) = G_XOR [[ICMP]], [[ICMP1]]
|
||||||
|
; GFX10-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[MUL]], [[UV]]
|
||||||
|
; GFX10-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[SMULH]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX10-NEXT: [[XOR1:%[0-9]+]]:sgpr(s32) = G_XOR [[XOR]], [[UADDE1]]
|
||||||
|
; GFX10-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[XOR1]](s32)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s32) = COPY $sgpr2
|
||||||
|
%3:_(s32) = COPY $sgpr3
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_i64_i32_ssv
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; GFX8-LABEL: name: mad_i64_i32_ssv
|
||||||
|
; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; GFX8-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY4]], [[COPY5]]
|
||||||
|
; GFX8-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX8-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
|
||||||
|
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32)
|
||||||
|
; GFX8-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX8-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
|
||||||
|
; GFX8-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[ICMP]], [[ICMP1]]
|
||||||
|
; GFX8-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY6]], [[UV]]
|
||||||
|
; GFX8-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY7]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX8-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]]
|
||||||
|
; GFX8-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY8:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1)
|
||||||
|
; GFX9MI-LABEL: name: mad_i64_i32_ssv
|
||||||
|
; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX9MI-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; GFX9MI-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX9MI-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX9MI-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX9MI-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY4]](s32), [[COPY5]], [[MV]]
|
||||||
|
; GFX10-LABEL: name: mad_i64_i32_ssv
|
||||||
|
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
|
||||||
|
; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C]]
|
||||||
|
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:vcc(s1) = G_TRUNC [[ICMP]](s32)
|
||||||
|
; GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[MUL]](s32)
|
||||||
|
; GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[SMULH]](s32)
|
||||||
|
; GFX10-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64)
|
||||||
|
; GFX10-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C]]
|
||||||
|
; GFX10-NEXT: [[XOR:%[0-9]+]]:vcc(s1) = G_XOR [[TRUNC]], [[ICMP1]]
|
||||||
|
; GFX10-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY4]], [[UV]]
|
||||||
|
; GFX10-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY5]], [[UV1]], [[UADDO1]]
|
||||||
|
; GFX10-NEXT: [[XOR1:%[0-9]+]]:vcc(s1) = G_XOR [[XOR]], [[UADDE1]]
|
||||||
|
; GFX10-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
|
||||||
|
; GFX10-NEXT: [[COPY6:%[0-9]+]]:vcc(s1) = COPY [[XOR1]](s1)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s32) = COPY $vgpr0
|
||||||
|
%3:_(s32) = COPY $vgpr1
|
||||||
|
%4:_(s64) = G_MERGE_VALUES %2, %3
|
||||||
|
%5:_(s64), %6:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %4
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_ss0
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; GFX8-LABEL: name: mad_u64_u32_ss0
|
||||||
|
; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX8-NEXT: [[UMULH:%[0-9]+]]:vgpr_32(s32) = G_UMULH [[COPY2]], [[COPY3]]
|
||||||
|
; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[UMULH]](s32), implicit $exec
|
||||||
|
; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||||
|
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32)
|
||||||
|
; GFX9MI-LABEL: name: mad_u64_u32_ss0
|
||||||
|
; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32)
|
||||||
|
; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32)
|
||||||
|
; GFX10-LABEL: name: mad_u64_u32_ss0
|
||||||
|
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[UMULH]](s32)
|
||||||
|
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C1]](s32)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s64) = G_CONSTANT i64 0
|
||||||
|
%3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_u64_u32_vv0
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_u64_u32_vv0
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_U64_U32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_U64_U32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_U64_U32 [[COPY]](s32), [[COPY1]], [[COPY2]]
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr1
|
||||||
|
%2:_(s64) = G_CONSTANT i64 0
|
||||||
|
%3:_(s64), %4:_(s1) = G_AMDGPU_MAD_U64_U32 %0, %1, %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_i64_i32_ss0
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; GFX8-LABEL: name: mad_i64_i32_ss0
|
||||||
|
; GFX8: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX8-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX8-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; GFX8-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
|
||||||
|
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
|
||||||
|
; GFX8-NEXT: [[SMULH:%[0-9]+]]:vgpr_32(s32) = G_SMULH [[COPY2]], [[COPY3]]
|
||||||
|
; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(s32) = V_READFIRSTLANE_B32 [[SMULH]](s32), implicit $exec
|
||||||
|
; GFX8-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX8-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[V_READFIRSTLANE_B32_]](s32), [[C1]]
|
||||||
|
; GFX8-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[V_READFIRSTLANE_B32_]](s32)
|
||||||
|
; GFX8-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
|
||||||
|
; GFX9MI-LABEL: name: mad_i64_i32_ss0
|
||||||
|
; GFX9MI: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX9MI-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX9MI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; GFX9MI-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX9MI-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX9MI-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]]
|
||||||
|
; GFX9MI-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32)
|
||||||
|
; GFX9MI-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
|
||||||
|
; GFX10-LABEL: name: mad_i64_i32_ss0
|
||||||
|
; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||||
|
; GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
|
||||||
|
; GFX10-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; GFX10-NEXT: [[MUL:%[0-9]+]]:sgpr(s32) = G_MUL [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
|
||||||
|
; GFX10-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
|
||||||
|
; GFX10-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SMULH]](s32), [[C1]]
|
||||||
|
; GFX10-NEXT: [[MV:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[MUL]](s32), [[SMULH]](s32)
|
||||||
|
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32)
|
||||||
|
%0:_(s32) = COPY $sgpr0
|
||||||
|
%1:_(s32) = COPY $sgpr1
|
||||||
|
%2:_(s64) = G_CONSTANT i64 0
|
||||||
|
%3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2
|
||||||
|
...
|
||||||
|
|
||||||
|
---
|
||||||
|
name: mad_i64_i32_vv0
|
||||||
|
legalized: true
|
||||||
|
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $vgpr0, $vgpr1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; CHECK-LABEL: name: mad_i64_i32_vv0
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||||
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
|
||||||
|
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 0
|
||||||
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[C]](s64)
|
||||||
|
; CHECK-NEXT: [[AMDGPU_MAD_I64_I32_:%[0-9]+]]:vgpr(s64), [[AMDGPU_MAD_I64_I32_1:%[0-9]+]]:vcc(s1) = G_AMDGPU_MAD_I64_I32 [[COPY]](s32), [[COPY1]], [[COPY2]]
|
||||||
|
%0:_(s32) = COPY $vgpr0
|
||||||
|
%1:_(s32) = COPY $vgpr1
|
||||||
|
%2:_(s64) = G_CONSTANT i64 0
|
||||||
|
%3:_(s64), %4:_(s1) = G_AMDGPU_MAD_I64_I32 %0, %1, %2
|
||||||
|
...
|
Loading…
Reference in New Issue