[AMDGPU] Use single cache policy operand

Replace the individual GLC, SLC, and DLC operands with a single cache_policy
bitmask operand. This reduces the number of operands in MIR and, I hope,
the amount of code as well. These operands are mostly 0 anyway.

An additional advantage is that the parser will now accept these flags in any
order, which it does not do today.

Differential Revision: https://reviews.llvm.org/D96469
This commit is contained in:
Stanislav Mekhanoshin 2021-02-08 16:36:10 -08:00
parent af2796c76d
commit 3bffb1cd0e
248 changed files with 7491 additions and 7557 deletions

View File

@ -328,20 +328,14 @@ def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
GISDNodeXFormEquiv<IMMPopCount>;
def gi_extract_glc : GICustomOperandRenderer<"renderExtractGLC">,
GISDNodeXFormEquiv<extract_glc>;
def gi_extract_slc : GICustomOperandRenderer<"renderExtractSLC">,
GISDNodeXFormEquiv<extract_slc>;
def gi_extract_dlc : GICustomOperandRenderer<"renderExtractDLC">,
GISDNodeXFormEquiv<extract_dlc>;
def gi_extract_cpol : GICustomOperandRenderer<"renderExtractCPol">,
GISDNodeXFormEquiv<extract_cpol>;
def gi_extract_swz : GICustomOperandRenderer<"renderExtractSWZ">,
GISDNodeXFormEquiv<extract_swz>;
def gi_extract_sccb : GICustomOperandRenderer<"renderExtractSCCB">,
GISDNodeXFormEquiv<extract_sccb>;
def gi_set_glc : GICustomOperandRenderer<"renderSetGLC">,
GISDNodeXFormEquiv<set_glc>;
def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">,
GISDNodeXFormEquiv<frameindex_to_targetframeindex>;

View File

@ -188,16 +188,13 @@ private:
SDValue &Offset1, unsigned Size) const;
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE, SDValue &DLC, SDValue &SWZ,
SDValue &SCCB) const;
SDValue &Idxen, SDValue &Addr64, SDValue &CPol, SDValue &TFE,
SDValue &SWZ) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &GLC,
SDValue &SLC, SDValue &TFE, SDValue &DLC,
SDValue &SWZ, SDValue &SCCB) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
SDValue &SLC) const;
SDValue &SOffset, SDValue &Offset, SDValue &CPol,
SDValue &TFE, SDValue &SWZ) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset) const;
bool SelectMUBUFScratchOffen(SDNode *Parent,
SDValue Addr, SDValue &RSrc, SDValue &VAddr,
SDValue &SOffset, SDValue &ImmOffset) const;
@ -206,11 +203,8 @@ private:
SDValue &Offset) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
SDValue &Offset, SDValue &GLC, SDValue &SLC,
SDValue &TFE, SDValue &DLC, SDValue &SWZ,
SDValue &SCCB) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &SLC) const;
SDValue &Offset, SDValue &CPol, SDValue &TFE,
SDValue &SWZ) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
@ -1393,13 +1387,11 @@ bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
return true;
}
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,
SDValue &TFE, SDValue &DLC,
SDValue &SWZ, SDValue &SCCB) const {
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset,
SDValue &Offen, SDValue &Idxen,
SDValue &Addr64, SDValue &CPol,
SDValue &TFE, SDValue &SWZ) const {
// Subtarget prefers to use flat instruction
// FIXME: This should be a pattern predicate and not reach here
if (Subtarget->useFlatForGlobal())
@ -1407,14 +1399,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDLoc DL(Addr);
if (!GLC.getNode())
GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
if (!SLC.getNode())
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
if (!CPol)
CPol = CurDAG->getTargetConstant(0, DL, MVT::i32);
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
SCCB = CurDAG->getTargetConstant(0, DL, MVT::i1);
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@ -1492,10 +1480,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &GLC,
SDValue &SLC, SDValue &TFE,
SDValue &DLC, SDValue &SWZ,
SDValue &SCCB) const {
SDValue &Offset, SDValue &CPol,
SDValue &TFE, SDValue &SWZ) const {
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
@ -1504,7 +1490,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return false;
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE, DLC, SWZ, SCCB))
CPol, TFE, SWZ))
return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@ -1523,12 +1509,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset,
SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
SDValue GLC, TFE, DLC, SWZ, SCCB;
SDValue &Offset) const {
SDValue CPol, TFE, SWZ;
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, CPol, TFE, SWZ);
}
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@ -1650,15 +1634,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &SOffset, SDValue &Offset,
SDValue &GLC, SDValue &SLC,
SDValue &TFE, SDValue &DLC,
SDValue &SWZ, SDValue &SCCB) const {
SDValue &CPol, SDValue &TFE,
SDValue &SWZ) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE, DLC, SWZ, SCCB))
CPol, TFE, SWZ))
return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@ -1680,16 +1663,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset
) const {
SDValue GLC, SLC, TFE, DLC, SWZ, SCCB;
SDValue CPol, TFE, SWZ;
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
SDValue &Soffset, SDValue &Offset,
SDValue &SLC) const {
SDValue GLC, TFE, DLC, SWZ, SCCB;
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, CPol, TFE, SWZ);
}
// Find a load or store from corresponding pattern root.
@ -2383,35 +2359,32 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MachineSDNode *CmpSwap = nullptr;
if (Subtarget->hasAddr64()) {
SDValue SRsrc, VAddr, SOffset, Offset, SLC;
SDValue SRsrc, VAddr, SOffset, Offset;
if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
SDValue CmpVal = Mem->getOperand(2);
SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
// XXX - Do we care about glue operands?
SDValue Ops[] = {
CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
};
SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
Mem->getChain()};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
}
}
if (!CmpSwap) {
SDValue SRsrc, SOffset, Offset, SLC;
if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
SDValue SRsrc, SOffset, Offset;
if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
SDValue CmpVal = Mem->getOperand(2);
SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
SDValue Ops[] = {
CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
};
SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
}

View File

@ -1460,28 +1460,6 @@ static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
return TexFailCtrl == 0;
}
static bool parseCachePolicy(uint64_t Value,
bool *GLC, bool *SLC, bool *DLC, bool *SCC) {
if (GLC) {
*GLC = (Value & 0x1) ? 1 : 0;
Value &= ~(uint64_t)0x1;
}
if (SLC) {
*SLC = (Value & 0x2) ? 1 : 0;
Value &= ~(uint64_t)0x2;
}
if (DLC) {
*DLC = (Value & 0x4) ? 1 : 0;
Value &= ~(uint64_t)0x4;
}
if (SCC) {
*SCC = (Value & 0x10) ? 1 : 0;
Value &= ~(uint64_t)0x10;
}
return Value == 0;
}
bool AMDGPUInstructionSelector::selectImageIntrinsic(
MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
MachineBasicBlock *MBB = MI.getParent();
@ -1607,22 +1585,11 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
// TODO: Check this in verifier.
assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
bool GLC = false;
bool SLC = false;
bool DLC = false;
bool SCC = false;
if (BaseOpcode->Atomic) {
GLC = true; // TODO no-return optimization
if (!parseCachePolicy(
MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), nullptr,
&SLC, IsGFX10Plus ? &DLC : nullptr, &SCC))
return false;
} else {
if (!parseCachePolicy(
MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), &GLC,
&SLC, IsGFX10Plus ? &DLC : nullptr, &SCC))
return false;
}
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
if (CPol & ~AMDGPU::CPol::ALL)
return false;
int NumVAddrRegs = 0;
int NumVAddrDwords = 0;
@ -1708,13 +1675,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
if (IsGFX10Plus)
MIB.addImm(DimInfo->Encoding);
MIB.addImm(Unorm);
if (IsGFX10Plus)
MIB.addImm(DLC);
else
MIB.addImm(SCC);
MIB.addImm(GLC);
MIB.addImm(SLC);
MIB.addImm(CPol);
MIB.addImm(IsA16 && // a16 or r128
STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
if (IsGFX10Plus)
@ -2407,8 +2369,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
MIB.addImm(0);
MIB.addImm(Offset);
MIB.addImm(1); // glc
MIB.addImm(0); // slc
MIB.addImm(AMDGPU::CPol::GLC);
MIB.cloneMemRefs(MI);
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), DstReg)
@ -2991,7 +2952,7 @@ bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
I.add(MI.getOperand(2)); // rsrc
I.add(SOffset);
I.addImm(Offset);
renderExtractSLC(I, MI, 7);
I.addImm(MI.getOperand(7).getImm()); // cpol
I.cloneMemRefs(MI);
MI.eraseFromParent();
@ -3029,8 +2990,7 @@ bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
.addReg(Addr.first)
.addReg(Data)
.addImm(Addr.second)
.addImm(0) // SLC
.addImm(0) // SSCB
.addImm(0) // cpol
.cloneMemRefs(MI);
MI.eraseFromParent();
@ -4153,12 +4113,9 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset);
},
addZeroImm, // glc
addZeroImm, // slc
addZeroImm, // cpol
addZeroImm, // tfe
addZeroImm, // dlc
addZeroImm, // swz
addZeroImm // scc
addZeroImm // swz
}};
}
@ -4182,12 +4139,9 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
addZeroImm, // glc
addZeroImm, // slc
addZeroImm, // cpol
addZeroImm, // tfe
addZeroImm, // dlc
addZeroImm, // swz
addZeroImm // scc
}};
}
@ -4219,7 +4173,9 @@ AMDGPUInstructionSelector::selectMUBUFAddr64Atomic(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { // offset
MIB.addImm(Offset);
},
addZeroImm // slc
[=](MachineInstrBuilder &MIB) {
MIB.addImm(AMDGPU::CPol::GLC); // cpol
}
}};
}
@ -4243,7 +4199,7 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
addZeroImm // slc
[=](MachineInstrBuilder &MIB) { MIB.addImm(AMDGPU::CPol::GLC); } // cpol
}};
}
@ -4333,25 +4289,11 @@ void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
MIB.addImm(MI.getOperand(OpIdx).getImm());
}
void AMDGPUInstructionSelector::renderExtractGLC(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
MIB.addImm(MI.getOperand(OpIdx).getImm() & 1);
}
void AMDGPUInstructionSelector::renderExtractSLC(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 1) & 1);
}
void AMDGPUInstructionSelector::renderExtractDLC(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 2) & 1);
MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
}
void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
@ -4361,11 +4303,11 @@ void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
}
void AMDGPUInstructionSelector::renderExtractSCCB(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
assert(OpIdx >= 0 && "expected to match an immediate operand");
MIB.addImm((MI.getOperand(OpIdx).getImm() >> 4) & 1);
MIB.addImm(MI.getOperand(OpIdx).getImm() | AMDGPU::CPol::GLC);
}
void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,

View File

@ -286,16 +286,12 @@ private:
void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderExtractGLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderExtractSLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderExtractDLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderExtractCPol(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderExtractSWZ(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderExtractSCCB(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderSetGLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

View File

@ -1702,26 +1702,14 @@ static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
}
static unsigned extractGLC(unsigned CachePolicy) {
return CachePolicy & 1;
}
static unsigned extractSLC(unsigned CachePolicy) {
return (CachePolicy >> 1) & 1;
}
static unsigned extractDLC(unsigned CachePolicy) {
return (CachePolicy >> 2) & 1;
static unsigned extractCPol(unsigned CachePolicy) {
return CachePolicy & AMDGPU::CPol::ALL;
}
/// Returns 1 when bit 3 (the SWZ flag) of the packed cache-policy immediate
/// is set, and 0 otherwise.
static unsigned extractSWZ(unsigned CachePolicy) {
  const unsigned SWZMask = 1u << 3;
  return (CachePolicy & SWZMask) != 0 ? 1u : 0u;
}
static unsigned extractSCCB(unsigned CachePolicy) {
return (CachePolicy >> 4) & 1;
}
MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
@ -1787,12 +1775,9 @@ AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
MIB.addUse(RSrc)
.addUse(SOffset)
.addImm(ImmOffset)
.addImm(extractGLC(CachePolicy))
.addImm(extractSLC(CachePolicy))
.addImm(extractCPol(CachePolicy))
.addImm(0) // tfe: FIXME: Remove from inst
.addImm(extractDLC(CachePolicy))
.addImm(extractSWZ(CachePolicy))
.addImm(extractSCCB(CachePolicy))
.cloneMemRefs(MI);
// FIXME: We need a way to report failure from applyMappingImpl.

View File

@ -114,10 +114,7 @@ public:
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
ImmTyDLC,
ImmTySCCB,
ImmTyGLC,
ImmTySLC,
ImmTyCPol,
ImmTySWZ,
ImmTyTFE,
ImmTyD16,
@ -340,13 +337,10 @@ public:
bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isDLC() const { return isImmTy(ImmTyDLC); }
bool isSCCB() const { return isImmTy(ImmTySCCB); }
bool isGLC() const { return isImmTy(ImmTyGLC); }
// "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
// value of the GLC operand.
bool isGLC_1() const { return isImmTy(ImmTyGLC); }
bool isSLC() const { return isImmTy(ImmTySLC); }
bool isCPol() const { return isImmTy(ImmTyCPol); }
// "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and
// forced value of the GLC operand.
bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); }
bool isSWZ() const { return isImmTy(ImmTySWZ); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
@ -998,10 +992,7 @@ public:
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
case ImmTyDLC: OS << "DLC"; break;
case ImmTySCCB: OS << "SCCB"; break;
case ImmTyGLC: OS << "GLC"; break;
case ImmTySLC: OS << "SLC"; break;
case ImmTyCPol: OS << "CPol"; break;
case ImmTySWZ: OS << "SWZ"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
@ -1183,6 +1174,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ForcedDPP = false;
bool ForcedSDWA = false;
KernelScopeInfo KernelScope;
unsigned CPolSeen;
/// @name Auto-generated Match Functions
/// {
@ -1267,7 +1259,7 @@ private:
bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
unsigned RegWidth);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
bool IsAtomic, bool IsLds = false);
void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
bool IsGdsHardcoded);
@ -1462,6 +1454,7 @@ public:
OperandMatchResultTy
parseNamedBit(StringRef Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
OperandMatchResultTy parseCPol(OperandVector &Operands);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
StringRef &Value,
SMLoc &StringLoc);
@ -1621,17 +1614,13 @@ public:
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
int64_t parseGPRIdxMacro();
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
AMDGPUOperand::Ptr defaultDLC() const;
AMDGPUOperand::Ptr defaultSCCB() const;
AMDGPUOperand::Ptr defaultGLC() const;
AMDGPUOperand::Ptr defaultGLC_1() const;
AMDGPUOperand::Ptr defaultSLC() const;
AMDGPUOperand::Ptr defaultCPol() const;
AMDGPUOperand::Ptr defaultCPol_GLC1() const;
AMDGPUOperand::Ptr defaultSMRDOffset8() const;
AMDGPUOperand::Ptr defaultSMEMOffset() const;
@ -1653,6 +1642,8 @@ public:
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
bool parseDimId(unsigned &Encoding);
OperandMatchResultTy parseDim(OperandVector &Operands);
OperandMatchResultTy parseDPP8(OperandVector &Operands);
@ -4096,15 +4087,44 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
const OperandVector &Operands,
const SMLoc &IDLoc) {
int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::glc1);
if (GLCPos != -1) {
// -1 is set by GLC_1 default operand. In all cases "glc" must be present
// in the asm string, and the default value means it is not present.
if (Inst.getOperand(GLCPos).getImm() == -1) {
int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::cpol);
if (CPolPos == -1)
return true;
unsigned CPol = Inst.getOperand(CPolPos).getImm();
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
if ((TSFlags & (SIInstrFlags::SMRD)) &&
(CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
Error(IDLoc, "invalid cache policy for SMRD instruction");
return false;
}
if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
return true;
if (TSFlags & SIInstrFlags::IsAtomicRet) {
if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
Error(IDLoc, "instruction must use glc");
return false;
}
} else {
if (CPol & CPol::GLC) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
Error(S, "instruction must not use glc");
return false;
}
}
if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) {
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
Error(S, "instruction must not use scc");
return false;
}
return true;
@ -5280,6 +5300,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
OperandMode Mode = OperandMode_Default;
if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
Mode = OperandMode_NSA;
CPolSeen = 0;
OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
if (Res != MatchOperand_Success) {
@ -5404,12 +5425,6 @@ AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
Error(S, "a16 modifier is not supported on this GPU");
return MatchOperand_ParseFail;
}
if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) {
Error(S, "dlc modifier is not supported on this GPU");
return MatchOperand_ParseFail;
}
if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB)
return MatchOperand_ParseFail;
if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
ImmTy = AMDGPUOperand::ImmTyR128A16;
@ -5418,6 +5433,62 @@ AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
return MatchOperand_Success;
}
/// Parse one cache-policy modifier and fold it into the instruction's single
/// CPol immediate operand.
///
/// Recognised spellings are "glc", "slc", "dlc" and "scc"; each may be
/// prefixed with "no" to clear the bit instead of setting it. Returns
/// MatchOperand_NoMatch when the current token is none of these. "dlc" is
/// rejected unless isGFX10Plus(), "scc" unless isGFX90A(), and a bit that is
/// already recorded in CPolSeen is reported as a duplicate.
///
/// If \p Operands already holds a CPol operand the bit is merged into it;
/// otherwise a new ImmTyCPol operand is appended, initialised with the bit
/// being set (0 for a "no..." modifier).
OperandMatchResultTy
AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  struct CPolModifier {
    const char *SetName;   // Spelling that turns the bit on.
    const char *ClearName; // Spelling that turns the bit off.
    unsigned Bit;          // Cache-policy bit controlled by the modifier.
  };
  const CPolModifier Modifiers[] = {
      {"glc", "noglc", AMDGPU::CPol::GLC},
      {"slc", "noslc", AMDGPU::CPol::SLC},
      {"dlc", "nodlc", AMDGPU::CPol::DLC},
      {"scc", "noscc", AMDGPU::CPol::SCC},
  };

  // Remember where the modifier starts so diagnostics point at it.
  SMLoc ModLoc = getLoc();

  unsigned SetMask = 0;
  unsigned ClearMask = 0;
  bool Matched = false;
  for (const CPolModifier &Mod : Modifiers) {
    if (trySkipId(Mod.SetName)) {
      SetMask = Mod.Bit;
      Matched = true;
      break;
    }
    if (trySkipId(Mod.ClearName)) {
      ClearMask = Mod.Bit;
      Matched = true;
      break;
    }
  }
  if (!Matched)
    return MatchOperand_NoMatch;

  // Exactly one of the two masks is populated; their union is the bit named.
  const unsigned Touched = SetMask | ClearMask;

  if (!isGFX10Plus() && (Touched & AMDGPU::CPol::DLC)) {
    Error(ModLoc, "dlc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (!isGFX90A() && (Touched & AMDGPU::CPol::SCC)) {
    Error(ModLoc, "scc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (CPolSeen & Touched) {
    Error(ModLoc, "duplicate cache policy modifier");
    return MatchOperand_ParseFail;
  }
  CPolSeen |= Touched;

  // Merge into an existing cache-policy operand when one was parsed earlier.
  // Index 0 is skipped, exactly as the scan has always done.
  for (unsigned Idx = 1; Idx != Operands.size(); ++Idx) {
    AMDGPUOperand &Parsed = static_cast<AMDGPUOperand &>(*Operands[Idx]);
    if (!Parsed.isCPol())
      continue;
    Parsed.setImm((Parsed.getImm() | SetMask) & ~ClearMask);
    return MatchOperand_Success;
  }

  // First cache-policy modifier of this instruction: create the operand.
  Operands.push_back(AMDGPUOperand::CreateImm(this, SetMask, ModLoc,
                                              AMDGPUOperand::ImmTyCPol));
  return MatchOperand_Success;
}
static void addOptionalImmOperand(
MCInst& Inst, const OperandVector& Operands,
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
@ -6835,36 +6906,43 @@ AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
// mubuf
//===----------------------------------------------------------------------===//
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const {
return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(),
AMDGPUOperand::ImmTyCPol);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
bool IsAtomic,
bool IsAtomicReturn,
bool IsLds) {
const OperandVector &Operands,
bool IsAtomic,
bool IsLds) {
bool IsLdsOpcode = IsLds;
bool HasLdsModifier = false;
OptionalImmIndexMap OptionalIdx;
assert(IsAtomicReturn ? IsAtomic : true);
unsigned FirstOperandIdx = 1;
bool IsAtomicReturn = false;
if (IsAtomic) {
for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
if (!Op.isCPol())
continue;
IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
break;
}
if (!IsAtomicReturn) {
int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
if (NewOpc != -1)
Inst.setOpcode(NewOpc);
}
IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
SIInstrFlags::IsAtomicRet;
}
for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@ -6915,19 +6993,12 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
if (!IsAtomic || IsAtomicReturn) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
IsAtomicReturn ? -1 : 0);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
}
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@ -6962,12 +7033,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTyOffset);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
}
//===----------------------------------------------------------------------===//
@ -7009,16 +7077,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1)
addOptionalImmOperand(Inst, Operands, OptionalIdx,
AMDGPUOperand::ImmTySCCB);
if (IsGFX10Plus)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
@ -7034,6 +7093,61 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
cvtMIMG(Inst, Operands, true);
}
/// Convert the parsed operands of an SMEM atomic into MCInst operands.
///
/// When the first cache-policy operand does not carry GLC (or there is none),
/// the opcode is switched to the variant returned by getAtomicNoRetOp(), if
/// one exists. Whether the final opcode is a returning atomic is then read
/// back from its IsAtomicRet TSFlag. Register and plain immediate operands are
/// emitted in source order, tokens are ignored, and the remaining typed
/// immediates are emitted afterwards as optional operands (offset, then cpol).
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  const unsigned NumParsed = Operands.size();

  // Only the first cache-policy operand is consulted for the glc bit.
  bool HasGLC = false;
  for (unsigned Idx = 1; Idx != NumParsed; ++Idx) {
    AMDGPUOperand &Parsed = static_cast<AMDGPUOperand &>(*Operands[Idx]);
    if (Parsed.isCPol()) {
      HasGLC = (Parsed.getImm() & AMDGPU::CPol::GLC) != 0;
      break;
    }
  }

  // Without glc, prefer the no-return form of the opcode when it exists.
  if (!HasGLC) {
    const int NoRetOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NoRetOpc != -1)
      Inst.setOpcode(NoRetOpc);
  }

  // The opcode actually selected decides how the operands are laid out.
  const bool ReturnsValue =
      (MII.get(Inst.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) != 0;

  OptionalImmIndexMap OptionalIdx;
  for (unsigned Idx = 1; Idx != NumParsed; ++Idx) {
    AMDGPUOperand &Parsed = static_cast<AMDGPUOperand &>(*Operands[Idx]);

    if (Parsed.isReg()) {
      Parsed.addRegOperands(Inst, 1);
      // A returning atomic gets its first register emitted twice --
      // presumably once as the result and once as the data input; confirm
      // against the instruction definition.
      if (ReturnsValue && Idx == 1)
        Parsed.addRegOperands(Inst, 1);
    } else if (Parsed.isImm() &&
               Parsed.getImmTy() == AMDGPUOperand::ImmTyNone) {
      // An untyped immediate, e.g. soffset written as a literal.
      Parsed.addImmOperands(Inst, 1);
    } else if (!Parsed.isToken()) {
      // Tokens such as 'offen' are hard-coded into the asm string and have no
      // MCInst operand; everything else left over is an optional immediate
      // whose position is recorded for later.
      assert(Parsed.isImm());
      OptionalIdx[Parsed.getImmTy()] = Idx;
    }
  }

  // Emit the offset only if the operands added so far have not yet reached
  // the instruction's named 'offset' slot.
  const int OffsetIdx =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset);
  if (static_cast<int>(Inst.getNumOperands()) <= OffsetIdx)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
const OperandVector &Operands) {
for (unsigned I = 1; I < Operands.size(); ++I) {
@ -7127,10 +7241,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
{"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
{"scc", AMDGPUOperand::ImmTySCCB, true, nullptr},
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
{"", AMDGPUOperand::ImmTyCPol, false, nullptr},
{"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
@ -7213,6 +7324,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
Op.ConvertResult);
} else if (Op.Type == AMDGPUOperand::ImmTyDim) {
res = parseDim(Operands);
} else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
res = parseCPol(Operands);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@ -8078,8 +8191,6 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
case MCK_lds:
return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
case MCK_glc:
return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
case MCK_idxen:
return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
case MCK_offen:

View File

@ -6,16 +6,14 @@
//
//===----------------------------------------------------------------------===//
// Complex patterns used to match MUBUF addresses. The string argument names
// the C++ selector routine that performs the match; several patterns share one
// selector name and differ only in the integer argument.
// NOTE(review): the integer is presumably the number of operands the selector
// produces - confirm against ComplexPattern in TargetSelectionDAG.td.
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
def MUBUFAddr64 : ComplexPattern<i64, 10, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
// The scratch variants additionally request the parent node (SDNPWantParent).
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
def MUBUFOffset : ComplexPattern<i64, 9, "SelectMUBUFOffset">;
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def BUFAddrKind {
int Offset = 0;
@ -124,20 +122,17 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
let TSFlags = ps.TSFlags;
bits<12> offset;
bits<1> glc;
bits<1> dlc;
bits<5> cpol;
bits<7> format;
bits<8> vaddr;
bits<10> vdata;
bits<7> srsrc;
bits<1> slc;
bits<1> tfe;
bits<8> soffset;
bits<4> dfmt = format{3-0};
bits<3> nfmt = format{6-4};
bits<1> sccb;
// GFX90A+ only: instruction uses AccVGPR for data
// Bit supersedes tfe.
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
@ -150,17 +145,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb),
offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb)
offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz)
);
dag InsData = !if(!empty(vaddrList),
(ins vdata_op:$vdata, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb),
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
TFE:$tfe, SWZ:$swz),
(ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb)
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
TFE:$tfe, SWZ:$swz)
);
dag ret = !if(!empty(vdataList), InsNoData, InsData);
}
@ -212,7 +207,7 @@ class MTBUF_Load_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs getLdStRegisterOperand<vdataClass>.ret:$vdata),
getMTBUFIns<addrKindCopy>.ret,
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@ -254,7 +249,7 @@ class MTBUF_Store_Pseudo <string opName,
: MTBUF_Pseudo<opName,
(outs),
getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
pattern>,
MTBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@ -357,16 +352,13 @@ class MUBUF_Real <MUBUF_Pseudo ps> :
let UseNamedOperandTable = ps.UseNamedOperandTable;
bits<12> offset;
bits<1> glc;
bits<1> dlc;
bits<5> cpol;
bits<8> vaddr;
bits<10> vdata;
bits<7> srsrc;
bits<1> slc;
bits<1> tfe;
bits<8> soffset;
bits<1> sccb;
// GFX90A+ only: instruction uses AccVGPR for data
// Bit supersedes tfe.
bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
@ -410,19 +402,19 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset, GLC:$glc, SLC:$slc),
offset:$offset, CPol:$cpol),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset, GLC:$glc, SLC:$slc)
offset:$offset, CPol:$cpol)
);
dag InsData = !if(!empty(vaddrList),
(ins vdata_op:$vdata, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc),
SCSrc_b32:$soffset, offset:$offset, CPol:$cpol),
(ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc)
SCSrc_b32:$soffset, offset:$offset, CPol:$cpol)
);
dag ret = !con(
!if(!empty(vdataList), InsNoData, InsData),
!if(isLds, (ins DLC:$dlc, SWZ:$swz, SCCB_0:$sccb), (ins TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb))
!if(isLds, (ins SWZ:$swz), (ins TFE:$tfe, SWZ:$swz))
);
}
@ -495,8 +487,8 @@ class MUBUF_Load_Pseudo <string opName,
(outs vdata_op:$vdata),
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
!if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))),
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
!if(isLds, " lds", "$tfe") # "$dlc$swz$sccb",
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" #
!if(isLds, " lds", "$tfe") # "$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # !if(isLds, "_lds", "") #
@ -514,15 +506,15 @@ class MUBUF_Load_Pseudo <string opName,
}
// Selection pattern for a load whose address is matched by the MUBUFOffset
// complex pattern. Every operand MUBUFOffset yields is forwarded to `inst`
// unchanged and in the same order.
//   inst    - instruction to select.
//   load_vt - result type of the load (defaults to i32).
//   ld      - load operator to match (defaults to null_frag).
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
(load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
(load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
(load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
(load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
>;
// Selection pattern for a load whose address is matched by the MUBUFAddr64
// complex pattern. The matched operands are forwarded to `inst`; note that
// $vaddr is moved to the front of the instruction operand list, ahead of
// $srsrc, while the complex pattern lists $srsrc first.
//   inst    - instruction to select.
//   load_vt - result type of the load (defaults to i32).
//   ld      - load operator to match (defaults to null_frag).
class MUBUF_Addr64_Load_Pat <Instruction inst,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> : Pat <
(load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
(load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
(load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
(load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
>;
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
@ -575,7 +567,7 @@ class MUBUF_Store_Pseudo <string opName,
: MUBUF_Pseudo<opName,
(outs),
getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret,
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@ -593,12 +585,12 @@ multiclass MUBUF_Pseudo_Stores<string opName,
def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt,
[(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
MUBUFAddr64Table<0, NAME>;
def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt,
[(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
MUBUFAddr64Table<1, NAME>;
def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
@ -616,8 +608,8 @@ multiclass MUBUF_Pseudo_Stores<string opName,
class MUBUF_Pseudo_Store_Lds<string opName>
: MUBUF_Pseudo<opName,
(outs),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz, SCCB_0:$sccb),
" $srsrc, $soffset$offset lds$glc$slc$swz$sccb"> {
(ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz),
" $srsrc, $soffset$offset lds$cpol$swz"> {
let mayLoad = 0;
let mayStore = 1;
let maybeAtomic = 1;
@ -638,15 +630,15 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
dag ret = !if(vdata_in,
!if(!empty(vaddrList),
(ins vdata_op:$vdata_in,
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc),
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol),
(ins vdata_op:$vdata_in, vaddrClass:$vaddr,
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc)
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol)
),
!if(!empty(vaddrList),
(ins vdata_op:$vdata,
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc),
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol),
(ins vdata_op:$vdata, vaddrClass:$vaddr,
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc)
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol)
));
}
@ -689,6 +681,7 @@ class MUBUF_Atomic_Pseudo<string opName,
let has_tfe = 0;
let has_sccb = 0;
let maybeAtomic = 1;
let AsmMatchConverter = "cvtMubufAtomic";
}
class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
@ -700,7 +693,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
: MUBUF_Atomic_Pseudo<opName, addrKindCopy,
(outs),
getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret,
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$slc",
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
pattern>,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@ -708,7 +701,6 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
let dlc_value = 0;
let sccb_value = 0;
let IsAtomicNoRet = 1;
let AsmMatchConverter = "cvtMubufAtomic";
}
class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
@ -721,7 +713,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
: MUBUF_Atomic_Pseudo<opName, addrKindCopy,
(outs vdata_op:$vdata),
getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc1$slc",
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
pattern>,
AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
@ -731,7 +723,6 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
let IsAtomicRet = 1;
let Constraints = "$vdata = $vdata_in";
let DisableEncoding = "$vdata_in";
let AsmMatchConverter = "cvtMubufAtomicReturn";
}
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
@ -766,15 +757,15 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
let FPAtomic = isFP in
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <0, NAME # "_RTN">;
let FPAtomic = isFP in
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set vdataType:$vdata,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
vdataType:$vdata_in))]>,
(atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <1, NAME # "_RTN">;
let FPAtomic = isFP in
@ -1210,24 +1201,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1236,8 +1224,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
}
@ -1296,26 +1283,21 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_glc $auxiliary),
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
(st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_glc $auxiliary),
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1324,9 +1306,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary),
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_cpol $auxiliary),
0, (extract_swz $auxiliary))
>;
}
@ -1386,7 +1367,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN)
getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_slc $cachepolicy))
(as_i16timm $offset), (set_glc $cachepolicy))
>;
def : GCNPat<
@ -1394,7 +1375,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
timm:$offset, timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_slc $cachepolicy))
(as_i16timm $offset), (set_glc $cachepolicy))
>;
def : GCNPat<
@ -1402,7 +1383,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_slc $cachepolicy))
(as_i16timm $offset), (set_glc $cachepolicy))
>;
def : GCNPat<
@ -1412,7 +1393,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_slc $cachepolicy))
(set_glc $cachepolicy))
>;
}
@ -1460,7 +1441,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_slc $cachepolicy))
(as_i16timm $offset), $cachepolicy)
>;
def : GCNPat<
@ -1468,7 +1449,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_slc $cachepolicy))
(as_i16timm $offset), $cachepolicy)
>;
def : GCNPat<
@ -1476,7 +1457,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (extract_slc $cachepolicy))
(as_i16timm $offset), $cachepolicy)
>;
def : GCNPat<
@ -1486,7 +1467,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy))
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy)
>;
}
@ -1512,7 +1493,7 @@ def : GCNPat<
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(extract_slc $cachepolicy)), VReg_64)), sub0)
(set_glc $cachepolicy)), VReg_64)), sub0)
>;
def : GCNPat<
@ -1523,7 +1504,8 @@ def : GCNPat<
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(set_glc $cachepolicy)), VReg_64)),
sub0)
>;
@ -1535,7 +1517,8 @@ def : GCNPat<
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(set_glc $cachepolicy)), VReg_64)),
sub0)
>;
@ -1548,28 +1531,28 @@ def : GCNPat<
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(set_glc $cachepolicy)), VReg_64)),
sub0)
>;
// GCN pattern selecting the ADDR64 form of a MUBUF load for the `constant_ld`
// fragment. The address is matched by MUBUFAddr64 and its operands are passed
// through to Instr_ADDR64, with $vaddr placed before $srsrc.
//   Instr_ADDR64 - ADDR64 pseudo to select.
//   vt           - result type of the load.
//   constant_ld  - load fragment to match.
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
>;
// Emits two GCN patterns for an atomic load fragment `atomic_ld` of type `vt`:
// one selecting Instr_ADDR64 and one selecting Instr_OFFSET. Only the address
// operands come from the match; the operands that follow the offset in each
// output instruction are literal zeros.
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_ld> {
// ADDR64 form: $vaddr goes first in the instruction operand list.
def : GCNPat <
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0)
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
// OFFSET form: the matched offset is passed through as_i16imm.
def : GCNPat <
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0)
(vt (atomic_ld (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset))),
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
>;
}
@ -1590,8 +1573,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
def : GCNPat <
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
(Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
(Instr_OFFSET $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
>;
}
@ -1614,12 +1597,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
def : GCNPat <
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset))),
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : GCNPat <
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
>;
}
@ -1629,12 +1612,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
ValueType vt, PatFrag ld_frag> {
def : GCNPat <
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
>;
def : GCNPat <
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
>;
}
@ -1679,14 +1662,13 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is first
def : GCNPat <
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
i16:$offset, i1:$slc), vt:$val),
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0)
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), vt:$val),
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : GCNPat <
(atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0)
(atomic_st (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
>;
}
let SubtargetPredicate = isGFX6GFX7 in {
@ -1700,8 +1682,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
def : GCNPat <
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)),
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)),
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
>;
}
@ -1715,13 +1697,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
i32:$soffset, u16imm:$offset)),
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
u16imm:$offset)),
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0)
>;
}
@ -1767,8 +1749,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1776,8 +1757,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1785,8 +1765,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1796,8 +1775,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
(as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
}
@ -1835,8 +1813,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1844,8 +1821,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1853,8 +1829,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
(as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
def : GCNPat<
@ -1864,8 +1839,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
getVregSrcForVT<vt>.ret:$vdata,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format),
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
(extract_swz $auxiliary), (extract_sccb $auxiliary))
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
>;
}
@ -1907,21 +1881,21 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{16} = ps.lds;
let Inst{24-18} = op;
let Inst{31-26} = 0x38;
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
let Inst{54} = !if(ps.has_slc, slc, ?);
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
// GFX10 real encoding of a MUBUF pseudo `ps`. The low seven bits of the 8-bit
// opcode are handed to the shared gfx6/gfx7/gfx10 base class; this class adds
// the fields that base does not set.
class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
// Bit 15 is DLC: taken from the operand when the pseudo has one, otherwise
// from the pseudo's fixed dlc_value.
let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
// Bit 25 carries the top bit of the 8-bit opcode.
let Inst{25} = op{7};
}
@ -1973,16 +1947,33 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
// Defines the GFX10 real instructions for the returning (_RTN) pseudos of a
// buffer atomic, one per addressing mode: BOTHEN, IDXEN, OFFEN and OFFSET.
// The AtomicNoRet mixin is given the name of the matching non-returning real
// instruction and the flag value 1.
// NOTE(review): presumably this feeds the returning -> non-returning opcode
// mapping table - confirm against the AtomicNoRet definition.
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
def _BOTHEN_RTN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>,
AtomicNoRet<NAME # "_BOTHEN_gfx10", 1>;
def _IDXEN_RTN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>,
AtomicNoRet<NAME # "_IDXEN_gfx10", 1>;
def _OFFEN_RTN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>,
AtomicNoRet<NAME # "_OFFEN_gfx10", 1>;
def _OFFSET_RTN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>,
AtomicNoRet<NAME # "_OFFSET_gfx10", 1>;
}
// Defines the full set of GFX10 real instructions for a buffer atomic: the
// returning forms come from MUBUF_Real_Atomics_RTN_gfx10, and the
// non-returning BOTHEN, IDXEN, OFFEN and OFFSET forms are defined here with
// the AtomicNoRet mixin carrying their own name and the flag value 0.
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
MUBUF_Real_AllAddr_gfx10<op>, MUBUF_Real_Atomics_RTN_gfx10<op>;
MUBUF_Real_Atomics_RTN_gfx10<op> {
def _BOTHEN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
AtomicNoRet<NAME # "_BOTHEN_gfx10", 0>;
def _IDXEN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
AtomicNoRet<NAME # "_IDXEN_gfx10", 0>;
def _OFFEN_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
AtomicNoRet<NAME # "_OFFEN_gfx10", 0>;
def _OFFSET_gfx10 :
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
AtomicNoRet<NAME # "_OFFSET_gfx10", 0>;
}
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
@ -2062,18 +2053,38 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
}
// Defines the GFX6/GFX7 real instructions for a buffer atomic in five
// addressing modes (ADDR64, BOTHEN, IDXEN, OFFEN, OFFSET), each in a
// non-returning and a returning (_RTN) form. Both forms of a given mode pass
// the same non-returning instruction name to AtomicNoRet; the second argument
// is 0 for the non-returning form and 1 for the returning one.
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
MUBUF_Real_AllAddr_gfx6_gfx7<op> {
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
// Non-returning forms.
def _ADDR64_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 0>;
def _BOTHEN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 0>;
def _IDXEN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 0>;
def _OFFEN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 0>;
def _OFFSET_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 0>;
// Returning forms.
def _ADDR64_RTN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>,
AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 1>;
def _BOTHEN_RTN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>,
AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 1>;
def _IDXEN_RTN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>,
AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 1>;
def _OFFEN_RTN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>,
AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 1>;
def _OFFSET_RTN_gfx6_gfx7 :
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>,
AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 1>;
}
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
@ -2162,13 +2173,13 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{18-16} = op;
let Inst{31-26} = 0x3a; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
let Inst{54} = !if(ps.has_slc, slc, ?);
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
@ -2179,7 +2190,7 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
let Inst{25-19} = format;
let Inst{53} = op{3};
}
@ -2251,15 +2262,17 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
MUBUF_Real<ps>,
Enc64,
SIMCInstr<ps.PseudoInstr, Enc> {
SIMCInstr<ps.PseudoInstr, Enc>,
AtomicNoRet<!subst("_RTN","",NAME), !if(ps.IsAtomicNoRet, 0,
!if(ps.IsAtomicRet, 1, ?))> {
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{15} = !if(ps.has_sccb, sccb, ps.sccb_value);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{15} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
let Inst{16} = ps.lds;
let Inst{17} = !if(ps.has_slc, slc, ?);
let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@ -2290,7 +2303,7 @@ multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
}
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
multiclass MUBUF_Real_AllAddr_vi<bits<7> op, bit isAtomic = 0, bit isAtomicRet = 0> {
defm _OFFSET : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
defm _OFFEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
defm _IDXEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
@ -2346,9 +2359,9 @@ class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{16} = ps.lds;
let Inst{17} = !if(ps.has_slc, slc, ?);
let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{24-18} = op;
let Inst{31-26} = 0x38; //encoding
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@ -2366,7 +2379,7 @@ multiclass MUBUF_Real_AllAddr_gfx80<bits<7> op> {
}
multiclass MUBUF_Real_Atomic_vi<bits<7> op> :
MUBUF_Real_AllAddr_vi<op> {
MUBUF_Real_AllAddr_vi<op, 1, 0> {
defm _OFFSET_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
defm _OFFEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
defm _IDXEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
@ -2488,7 +2501,7 @@ class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{18-15} = op;
let Inst{22-19} = dfmt;
let Inst{25-23} = nfmt;
@ -2496,8 +2509,9 @@ class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
let Inst{53} = !if(ps.has_sccb, sccb, ps.sccb_value);
let Inst{54} = !if(ps.has_slc, slc, ?);
let Inst{53} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}
@ -2540,7 +2554,7 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
let Inst{11-0} = !if(ps.has_offset, offset, ?);
let Inst{12} = ps.offen;
let Inst{13} = ps.idxen;
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
let Inst{18-15} = op;
let Inst{22-19} = dfmt;
let Inst{25-23} = nfmt;
@ -2548,7 +2562,7 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
let Inst{54} = !if(ps.has_slc, slc, ?);
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
let Inst{55} = !if(ps.has_tfe, tfe, ?);
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
}

View File

@ -541,9 +541,20 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) &&
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) {
insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1);
(SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::cpol);
if (CPolPos != -1) {
unsigned CPol =
(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
AMDGPU::CPol::GLC : 0;
if (MI.getNumOperands() <= (unsigned)CPolPos) {
insertNamedMCOperand(MI, MCOperand::createImm(CPol),
AMDGPU::OpName::cpol);
} else if (CPol) {
MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
}
}
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
@ -559,20 +570,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::FLAT |
SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
if (!isGFX10()) {
int DLCOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dlc);
if (DLCOpIdx != -1) {
auto DLCIter = MI.begin();
std::advance(DLCIter, DLCOpIdx);
MI.insert(DLCIter, MCOperand::createImm(0));
}
}
}
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
int SWZOpIdx =

View File

@ -94,12 +94,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
bits<7> saddr;
bits<10> vdst;
bits<1> slc;
bits<1> glc;
bits<1> dlc;
// Only valid on gfx90a+
bits<1> sccb;
bits<5> cpol;
// Only valid on gfx9
bits<1> lds = 0; // XXX - What does this actually do?
@ -122,8 +117,8 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{13} = lds;
let Inst{15-14} = seg;
let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
let Inst{17} = slc;
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
let Inst{17} = cpol{CPolBit.SLC};
let Inst{24-18} = op;
let Inst{31-26} = 0x37; // Encoding.
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
@ -156,9 +151,9 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
(ins VReg_64:$vaddr)),
(ins flat_offset:$offset)),
// FIXME: Operands with default values do not work with following non-optional operands.
!if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, vdata_op:$vdst_in),
(ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
" $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> {
!if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in),
(ins CPol_0:$cpol))),
" $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = HasSaddr;
@ -178,8 +173,8 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
!if(EnableSaddr,
(ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
(ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> {
(ins flat_offset:$offset, CPol_0:$cpol)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@ -203,9 +198,9 @@ class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
opName,
(outs regClass:$vdst),
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
(ins flat_offset:$offset, CPol_0:$cpol),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
" $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
" $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let is_flat_global = 1;
let has_data = 0;
let mayLoad = 1;
@ -241,8 +236,8 @@ class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
opName,
(outs),
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc, SCCB_0:$sccb)),
" $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
(ins flat_offset:$offset, CPol:$cpol)),
" $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let is_flat_global = 1;
let mayLoad = 0;
let mayStore = 1;
@ -280,9 +275,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
!if(EnableVaddr,
(ins VGPR_32:$vaddr, flat_offset:$offset),
(ins flat_offset:$offset))),
!if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, getLdStRegisterOperand<regClass>.ret:$vdst_in),
(ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
" $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
!if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
(ins CPol_0:$cpol))),
" $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = 1;
@ -301,11 +296,11 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
opName,
(outs),
!if(EnableSaddr,
(ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
(ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
!if(EnableVaddr,
(ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
(ins vdata_op:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
" "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
(ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
(ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))),
" "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;
@ -382,8 +377,8 @@ multiclass FLAT_Atomic_Pseudo<
RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
" $vaddr, $vdata$offset$slc$sccb">,
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata$offset$cpol">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;
@ -393,8 +388,8 @@ multiclass FLAT_Atomic_Pseudo<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
" $vdst, $vaddr, $vdata$offset$glc1$slc$sccb",
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata$offset$cpol",
[(set vt:$vdst,
(atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
@ -416,8 +411,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
" $vaddr, $vdata, off$offset$slc$sccb">,
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata, off$offset$cpol">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let has_saddr = 1;
@ -427,8 +422,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
" $vaddr, $vdata, $saddr$offset$slc$sccb">,
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
let has_saddr = 1;
@ -451,8 +446,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
" $vdst, $vaddr, $vdata, off$offset$glc1$slc$sccb",
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata, off$offset$cpol",
[(set vt:$vdst,
(atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,
@ -463,8 +458,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<
def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
" $vdst, $vaddr, $vdata, $saddr$offset$glc1$slc$sccb">,
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;
@ -827,17 +822,17 @@ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCN
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, 0, $in)
(inst $vaddr, $offset, 0, $in)
>;
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, 0, $in)
(inst $vaddr, $offset, 0, $in)
>;
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)),
(inst $saddr, $voffset, $offset, 0, 0, 0, 0, $in)
(inst $saddr, $voffset, $offset, 0, $in)
>;
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@ -847,7 +842,7 @@ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
(inst $saddr, $voffset, $offset, 0, 0, 0)
(inst $saddr, $voffset, $offset, 0)
>;
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@ -928,7 +923,7 @@ class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, 0, $in)
(inst $vaddr, $offset, 0, $in)
>;
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
@ -943,7 +938,7 @@ class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType v
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
(inst $saddr, $offset, 0, 0, 0, 0, $in)
(inst $saddr, $offset, 0, $in)
>;
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
@ -1390,7 +1385,7 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
let Inst{25} = !if(has_sccb, sccb, ps.sccbValue);
let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
let AsmString = ps.Mnemonic #
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}
@ -1567,7 +1562,7 @@ class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
let DecoderNamespace = "GFX10";
let Inst{11-0} = offset{11-0};
let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
let Inst{55} = 0;
}

View File

@ -202,26 +202,19 @@ void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "gds");
}
void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (AMDGPU::isGFX10Plus(STI))
printNamedBit(MI, OpNo, O, "dlc");
}
void AMDGPUInstPrinter::printSCCB(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (AMDGPU::isGFX90A(STI))
printNamedBit(MI, OpNo, O, "scc");
}
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "glc");
}
void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "slc");
auto Imm = MI->getOperand(OpNo).getImm();
if (Imm & CPol::GLC)
O << " glc";
if (Imm & CPol::SLC)
O << " slc";
if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
O << " scc";
if (Imm & ~CPol::ALL)
O << " /* unexpected cache policy bit */";
}
void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,

View File

@ -68,14 +68,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSCCB(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,

View File

@ -263,10 +263,10 @@ class MIMG_NoSampler_Helper <mimgopc op, string asm,
string dns="">
: MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -276,10 +276,10 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
string dns="">
: MIMG_gfx90a <op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -288,10 +288,10 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -301,10 +301,10 @@ class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -389,10 +389,10 @@ class MIMG_Store_Helper <mimgopc op, string asm,
string dns = "">
: MIMG_gfx6789<op.BASE, (outs), dns> {
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -403,10 +403,10 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
: MIMG_gfx90a<op.BASE, (outs), dns> {
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -415,10 +415,10 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs), dns> {
let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -429,10 +429,10 @@ class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -507,9 +507,9 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da";
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
}
class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
@ -520,9 +520,9 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da";
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
}
class MIMG_Atomic_si<mimgopc op, string asm, RegisterClass data_rc,
@ -555,9 +555,9 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
@ -571,9 +571,9 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
@ -660,10 +660,10 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -671,10 +671,10 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx90a<op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -683,11 +683,11 @@ class MIMG_Sampler_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
#"$dlc$glc$slc$r128$a16$tfe$lwe"
#"$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -697,11 +697,11 @@ class MIMG_Sampler_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
#"$dlc$glc$slc$r128$a16$tfe$lwe"
#"$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}
@ -890,9 +890,7 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16>
dmask = 0xf,
unorm = 1,
d16 = 0,
glc = 0,
slc = 0,
dlc = 0,
cpol = 0,
tfe = 0,
lwe = 0,
r128 = 1,

View File

@ -276,6 +276,18 @@ enum : unsigned {
} // namespace AMDGPU
namespace AMDGPU {
namespace CPol {

// Bit flags carried by the single cache-policy ("cpol") immediate operand.
// Each enumerator occupies one bit so that several flags can be OR-ed
// together into one operand value. Bit 3 (value 8) is not assigned here.
enum CPol {
  GLC = 1 << 0, // "glc" modifier
  SLC = 1 << 1, // "slc" modifier
  DLC = 1 << 2, // "dlc" modifier
  SCC = 1 << 4, // "scc" modifier
  // Mask covering every flag defined above; any bit outside this mask is
  // not a known cache-policy bit.
  ALL = GLC | SLC | DLC | SCC
};

} // namespace CPol
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
enum Id { // Message ID, width(4) [3:0].

View File

@ -135,10 +135,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(SpillReg, RegState::Kill)
.addReg(SPReg)
.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // dlc
.addImm(0) // scc
.addImm(0) // cpol
.addMemOperand(MMO);
return;
}
@ -148,12 +145,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // cpol
.addImm(0) // tfe
.addImm(0) // dlc
.addImm(0) // swz
.addImm(0) // scc
.addMemOperand(MMO);
return;
}
@ -180,10 +174,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(SpillReg, RegState::Kill)
.addReg(OffsetReg, HasOffsetReg ? RegState::Kill : 0)
.addImm(0) // offset
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // dlc
.addImm(0) // scc
.addImm(0) // cpol
.addMemOperand(MMO);
if (!HasOffsetReg) {
@ -205,12 +196,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(0) // offset
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // cpol
.addImm(0) // tfe
.addImm(0) // dlc
.addImm(0) // swz
.addImm(0) // scc
.addMemOperand(MMO);
} else {
// No free register, use stack pointer and restore afterwards.
@ -223,12 +211,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(0) // offset
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // cpol
.addImm(0) // tfe
.addImm(0) // dlc
.addImm(0) // swz
.addImm(0) // scc
.addMemOperand(MMO);
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SUB_U32), SPReg)
@ -259,10 +244,7 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg)
.addReg(SPReg)
.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // dlc
.addImm(0) // scc
.addImm(0) // cpol
.addMemOperand(MMO);
return;
}
@ -278,10 +260,7 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
SpillReg)
.addReg(OffsetReg, RegState::Kill)
.addImm(0)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // dlc
.addImm(0) // scc
.addImm(0) // cpol
.addMemOperand(MMO);
return;
}
@ -292,12 +271,9 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // cpol
.addImm(0) // tfe
.addImm(0) // dlc
.addImm(0) // swz
.addImm(0) // scc
.addMemOperand(MMO);
return;
}
@ -316,12 +292,9 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
.addReg(ScratchRsrcReg)
.addReg(SPReg)
.addImm(0)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // cpol
.addImm(0) // tfe
.addImm(0) // dlc
.addImm(0) // swz
.addImm(0) // scc
.addMemOperand(MMO);
}
@ -416,8 +389,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
.addReg(FlatScrInit)
.addImm(EncodedOffset) // offset
.addImm(0) // glc
.addImm(0) // dlc
.addImm(0) // cpol
.addMemOperand(MMO);
// Mask the offset in [47:0] of the descriptor
@ -713,8 +685,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
.addReg(Rsrc01)
.addImm(EncodedOffset) // offset
.addImm(0) // glc
.addImm(0) // dlc
.addImm(0) // cpol
.addReg(ScratchRsrcReg, RegState::ImplicitDefine)
.addMemOperand(MMO);
} else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
@ -748,8 +719,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
.addReg(MFI->getImplicitBufferPtrUserSGPR())
.addImm(0) // offset
.addImm(0) // glc
.addImm(0) // dlc
.addImm(0) // cpol
.addMemOperand(MMO)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);

View File

@ -5768,28 +5768,6 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
return DAG.getBuildVector(Type, DL, VecElts);
}
/// Splits a constant cache-policy operand into per-flag target constants.
///
/// For each non-null output pointer the matching bit (GLC = 0x1, SLC = 0x2,
/// DLC = 0x4) is materialized as an i32 target constant of 0 or 1 and then
/// cleared from the working value. Bits whose output pointer is null are left
/// in place.
///
/// \returns true if no bits remain set after the requested ones are consumed.
static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
                             SDValue *GLC, SDValue *SLC, SDValue *DLC) {
  uint64_t Remaining =
      cast<ConstantSDNode>(CachePolicy.getNode())->getZExtValue();
  SDLoc DL(CachePolicy);

  // Writes the state of Mask into *Out (when requested) and removes that bit
  // from the set of still-unconsumed bits.
  auto TakeBit = [&](SDValue *Out, uint64_t Mask) {
    if (!Out)
      return;
    *Out = DAG.getTargetConstant((Remaining & Mask) ? 1 : 0, DL, MVT::i32);
    Remaining &= ~Mask;
  };

  TakeBit(GLC, 0x1);
  TakeBit(SLC, 0x2);
  TakeBit(DLC, 0x4);

  return Remaining == 0;
}
static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT,
SDValue Src, int ExtraElts) {
EVT SrcVT = Src.getValueType();
@ -6184,19 +6162,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
}
SDValue GLC;
SDValue SLC;
SDValue DLC;
if (BaseOpcode->Atomic) {
GLC = True; // TODO no-return optimization
if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
DAG, nullptr, &SLC, IsGFX10Plus ? &DLC : nullptr))
return Op;
} else {
if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
DAG, &GLC, &SLC, IsGFX10Plus ? &DLC : nullptr))
return Op;
}
unsigned CPol = cast<ConstantSDNode>(
Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
if (CPol & ~AMDGPU::CPol::ALL)
return Op;
SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
@ -6212,12 +6183,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
Ops.push_back(Unorm);
if (!IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(0, SDLoc(), MVT::i1));
if (IsGFX10Plus)
Ops.push_back(DLC);
Ops.push_back(GLC);
Ops.push_back(SLC);
Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
if (IsGFX10Plus)
@ -6526,11 +6492,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
SDValue GLC;
SDValue DLC = DAG.getTargetConstant(0, DL, MVT::i1);
if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
IsGFX10Plus ? &DLC : nullptr))
unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
if (CPol & ~AMDGPU::CPol::ALL)
return Op;
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
@ -11229,10 +11192,12 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
if (NoRetAtomicOp != -1) {
if (!Node->hasAnyUseOfValue(0)) {
int Glc1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::glc1);
if (Glc1Idx != -1)
MI.RemoveOperand(Glc1Idx);
int CPolIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::cpol);
if (CPolIdx != -1) {
MachineOperand &CPol = MI.getOperand(CPolIdx);
CPol.setImm(CPol.getImm() & ~AMDGPU::CPol::GLC);
}
MI.RemoveOperand(0);
MI.setDesc(TII->get(NoRetAtomicOp));
return;

View File

@ -261,6 +261,13 @@ class Enc64 {
int Size = 8;
}
// Bit positions of the individual cache-policy flags inside a multi-bit
// `cpol` field. Encoding classes select one flag with cpol{CPolBit.<NAME>}.
// Position 3 is not assigned here.
def CPolBit {
int GLC = 0;
int SLC = 1;
int DLC = 2;
int SCC = 4;
}
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
class VINTRPe <bits<2> op> : Enc32 {
@ -281,21 +288,20 @@ class MIMGe : Enc64 {
bits<10> vdata;
bits<4> dmask;
bits<1> unorm;
bits<1> glc;
bits<5> cpol;
bits<1> r128;
bits<1> tfe;
bits<1> lwe;
bits<1> slc;
bit d16;
bits<7> srsrc;
bits<7> ssamp;
let Inst{11-8} = dmask;
let Inst{12} = unorm;
let Inst{13} = glc;
let Inst{13} = cpol{CPolBit.GLC};
let Inst{15} = r128;
let Inst{17} = lwe;
let Inst{25} = slc;
let Inst{25} = cpol{CPolBit.SLC};
let Inst{31-26} = 0x3c;
let Inst{47-40} = vdata{7-0};
let Inst{52-48} = srsrc{6-2};
@ -306,10 +312,9 @@ class MIMGe : Enc64 {
class MIMGe_gfx6789 <bits<8> op> : MIMGe {
bits<8> vaddr;
bits<1> da;
bits<1> sccb;
let Inst{0} = op{7};
let Inst{7} = sccb;
let Inst{7} = cpol{CPolBit.SCC};
let Inst{14} = da;
let Inst{16} = tfe;
let Inst{24-18} = op{6-0};
@ -319,10 +324,9 @@ class MIMGe_gfx6789 <bits<8> op> : MIMGe {
class MIMGe_gfx90a <bits<8> op> : MIMGe {
bits<8> vaddr;
bits<1> da;
bits<1> sccb;
let Inst{0} = op{7};
let Inst{7} = sccb;
let Inst{7} = cpol{CPolBit.SCC};
let Inst{14} = da;
let Inst{16} = vdata{9}; // ACC bit
let Inst{24-18} = op{6-0};
@ -333,13 +337,12 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
bits<8> vaddr0;
bits<3> dim;
bits<2> nsa;
bits<1> dlc;
bits<1> a16;
let Inst{0} = op{7};
let Inst{2-1} = nsa;
let Inst{5-3} = dim;
let Inst{7} = dlc;
let Inst{7} = cpol{CPolBit.DLC};
let Inst{16} = tfe;
let Inst{24-18} = op{6-0};
let Inst{39-32} = vaddr0;

View File

@ -5588,21 +5588,10 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
.add(*SOffset)
.add(*Offset);
// Atomics do not have this operand.
if (const MachineOperand *GLC =
getNamedOperand(MI, AMDGPU::OpName::glc)) {
MIB.addImm(GLC->getImm());
if (const MachineOperand *CPol =
getNamedOperand(MI, AMDGPU::OpName::cpol)) {
MIB.addImm(CPol->getImm());
}
if (const MachineOperand *DLC =
getNamedOperand(MI, AMDGPU::OpName::dlc)) {
MIB.addImm(DLC->getImm());
}
if (const MachineOperand *SCCB =
getNamedOperand(MI, AMDGPU::OpName::sccb)) {
MIB.addImm(SCCB->getImm());
}
MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));
if (const MachineOperand *TFE =
getNamedOperand(MI, AMDGPU::OpName::tfe)) {
@ -5622,7 +5611,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
.addReg(NewSRsrc)
.add(*SOffset)
.add(*Offset)
.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
.addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol))
.cloneMemRefs(MI);
}

View File

@ -818,24 +818,16 @@ def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//
def extract_glc : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
}]>;
def extract_slc : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
}]>;
def extract_dlc : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
def extract_cpol : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
}]>;
def extract_swz : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;
def extract_sccb : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 1, SDLoc(N), MVT::i8);
def set_glc : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;
//===----------------------------------------------------------------------===//
@ -1090,6 +1082,12 @@ class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
let ParserMatchClass = MatchClass;
}
// Named i32 immediate operand whose default value is 1 (the `(ops (i32 1))`
// default below). The printer method is "print" # Name and the assembler
// match class is supplied by the instantiation.
class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> :
OperandWithDefaultOps<i32, (ops (i32 1))> {
let PrintMethod = "print"#Name;
let ParserMatchClass = MatchClass;
}
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@ -1113,18 +1111,9 @@ def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;
def SCCB : NamedOperandBit<"SCCB", NamedMatchClass<"SCCB">>;
def SCCB_0 : NamedOperandBit_0<"SCCB", NamedMatchClass<"SCCB">>;
def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def DLC_0 : NamedOperandBit_0<"DLC", NamedMatchClass<"DLC">>;
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def GLC_0 : NamedOperandBit_0<"GLC", NamedMatchClass<"GLC">>;
def GLC_1 : NamedOperandBit_1<"GLC", NamedMatchClass<"GLC_1">>;
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>;
def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;

View File

@ -104,10 +104,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
unsigned BaseOff;
unsigned DMask;
InstClassEnum InstClass;
bool GLC = 0;
bool SLC = 0;
bool DLC = 0;
bool SCCB = 0; // vmem only.
unsigned CPol;
bool UseST64;
int AddrIdx[MaxAddressRegs];
const MachineOperand *AddrReg[MaxAddressRegs];
@ -533,14 +530,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
Offset &= 0xffff;
} else if (InstClass != MIMG) {
GLC = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm();
if (InstClass != S_BUFFER_LOAD_IMM) {
SLC = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm();
}
DLC = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm();
if (InstClass != S_BUFFER_LOAD_IMM) {
SCCB = TII.getNamedOperand(*I, AMDGPU::OpName::sccb)->getImm();
}
CPol = TII.getNamedOperand(*I, AMDGPU::OpName::cpol)->getImm();
}
AddressRegs Regs = getRegs(Opc, TII);
@ -690,10 +680,9 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
return false;
// Check other optional immediate operands for equality.
unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
AMDGPU::OpName::da, AMDGPU::OpName::r128,
AMDGPU::OpName::a16, AMDGPU::OpName::dlc};
unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol, AMDGPU::OpName::d16,
AMDGPU::OpName::unorm, AMDGPU::OpName::da,
AMDGPU::OpName::r128, AMDGPU::OpName::a16};
for (auto op : OperandsToMatch) {
int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
@ -798,9 +787,8 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
return (EltOffset0 + CI.Width == EltOffset1 ||
EltOffset1 + Paired.Width == EltOffset0) &&
CI.GLC == Paired.GLC && CI.DLC == Paired.DLC &&
(CI.InstClass == S_BUFFER_LOAD_IMM ||
(CI.SLC == Paired.SLC && CI.SCCB == Paired.SCCB));
CI.CPol == Paired.CPol &&
(CI.InstClass == S_BUFFER_LOAD_IMM || CI.CPol == Paired.CPol);
}
// If the offset in elements doesn't fit in 8-bits, we might be able to use
@ -1301,8 +1289,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
.addImm(MergedOffset) // offset
.addImm(CI.GLC) // glc
.addImm(CI.DLC) // dlc
.addImm(CI.CPol) // cpol
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
@ -1361,12 +1348,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(MergedOffset) // offset
.addImm(CI.GLC) // glc
.addImm(CI.SLC) // slc
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(CI.DLC) // dlc
.addImm(0) // swz
.addImm(CI.SCCB) // scc
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
@ -1429,12 +1413,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(MergedOffset) // offset
.addImm(JoinedFormat) // format
.addImm(CI.GLC) // glc
.addImm(CI.SLC) // slc
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(CI.DLC) // dlc
.addImm(0) // swz
.addImm(CI.SCCB) // scc
.addMemOperand(
combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
@ -1510,12 +1491,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset, Paired.Offset)) // offset
.addImm(JoinedFormat) // format
.addImm(CI.GLC) // glc
.addImm(CI.SLC) // slc
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(CI.DLC) // dlc
.addImm(0) // swz
.addImm(CI.SCCB) // scc
.addMemOperand(
combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
@ -1665,12 +1643,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
.addImm(std::min(CI.Offset, Paired.Offset)) // offset
.addImm(CI.GLC) // glc
.addImm(CI.SLC) // slc
.addImm(CI.CPol) // cpol
.addImm(0) // tfe
.addImm(CI.DLC) // dlc
.addImm(0) // swz
.addImm(CI.SCCB) // scc
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
moveInstsAfter(MIB, InstsToMove);

View File

@ -84,22 +84,6 @@ enum class SIAtomicAddrSpace {
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
/// Turns on the single-bit immediate operand \p BitName of \p MI.
/// \returns true when the operand exists and was flipped from 0 to 1; false
/// when the opcode has no such operand or its immediate is already non-zero.
template <uint16_t BitName>
bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
  const int OperandIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
  if (OperandIdx != -1) {
    MachineOperand &Flag = MI->getOperand(OperandIdx);
    if (Flag.getImm() == 0) {
      Flag.setImm(1);
      return true;
    }
  }
  return false;
}
class SIMemOpInfo final {
private:
@ -288,6 +272,11 @@ protected:
SICacheControl(const GCNSubtarget &ST);
/// Sets the cache-policy flag \p Bit in the cpol operand of instruction
/// \p MI, if \p MI has such an operand.
/// \returns Returns true if \p MI is modified, false otherwise.
bool enableNamedBit(const MachineBasicBlock::iterator MI,
AMDGPU::CPol::CPol Bit) const;
public:
/// Create a cache control for the subtarget \p ST.
@ -369,13 +358,13 @@ protected:
/// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
return enableNamedBit<AMDGPU::OpName::glc>(MI);
return enableNamedBit(MI, AMDGPU::CPol::GLC);
}
/// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
return enableNamedBit<AMDGPU::OpName::slc>(MI);
return enableNamedBit(MI, AMDGPU::CPol::SLC);
}
public:
@ -436,7 +425,7 @@ protected:
/// Sets SCC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableSCCBit(const MachineBasicBlock::iterator &MI) const {
return enableNamedBit<AMDGPU::OpName::sccb>(MI);
return enableNamedBit(MI, AMDGPU::CPol::SCC);
}
public:
@ -485,7 +474,7 @@ protected:
/// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
/// is modified, false otherwise.
bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
return enableNamedBit<AMDGPU::OpName::dlc>(MI);
return enableNamedBit(MI, AMDGPU::CPol::DLC);
}
public:
@ -785,6 +774,16 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}
/// Sets the cache-policy flag \p Bit in the cpol operand of \p MI.
/// \returns true only if \p MI was actually modified; false when \p MI has no
/// cpol operand or when \p Bit is already set in it.
bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
                                    AMDGPU::CPol::CPol Bit) const {
  MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
  if (!CPol)
    return false;

  const int64_t OldPolicy = CPol->getImm();
  const int64_t NewPolicy = OldPolicy | Bit;
  // Report "unchanged" when the flag was already present so callers that
  // accumulate a Changed status are not told about a no-op.
  if (NewPolicy == OldPolicy)
    return false;

  CPol->setImm(NewPolicy);
  return true;
}
/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();

View File

@ -743,12 +743,9 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
.add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // cpol
.addImm(0) // tfe
.addImm(0) // dlc
.addImm(0) // swz
.addImm(0) // scc
.cloneMemRefs(*MI);
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@ -1010,13 +1007,10 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
MIB.addReg(SOffset, SOffsetRegState);
}
MIB.addImm(Offset + RemRegOffset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0); // tfe for MUBUF or dlc for FLAT
.addImm(0); // cpol
if (!IsFlat)
MIB.addImm(0) // dlc
MIB.addImm(0) // tfe
.addImm(0); // swz
MIB.addImm(0); // scc
MIB.addMemOperand(NewMMO);
if (!IsAGPR && NeedSuperRegDef)

View File

@ -71,6 +71,7 @@ class SM_Real <SM_Pseudo ps>
bits<7> sdst;
bits<32> offset;
bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
bits<5> cpol;
}
class SM_Probe_Pseudo <string opName, dag ins, bit isImm>
@ -122,8 +123,8 @@ multiclass SM_Pseudo_Loads<string opName,
RegisterClass dstClass> {
def _IMM : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
(ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
" $sdst, $sbase, $offset$glc$dlc", []> {
(ins baseClass:$sbase, i32imm:$offset, CPol:$cpol),
" $sdst, $sbase, $offset$cpol", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let PseudoInstr = opName # "_IMM";
@ -133,8 +134,8 @@ multiclass SM_Pseudo_Loads<string opName,
def _SGPR : SM_Load_Pseudo <opName,
(outs dstClass:$sdst),
(ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
" $sdst, $sbase, $offset$glc$dlc", []> {
(ins baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
" $sdst, $sbase, $offset$cpol", []> {
let BaseClass = baseClass;
let PseudoInstr = opName # "_SGPR";
let has_glc = 1;
@ -146,8 +147,8 @@ multiclass SM_Pseudo_Stores<string opName,
RegisterClass baseClass,
RegisterClass srcClass> {
def _IMM : SM_Store_Pseudo <opName,
(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
" $sdata, $sbase, $offset$glc$dlc", []> {
(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
" $sdata, $sbase, $offset$cpol", []> {
let offset_is_imm = 1;
let BaseClass = baseClass;
let SrcClass = srcClass;
@ -155,8 +156,8 @@ multiclass SM_Pseudo_Stores<string opName,
}
def _SGPR : SM_Store_Pseudo <opName,
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
" $sdata, $sbase, $offset$glc$dlc", []> {
(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
" $sdata, $sbase, $offset$cpol", []> {
let BaseClass = baseClass;
let SrcClass = srcClass;
let PseudoInstr = opName # "_SGPR";
@ -232,6 +233,8 @@ class SM_Atomic_Pseudo <string opName,
let IsAtomicNoRet = !not(isRet);
let IsAtomicRet = isRet;
let AsmMatchConverter = "cvtSMEMAtomic";
}
class SM_Pseudo_Atomic<string opName,
@ -241,13 +244,14 @@ class SM_Pseudo_Atomic<string opName,
bit isRet,
string opNameWithSuffix = opName # !if(isImm,
!if(isRet, "_IMM_RTN", "_IMM"),
!if(isRet, "_SGPR_RTN", "_SGPR"))> :
!if(isRet, "_SGPR_RTN", "_SGPR")),
Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
SM_Atomic_Pseudo<opName,
!if(isRet, (outs dataClass:$sdst), (outs)),
!if(isImm,
(ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, DLC:$dlc),
(ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
!if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
(ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
(ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, CPolTy:$cpol)),
!if(isRet, " $sdst", " $sdata") # ", $sbase, $offset$cpol",
isRet>,
AtomicNoRet <opNameWithSuffix, isRet> {
let offset_is_imm = isImm;
@ -463,13 +467,13 @@ multiclass SM_Real_Loads_si<bits<5> op, string ps,
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_si : SMRD_Real_si <op, immPs> {
let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
}
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _SGPR_si : SMRD_Real_si <op, sgprPs> {
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@ -497,15 +501,13 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
: SM_Real<ps>
, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
, Enc64 {
bit glc;
let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
let Inst{16} = !if(ps.has_glc, glc, ?);
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
let Inst{17} = imm;
let Inst{25-18} = op;
let Inst{31-26} = 0x30; //encoding
@ -519,10 +521,10 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_vi : SMEM_Real_vi <op, immPs> {
let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@ -540,11 +542,11 @@ multiclass SM_Real_Stores_vi<bits<8> op, string ps,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@ -604,8 +606,8 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let glc = ps.glc;
let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
let cpol{CPolBit.GLC} = ps.glc;
let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
@ -694,7 +696,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
let AssemblerPredicate = isGFX7Only;
let DecoderNamespace = "GFX7";
let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);
let LGKM_CNT = ps.LGKM_CNT;
let mayLoad = ps.mayLoad;
@ -772,26 +774,26 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 1. IMM offset
def : GCNPat <
(smrd_load (SMRDImm i64:$sbase, i32:$offset)),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
>;
// 2. 32-bit IMM offset on CI
def : GCNPat <
(smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
let OtherPredicates = [isGFX7Only];
}
// 3. SGPR offset
def : GCNPat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
>;
// 4. No offset
def : GCNPat <
(vt (smrd_load (i64 SReg_64:$sbase))),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
>;
}
@ -799,8 +801,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 1. Offset as an immediate
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
(vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
(extract_dlc $cachepolicy)))> {
(vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
let AddedComplexity = 2;
}
@ -808,7 +809,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
def : GCNPat <
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
(!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
(extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
(extract_cpol $cachepolicy))> {
let OtherPredicates = [isGFX7Only];
let AddedComplexity = 1;
}
@ -816,8 +817,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 3. Offset loaded in an 32bit SGPR
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
(extract_dlc $cachepolicy)))
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_cpol $cachepolicy)))
>;
}
@ -883,16 +883,13 @@ def : GCNPat <
class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
bit glc;
bit dlc;
let AssemblerPredicate = isGFX10Plus;
let DecoderNamespace = "GFX10";
let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
let Inst{14} = !if(ps.has_dlc, dlc, ?);
let Inst{16} = !if(ps.has_glc, glc, ?);
let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
let Inst{25-18} = op;
let Inst{31-26} = 0x3d;
let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?);
@ -904,10 +901,10 @@ multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@ -924,11 +921,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
// FIXME: The operand name $offset is inconsistent with $soff used
// in the pseudo
def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
}
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
}
}
@ -987,15 +984,14 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {
bits<7> sdata;
bit dlc;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let glc = ps.glc;
let cpol{CPolBit.GLC} = ps.glc;
let Inst{14} = !if(ps.has_dlc, dlc, 0);
let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {

View File

@ -19,7 +19,7 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -27,7 +27,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -35,7 +35,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -71,7 +71,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -79,7 +79,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -97,7 +97,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -125,7 +125,7 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -133,7 +133,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -141,7 +141,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -177,7 +177,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -185,7 +185,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -203,7 +203,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -241,7 +241,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -259,7 +259,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -277,7 +277,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -305,21 +305,21 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@ -343,21 +343,21 @@ body: |
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = COPY $vgpr4_vgpr5

View File

@ -26,7 +26,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
@ -40,7 +40,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
@ -49,7 +49,7 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -57,7 +57,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -65,7 +65,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -73,7 +73,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -104,7 +104,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
@ -118,7 +118,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
@ -137,7 +137,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -155,7 +155,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -163,7 +163,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -171,7 +171,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -204,7 +204,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
@ -218,7 +218,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
@ -227,7 +227,7 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -235,7 +235,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -243,7 +243,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -251,7 +251,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -282,7 +282,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
@ -296,7 +296,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
@ -315,7 +315,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -333,7 +333,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -341,7 +341,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -349,7 +349,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -392,7 +392,7 @@ body: |
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY7]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
@ -416,7 +416,7 @@ body: |
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY7]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
@ -435,7 +435,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -453,7 +453,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -461,7 +461,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -469,7 +469,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -502,7 +502,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -515,7 +515,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@ -523,28 +523,28 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@ -573,7 +573,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -586,7 +586,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@ -594,28 +594,28 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = COPY $vgpr4_vgpr5
@ -643,7 +643,7 @@ body: |
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
@ -656,7 +656,7 @@ body: |
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
@ -666,7 +666,7 @@ body: |
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@ -675,7 +675,7 @@ body: |
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@ -684,7 +684,7 @@ body: |
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@ -693,7 +693,7 @@ body: |
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -723,7 +723,7 @@ body: |
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
@ -736,7 +736,7 @@ body: |
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
@ -756,7 +756,7 @@ body: |
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@ -775,7 +775,7 @@ body: |
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@ -784,7 +784,7 @@ body: |
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@ -793,7 +793,7 @@ body: |
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr2

View File

@ -17,19 +17,19 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -51,17 +51,17 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 0)
@ -91,13 +91,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -113,7 +113,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -147,12 +147,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -167,7 +167,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
@ -199,13 +199,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -221,7 +221,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -255,12 +255,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -275,7 +275,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
@ -307,13 +307,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -329,7 +329,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -363,12 +363,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -383,7 +383,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
@ -415,7 +415,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
@ -431,7 +431,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -447,7 +447,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -481,7 +481,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -496,7 +496,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -511,7 +511,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4097
@ -533,19 +533,19 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -567,17 +567,17 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 0)
@ -607,13 +607,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@ -629,7 +629,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -663,12 +663,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -683,7 +683,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095

View File

@ -18,7 +18,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -29,19 +29,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -63,7 +63,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -73,17 +73,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1)
@ -113,7 +113,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -124,19 +124,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -170,7 +170,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -180,17 +180,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
@ -222,7 +222,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -233,13 +233,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -255,7 +255,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -289,7 +289,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -299,12 +299,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -319,7 +319,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
@ -351,7 +351,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -362,13 +362,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -384,7 +384,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -418,7 +418,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -428,12 +428,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -448,7 +448,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
@ -480,7 +480,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -492,7 +492,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
@ -508,7 +508,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -524,7 +524,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -558,7 +558,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -569,7 +569,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -584,7 +584,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -599,7 +599,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4097
@ -621,7 +621,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s64
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@ -632,19 +632,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -666,7 +666,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -676,17 +676,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 1)
@ -716,7 +716,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@ -727,13 +727,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@ -749,7 +749,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -783,7 +783,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -793,12 +793,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -813,7 +813,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095

View File

@ -17,12 +17,12 @@ body: |
; WAVE64: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy
; WAVE32: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
@ -46,7 +46,7 @@ body: |
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@ -55,7 +55,7 @@ body: |
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@ -85,7 +85,7 @@ body: |
; WAVE64: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec
; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@ -96,7 +96,7 @@ body: |
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@ -124,14 +124,14 @@ body: |
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3

View File

@ -24,9 +24,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@ -92,9 +92,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

View File

@ -25,9 +25,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@ -91,9 +91,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

View File

@ -24,9 +24,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@ -92,9 +92,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

View File

@ -25,9 +25,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@ -91,9 +91,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

View File

@ -18,9 +18,9 @@ body: |
; GCN: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = COPY $vgpr1
@ -133,16 +133,16 @@ body: |
; GCN: %13:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %15:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(p1) = COPY $vgpr2_vgpr3

View File

@ -18,16 +18,16 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI-LABEL: name: fptoui
; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; VI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0

View File

@ -14,10 +14,10 @@ body: |
; CHECK-LABEL: name: fract_f64_neg
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
@ -26,7 +26,7 @@ body: |
; CHECK: %12:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; CHECK: %15:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
@ -63,10 +63,10 @@ body: |
; CHECK-LABEL: name: fract_f64_neg_abs
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
@ -75,7 +75,7 @@ body: |
; CHECK: %13:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; CHECK: %16:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36

View File

@ -99,7 +99,7 @@ body: |
; GCN-LABEL: name: implicit_def_p1_vgpr
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@ -117,7 +117,7 @@ body: |
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p3) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@ -134,7 +134,7 @@ body: |
; GCN-LABEL: name: implicit_def_p4_vgpr
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p4) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)

View File

@ -16,12 +16,12 @@ body: |
; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0)
@ -97,12 +97,12 @@ body: |
; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
@ -242,7 +242,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
@ -257,7 +257,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@ -291,12 +291,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095

View File

@ -23,7 +23,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
@ -33,17 +33,17 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1)
@ -144,7 +144,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
@ -154,17 +154,17 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
@ -349,7 +349,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@ -369,7 +369,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -384,12 +384,12 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@ -418,7 +418,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
@ -428,7 +428,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -443,12 +443,12 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@ -487,7 +487,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@ -507,7 +507,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -522,12 +522,12 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048

View File

@ -19,22 +19,22 @@ body: |
; GFX6-LABEL: name: load_constant_s32_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@ -57,22 +57,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s16_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_v2s16_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_v2s16_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_v2s16_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@ -94,22 +94,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v2s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v2s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v2s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@ -130,22 +130,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s32_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v2s32_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v2s32_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v2s32_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 4)
@ -166,22 +166,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s16_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v4s16_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v4s16_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v4s16_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 4)
@ -203,22 +203,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s32_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX7-LABEL: name: load_constant_v4s32_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX8-LABEL: name: load_constant_v4s32_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX10-LABEL: name: load_constant_v4s32_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 4)
@ -240,22 +240,22 @@ body: |
; GFX6-LABEL: name: load_constant_s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@ -277,22 +277,22 @@ body: |
; GFX6-LABEL: name: load_constant_s64_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_s64_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_s64_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_s64_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 4)
@ -314,22 +314,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX7-LABEL: name: load_constant_v2s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX8-LABEL: name: load_constant_v2s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX10-LABEL: name: load_constant_v2s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 4)
@ -425,22 +425,22 @@ body: |
; GFX6-LABEL: name: load_constant_p3_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_p3_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_p3_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_p3_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@ -462,22 +462,22 @@ body: |
; GFX6-LABEL: name: load_constant_p1_from_8
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_p1_from_8
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_p1_from_8
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_p1_from_8
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@ -573,22 +573,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s16
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_v2s16
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_v2s16
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_v2s16
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4)
@ -610,22 +610,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s16
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v4s16
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v4s16
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v4s16
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4)
@ -684,22 +684,22 @@ body: |
; GFX6-LABEL: name: load_constant_v8s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX7-LABEL: name: load_constant_v8s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX8-LABEL: name: load_constant_v8s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX10-LABEL: name: load_constant_v8s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, align 4, addrspace 4)
@ -721,22 +721,22 @@ body: |
; GFX6-LABEL: name: load_constant_v16s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX7-LABEL: name: load_constant_v16s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX8-LABEL: name: load_constant_v16s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX10-LABEL: name: load_constant_v16s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)
@ -758,22 +758,22 @@ body: |
; GFX6-LABEL: name: load_constant_v8s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX7-LABEL: name: load_constant_v8s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX8-LABEL: name: load_constant_v8s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX10-LABEL: name: load_constant_v8s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s64>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)
@ -799,22 +799,22 @@ body: |
; GFX6-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1020
@ -839,22 +839,22 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1024
@ -879,24 +879,24 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048575
@ -921,24 +921,24 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048576
@ -963,25 +963,25 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1073741823
@ -1013,7 +1013,7 @@ body: |
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX7: liveins: $sgpr0_sgpr1
@ -1026,7 +1026,7 @@ body: |
; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX8: liveins: $sgpr0_sgpr1
@ -1039,12 +1039,12 @@ body: |
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1
@ -1078,7 +1078,7 @@ body: |
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX7: liveins: $sgpr0_sgpr1
@ -1093,7 +1093,7 @@ body: |
; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX8: liveins: $sgpr0_sgpr1
@ -1108,12 +1108,12 @@ body: |
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -524288

View File

@ -19,22 +19,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_4
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_s32_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_s32_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_s32_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@ -56,22 +56,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_2
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX8-LABEL: name: load_flat_s32_from_2
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX9-LABEL: name: load_flat_s32_from_2
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX10-LABEL: name: load_flat_s32_from_2
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 0)
@ -93,22 +93,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_1
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 0)
@ -129,19 +129,19 @@ body: |
; GFX7-LABEL: name: load_flat_v2s32
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_v2s32
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_v2s32
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_v2s32
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -163,22 +163,22 @@ body: |
; GFX7-LABEL: name: load_flat_v3s32
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX8-LABEL: name: load_flat_v3s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX9-LABEL: name: load_flat_v3s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX10-LABEL: name: load_flat_v3s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0)
@ -200,22 +200,22 @@ body: |
; GFX7-LABEL: name: load_flat_v4s32
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_flat_v4s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_flat_v4s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_flat_v4s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
@ -237,22 +237,22 @@ body: |
; GFX7-LABEL: name: load_flat_s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -274,22 +274,22 @@ body: |
; GFX7-LABEL: name: load_flat_v2s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_flat_v2s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_flat_v2s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_flat_v2s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
@ -422,22 +422,22 @@ body: |
; GFX7-LABEL: name: load_flat_p3_from_4
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_p3_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_p3_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_p3_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@ -459,22 +459,22 @@ body: |
; GFX7-LABEL: name: load_flat_p1_from_8
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_p1_from_8
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_p1_from_8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_p1_from_8
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -566,22 +566,22 @@ body: |
; GFX7-LABEL: name: load_flat_v2s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_v2s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_v2s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_v2s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@ -603,22 +603,22 @@ body: |
; GFX7-LABEL: name: load_flat_v4s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_v4s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_v4s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -728,7 +728,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX8: liveins: $vgpr0_vgpr1
@ -743,12 +743,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX10: liveins: $vgpr0_vgpr1
@ -763,7 +763,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2047
@ -797,7 +797,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX8: liveins: $vgpr0_vgpr1
@ -812,12 +812,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX10: liveins: $vgpr0_vgpr1
@ -832,7 +832,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2048
@ -866,7 +866,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX8: liveins: $vgpr0_vgpr1
@ -881,7 +881,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0_vgpr1
@ -896,7 +896,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX10: liveins: $vgpr0_vgpr1
@ -911,7 +911,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2047
@ -945,7 +945,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX8: liveins: $vgpr0_vgpr1
@ -960,7 +960,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
@ -975,7 +975,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX10: liveins: $vgpr0_vgpr1
@ -990,7 +990,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@ -1024,7 +1024,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1039,12 +1039,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1059,7 +1059,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@ -1093,7 +1093,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1108,7 +1108,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0_vgpr1
@ -1123,7 +1123,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1138,7 +1138,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4096
@ -1172,7 +1172,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1187,7 +1187,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0_vgpr1
@ -1202,7 +1202,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1217,7 +1217,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4095
@ -1251,7 +1251,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1266,7 +1266,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0_vgpr1
@ -1281,7 +1281,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1296,7 +1296,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4096
@ -1330,7 +1330,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1345,7 +1345,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1360,7 +1360,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1375,7 +1375,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8191
@ -1409,7 +1409,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX8: liveins: $vgpr0_vgpr1
@ -1424,7 +1424,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0_vgpr1
@ -1439,7 +1439,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX10: liveins: $vgpr0_vgpr1
@ -1454,7 +1454,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8192
@ -1488,7 +1488,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1503,7 +1503,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1518,7 +1518,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1533,7 +1533,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8191
@ -1567,7 +1567,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX8: liveins: $vgpr0_vgpr1
@ -1582,7 +1582,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0_vgpr1
@ -1597,7 +1597,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX10: liveins: $vgpr0_vgpr1
@ -1612,7 +1612,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8192

View File

@ -16,13 +16,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(p1) = COPY %0
@ -47,13 +47,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -81,13 +81,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -125,7 +125,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@ -141,7 +141,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -169,7 +169,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@ -195,7 +195,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -225,7 +225,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@ -251,7 +251,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -279,13 +279,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4096
@ -310,13 +310,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4097
@ -351,7 +351,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
; GFX10: liveins: $sgpr0_sgpr1
@ -367,7 +367,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4097
@ -392,13 +392,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 2049
@ -423,7 +423,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX10: liveins: $sgpr0_sgpr1
@ -439,7 +439,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -2049
@ -463,13 +463,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294967295
@ -503,7 +503,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296
; GFX10: liveins: $sgpr0_sgpr1
@ -519,7 +519,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294967296
@ -554,7 +554,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
; GFX10: liveins: $sgpr0_sgpr1
@ -570,7 +570,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294971390
@ -605,7 +605,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295
; GFX10: liveins: $sgpr0_sgpr1
@ -621,7 +621,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4294967295
@ -655,7 +655,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296
; GFX10: liveins: $sgpr0_sgpr1
@ -671,7 +671,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4294967296
@ -693,12 +693,12 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_copy_undef_sgpr
; GFX9: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr
; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(p1) = COPY %0
@ -717,11 +717,11 @@ body: |
bb.0:
; GFX9-LABEL: name: load_global_s32_from_undef_vgpr
; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_undef_vgpr
; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1)

View File

@ -27,7 +27,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_4
; GFX7: liveins: $vgpr0_vgpr1
@ -37,27 +37,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_s32_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_s32_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@ -84,7 +84,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_2
; GFX7: liveins: $vgpr0_vgpr1
@ -94,27 +94,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_2
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX8-LABEL: name: load_global_s32_from_2
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX9-LABEL: name: load_global_s32_from_2
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
; GFX10-LABEL: name: load_global_s32_from_2
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 1)
@ -141,7 +141,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1
; GFX7: liveins: $vgpr0_vgpr1
@ -151,27 +151,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 1)
@ -198,7 +198,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1
@ -208,27 +208,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v2s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_v2s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_v2s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -255,7 +255,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-LABEL: name: load_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1
@ -265,27 +265,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v4s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_global_v4s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_global_v4s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_global_v4s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
@ -312,27 +312,27 @@ body: |
; GFX7-LABEL: name: load_global_s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -359,27 +359,27 @@ body: |
; GFX7-LABEL: name: load_global_v2s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX7-FLAT-LABEL: name: load_global_v2s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_global_v2s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_global_v2s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_global_v2s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
@ -500,27 +500,27 @@ body: |
; GFX7-LABEL: name: load_global_p3_from_4
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX7-FLAT-LABEL: name: load_global_p3_from_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_p3_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_p3_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_p3_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@ -547,27 +547,27 @@ body: |
; GFX7-LABEL: name: load_global_p1_from_8
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_p1_from_8
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_p1_from_8
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_p1_from_8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_p1_from_8
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -688,27 +688,27 @@ body: |
; GFX7-LABEL: name: load_global_v2s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX7-FLAT-LABEL: name: load_global_v2s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_v2s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_v2s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_v2s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@ -735,27 +735,27 @@ body: |
; GFX7-LABEL: name: load_global_v4s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_v4s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_v4s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_v4s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -833,7 +833,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7: liveins: $vgpr0_vgpr1
@ -843,7 +843,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -858,7 +858,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
; GFX8: liveins: $vgpr0_vgpr1
@ -873,17 +873,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_2047
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2047
@ -912,7 +912,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7: liveins: $vgpr0_vgpr1
@ -922,7 +922,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -937,7 +937,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
; GFX8: liveins: $vgpr0_vgpr1
@ -952,12 +952,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_2048
; GFX10: liveins: $vgpr0_vgpr1
@ -972,7 +972,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2048
@ -1011,7 +1011,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7: liveins: $vgpr0_vgpr1
@ -1031,7 +1031,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1046,7 +1046,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX8: liveins: $vgpr0_vgpr1
@ -1061,17 +1061,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2047
@ -1110,7 +1110,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@ -1130,7 +1130,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1145,7 +1145,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX8: liveins: $vgpr0_vgpr1
@ -1160,17 +1160,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@ -1199,7 +1199,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
@ -1209,7 +1209,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1224,7 +1224,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1239,12 +1239,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1259,7 +1259,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@ -1289,7 +1289,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7: liveins: $vgpr0_vgpr1
@ -1300,7 +1300,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1315,7 +1315,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1330,7 +1330,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0_vgpr1
@ -1345,7 +1345,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1360,7 +1360,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4096
@ -1399,7 +1399,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7: liveins: $vgpr0_vgpr1
@ -1419,7 +1419,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1434,7 +1434,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1449,12 +1449,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1469,7 +1469,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4095
@ -1508,7 +1508,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7: liveins: $vgpr0_vgpr1
@ -1528,7 +1528,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1543,7 +1543,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1558,12 +1558,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1578,7 +1578,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4096
@ -1608,7 +1608,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7: liveins: $vgpr0_vgpr1
@ -1619,7 +1619,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1634,7 +1634,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1649,7 +1649,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1664,7 +1664,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1679,7 +1679,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8191
@ -1709,7 +1709,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7: liveins: $vgpr0_vgpr1
@ -1720,7 +1720,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1735,7 +1735,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_8192
; GFX8: liveins: $vgpr0_vgpr1
@ -1750,7 +1750,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0_vgpr1
@ -1765,7 +1765,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_8192
; GFX10: liveins: $vgpr0_vgpr1
@ -1780,7 +1780,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8192
@ -1819,7 +1819,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7: liveins: $vgpr0_vgpr1
@ -1839,7 +1839,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1854,7 +1854,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1869,7 +1869,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1884,7 +1884,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1899,7 +1899,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8191
@ -1938,7 +1938,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7: liveins: $vgpr0_vgpr1
@ -1958,7 +1958,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1973,7 +1973,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX8: liveins: $vgpr0_vgpr1
@ -1988,7 +1988,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0_vgpr1
@ -2003,7 +2003,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX10: liveins: $vgpr0_vgpr1
@ -2018,7 +2018,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8192

View File

@ -24,27 +24,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v3s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX8-LABEL: name: load_global_v3s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX9-LABEL: name: load_global_v3s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
; GFX10-LABEL: name: load_global_v3s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)

View File

@ -19,12 +19,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -49,12 +49,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_2
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_2
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 5)
@ -79,12 +79,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_1
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@ -109,12 +109,12 @@ body: |
; GFX6-LABEL: name: load_private_p3_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_p3_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -139,12 +139,12 @@ body: |
; GFX6-LABEL: name: load_private_p5_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_p5_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -170,12 +170,12 @@ body: |
; GFX6-LABEL: name: load_private_v2s16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_v2s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -206,12 +206,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2047
@ -240,14 +240,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2147483647
@ -279,12 +279,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2048
@ -313,14 +313,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -2047
@ -349,14 +349,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -2048
@ -385,12 +385,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 4095
@ -419,14 +419,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 4096
@ -455,14 +455,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -4095
@ -491,14 +491,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -4096
@ -527,14 +527,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 8191
@ -563,14 +563,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 8192
@ -599,14 +599,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -8191
@ -635,14 +635,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -8192
@ -666,10 +666,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_4_constant_0
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_4_constant_0
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
%0:vgpr(p5) = G_CONSTANT i32 0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -691,10 +691,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
%0:sgpr(p5) = G_CONSTANT i32 16
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -716,10 +716,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_1_constant_4095
; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_1_constant_4095
; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@ -742,11 +742,11 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_1_constant_4096
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_constant_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@ -770,10 +770,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_fi
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_fi
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -796,10 +796,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
@ -825,10 +825,10 @@ body: |
bb.0:
; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:sgpr(s32) = G_CONSTANT i32 4095
@ -858,13 +858,13 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4096
@ -890,11 +890,11 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_neg1
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_neg1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = G_CONSTANT i32 -1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)

View File

@ -17,12 +17,12 @@ regBankSelected: true
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
# Immediate offset:
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0, 0
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
# Max immediate offset for SI
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0, 0
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
# Immediate overflow for SI
# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
@ -52,8 +52,8 @@ regBankSelected: true
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
# Immediate overflow for CI
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
@ -66,7 +66,7 @@ regBankSelected: true
# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# Max 32-bit byte offset
# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
@ -84,8 +84,8 @@ regBankSelected: true
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
# Pointer loads
# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
@ -192,8 +192,8 @@ body: |
# GCN-LABEL: name: constant_address_positive{{$}}
# GCN: %0:sreg_64 = S_MOV_B64 44
# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load 4, addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load 4, addrspace 4)
---

View File

@ -18,16 +18,16 @@ body: |
; WAVE64: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; WAVE64: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE64: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: sitofp
; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; WAVE32: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE32: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0

View File

@ -17,12 +17,12 @@ body: |
; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p0) = COPY $vgpr1_vgpr2
G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)
@ -152,12 +152,12 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(p0) = COPY $vgpr2_vgpr3
G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)

View File

@ -19,22 +19,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_s32_to_4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_s32_to_4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_s32_to_4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
@ -55,22 +55,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX8-LABEL: name: store_flat_s32_to_2
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX9-LABEL: name: store_flat_s32_to_2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX10-LABEL: name: store_flat_s32_to_2
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 2, align 2, addrspace 0)
@ -91,22 +91,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX8-LABEL: name: store_flat_s32_to_1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX9-LABEL: name: store_flat_s32_to_1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX10-LABEL: name: store_flat_s32_to_1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 1, align 1, addrspace 0)
@ -128,22 +128,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@ -237,22 +237,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_v2s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@ -273,22 +273,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX8-LABEL: name: store_flat_v3s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX9-LABEL: name: store_flat_v3s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX10-LABEL: name: store_flat_v3s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
G_STORE %1, %0 :: (store 12, align 16, addrspace 0)
@ -309,22 +309,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8-LABEL: name: store_flat_v4s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9-LABEL: name: store_flat_v4s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10-LABEL: name: store_flat_v4s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
@ -346,22 +346,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_v2s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_v2s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_v2s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
@ -383,22 +383,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_v4s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_v4s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_v4s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@ -493,22 +493,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8-LABEL: name: store_flat_v2s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9-LABEL: name: store_flat_v2s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10-LABEL: name: store_flat_v2s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 0)
@ -530,22 +530,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_p1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_p1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_p1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)
@ -604,22 +604,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_p3
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_p3
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_p3
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)
@ -677,22 +677,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX8-LABEL: name: store_atomic_flat_s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX9-LABEL: name: store_atomic_flat_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX10-LABEL: name: store_atomic_flat_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0)
@ -714,22 +714,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX8-LABEL: name: store_atomic_flat_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX9-LABEL: name: store_atomic_flat_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX10-LABEL: name: store_atomic_flat_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0)
@ -761,7 +761,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_s32_gep_2047
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -776,12 +776,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_s32_gep_2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_s32_gep_2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -796,7 +796,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047

View File

@ -26,7 +26,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -36,27 +36,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
@ -82,7 +82,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_2
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -92,27 +92,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_2
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_2
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_2
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 2, align 2, addrspace 1)
@ -138,7 +138,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_1
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -148,27 +148,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_1
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 1, align 1, addrspace 1)
@ -195,27 +195,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@ -288,7 +288,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-LABEL: name: store_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -298,27 +298,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_v2s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@ -344,7 +344,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-LABEL: name: store_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -354,27 +354,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v4s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8-LABEL: name: store_global_v4s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX9-LABEL: name: store_global_v4s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10-LABEL: name: store_global_v4s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
@ -401,27 +401,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_v2s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_v2s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_v2s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
@ -448,27 +448,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v4s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_v4s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_v4s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_v4s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@ -542,27 +542,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8-LABEL: name: store_global_v2s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX9-LABEL: name: store_global_v2s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10-LABEL: name: store_global_v2s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
@ -589,27 +589,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_p1
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_p1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_p1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_p1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@ -683,27 +683,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_p3
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_p3
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_p3
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_p3
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
@ -776,27 +776,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX8-LABEL: name: store_atomic_global_s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX9-LABEL: name: store_atomic_global_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
; GFX10-LABEL: name: store_atomic_global_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1)
@ -823,27 +823,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX8-LABEL: name: store_atomic_global_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX9-LABEL: name: store_atomic_global_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
; GFX10-LABEL: name: store_atomic_global_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1)
@ -870,7 +870,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_gep_2047
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -880,7 +880,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -895,7 +895,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_s32_gep_2047
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -910,17 +910,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_s32_gep_2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_s32_gep_2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047

View File

@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
@ -26,27 +27,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v3s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX8-LABEL: name: store_global_v3s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX9-LABEL: name: store_global_v3s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX10-LABEL: name: store_global_v3s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
G_STORE %1, %0 :: (store 12, align 16, addrspace 1)

View File

@ -21,12 +21,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_4
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -52,12 +52,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_2
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
@ -83,12 +83,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
@ -114,12 +114,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_v2s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -145,12 +145,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_p3
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -176,12 +176,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_p5
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -206,10 +206,10 @@ body: |
; GFX6-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
%2:vgpr(p5) = G_PTR_ADD %0, %1
@ -236,10 +236,10 @@ body: |
; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4095
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4095
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@ -265,11 +265,11 @@ body: |
; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4096
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@ -294,12 +294,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_4
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -324,12 +324,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_2
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
@ -354,12 +354,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
@ -384,12 +384,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_v2s16
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -414,12 +414,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_p3
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -444,12 +444,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_p5
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -475,11 +475,11 @@ body: |
; GFX6-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
%2:vgpr(p5) = G_PTR_ADD %0, %1
@ -507,11 +507,11 @@ body: |
; GFX6-LABEL: name: kernel_store_private_s32_to_1_constant_4095
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4095
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@ -538,12 +538,12 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4096
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)

View File

@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc__vg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -56,7 +56,7 @@ define amdgpu_ps <2 x float> @raw_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc__vg
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1
; CHECK: $vgpr0 = COPY [[COPY8]]
@ -81,7 +81,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgp
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -170,7 +170,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -195,7 +195,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -217,7 +217,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)

View File

@ -16,7 +16,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY8]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -40,7 +40,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: S_ENDPGM 0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -84,7 +84,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -137,7 +137,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -165,7 +165,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY8]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0

View File

@ -28,7 +28,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -59,7 +59,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -89,7 +89,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@ -119,7 +119,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
ret void
@ -200,7 +200,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -284,7 +284,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4095
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -342,7 +342,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@ -355,7 +355,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
ret void
@ -386,7 +386,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -415,7 +415,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
ret void

View File

@ -14,7 +14,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@ -27,7 +27,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -45,7 +45,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@ -58,7 +58,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@ -93,7 +93,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@ -109,7 +109,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
@ -169,7 +169,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -209,7 +209,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -234,7 +234,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@ -250,7 +250,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2

View File

@ -13,7 +13,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset__sgp
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -31,7 +31,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@ -52,7 +52,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_format_v3f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
@ -75,7 +75,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -146,7 +146,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2

View File

@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffs
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -69,7 +69,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -114,7 +114,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -140,7 +140,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
@ -159,7 +159,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
@ -178,7 +178,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
@ -197,7 +197,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
@ -216,7 +216,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
@ -235,7 +235,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
@ -254,7 +254,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@ -275,7 +275,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
@ -298,7 +298,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
@ -323,7 +323,7 @@ define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -341,7 +341,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgp
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -365,7 +365,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgp
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@ -386,7 +386,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -406,7 +406,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -444,7 +444,7 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -486,7 +486,7 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -512,7 +512,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffs
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
@ -529,7 +529,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
@ -548,7 +548,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
@ -566,7 +566,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 16
@ -585,7 +585,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -607,7 +607,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4096
@ -626,7 +626,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
@ -644,7 +644,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
@ -664,7 +664,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 16
@ -685,7 +685,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4095
@ -706,7 +706,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
@ -744,7 +744,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -790,7 +790,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

View File

@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
@ -27,7 +27,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -44,7 +44,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
@ -56,7 +56,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@ -78,7 +78,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; PACKED: bb.1 (%ir-block.0):
@ -91,7 +91,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -116,7 +116,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; PACKED: bb.1 (%ir-block.0):
@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -211,7 +211,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -240,7 +240,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; PACKED: bb.1 (%ir-block.0):
@ -253,7 +253,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@ -275,7 +275,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; PACKED: bb.1 (%ir-block.0):
@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@ -310,7 +310,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
; PACKED: bb.1 (%ir-block.0):
@ -323,7 +323,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -346,7 +346,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
; PACKED: bb.1 (%ir-block.0):
@ -359,7 +359,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -385,7 +385,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
; PACKED: bb.1 (%ir-block.0):
@ -401,7 +401,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -448,7 +448,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -489,7 +489,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec

View File

@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@ -51,7 +51,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -94,7 +94,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -132,7 +132,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@ -199,7 +199,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -220,7 +220,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -244,7 +244,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

View File

@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -103,7 +103,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -148,7 +148,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
ret void
@ -191,7 +191,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
ret void
@ -209,7 +209,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
ret void
@ -227,7 +227,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
ret void
@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
ret void
@ -263,7 +263,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
ret void
@ -281,7 +281,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
ret void
@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -344,7 +344,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -362,7 +362,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -381,7 +381,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -418,7 +418,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -474,7 +474,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -498,7 +498,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@ -517,7 +517,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
ret void
@ -535,7 +535,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -554,7 +554,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -576,7 +576,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -595,7 +595,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@ -613,7 +613,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@ -631,7 +631,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -650,7 +650,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -672,7 +672,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -711,7 +711,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -755,7 +755,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

View File

@ -13,7 +13,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@ -26,7 +26,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -44,7 +44,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@ -68,7 +68,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -92,7 +92,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
@ -126,7 +126,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@ -167,7 +167,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -207,7 +207,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -232,7 +232,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
@ -245,7 +245,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@ -263,7 +263,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
@ -276,7 +276,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@ -294,7 +294,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
@ -307,7 +307,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@ -325,7 +325,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
@ -338,7 +338,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)

View File

@ -12,7 +12,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -30,7 +30,7 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@ -51,7 +51,7 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
@ -74,7 +74,7 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
@ -119,7 +119,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -144,7 +144,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@ -162,7 +162,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@ -180,7 +180,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@ -198,7 +198,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)

View File

@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -49,7 +49,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -93,7 +93,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -108,7 +108,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -179,7 +179,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -224,7 +224,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -263,7 +263,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -349,7 +349,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -374,7 +374,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
; PACKED: bb.1 (%ir-block.0):
@ -387,7 +387,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
ret void
@ -405,7 +405,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; PACKED: bb.1 (%ir-block.0):
@ -418,7 +418,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
ret void
@ -436,7 +436,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
; PACKED: bb.1 (%ir-block.0):
@ -449,7 +449,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
ret void
@ -467,7 +467,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
; PACKED: bb.1 (%ir-block.0):
@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
ret void

View File

@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
ret void
@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -98,7 +98,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -182,7 +182,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -228,7 +228,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -268,7 +268,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec

View File

@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -35,7 +35,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -57,7 +57,7 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -80,7 +80,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -100,7 +100,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
ret void
@ -135,7 +135,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -180,7 +180,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -226,7 +226,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -252,7 +252,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
ret void
@ -271,7 +271,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
ret void
@ -290,7 +290,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
ret void
@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
ret void
@ -328,7 +328,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0)
ret void
@ -345,7 +345,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0)
ret void
@ -364,7 +364,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0)
ret void
@ -382,7 +382,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 16
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -401,7 +401,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4095
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -423,7 +423,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4096
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -442,7 +442,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0)
ret void
@ -460,7 +460,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0)
ret void
@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 16
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -501,7 +501,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 4095
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -522,7 +522,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 4096
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -560,7 +560,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -606,7 +606,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

View File

@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -38,7 +38,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -62,7 +62,7 @@ define amdgpu_ps <2 x float> @struct_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc_
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1
; CHECK: $vgpr0 = COPY [[COPY9]]
@ -89,7 +89,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -132,7 +132,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -184,7 +184,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -212,7 +212,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)

View File

@ -19,7 +19,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY9]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -45,7 +45,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cm
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: S_ENDPGM 0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -92,7 +92,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -148,7 +148,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -178,7 +178,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY9]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0

View File

@ -32,7 +32,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -67,7 +67,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
@ -99,7 +99,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0)
ret void
@ -131,7 +131,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
@ -218,7 +218,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; GFX90A: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -306,7 +306,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -334,7 +334,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@ -349,7 +349,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
@ -367,7 +367,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@ -380,7 +380,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2)
ret void
@ -415,7 +415,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -446,7 +446,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void

View File

@ -15,7 +15,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@ -30,7 +30,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -50,7 +50,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@ -76,7 +76,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -102,7 +102,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
@ -138,7 +138,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY7]]
@ -183,7 +183,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -247,7 +247,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -277,7 +277,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffsset_add_4095
@ -292,7 +292,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -313,7 +313,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@ -328,7 +328,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)

View File

@ -15,7 +15,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_format_v3f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2
@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
@ -132,7 +132,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -166,7 +166,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -187,7 +187,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)

View File

@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -37,7 +37,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@ -61,7 +61,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2
@ -87,7 +87,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2
@ -116,7 +116,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0)
@ -137,7 +137,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -158,7 +158,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0)
@ -200,7 +200,7 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -227,7 +227,7 @@ define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -249,7 +249,7 @@ define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -272,7 +272,7 @@ define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgp
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -294,7 +294,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgp
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -318,7 +318,7 @@ define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voff
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -339,7 +339,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -366,7 +366,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@ -390,7 +390,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)

View File

@ -16,7 +16,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -31,7 +31,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -55,7 +55,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -70,7 +70,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -103,7 +103,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -120,7 +120,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; PACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -162,7 +162,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -206,7 +206,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -233,7 +233,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -248,7 +248,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

View File

@ -15,7 +15,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -37,7 +37,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -60,7 +60,7 @@ define amdgpu_ps void @struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -84,7 +84,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -126,7 +126,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -153,7 +153,7 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

View File

@ -17,7 +17,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -62,7 +62,7 @@ define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -86,7 +86,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -132,7 +132,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -159,7 +159,7 @@ define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -180,7 +180,7 @@ define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -201,7 +201,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)
ret void
@ -221,7 +221,7 @@ define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -249,7 +249,7 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

View File

@ -16,7 +16,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@ -31,7 +31,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -51,7 +51,7 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@ -66,7 +66,7 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@ -103,7 +103,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY7]]
@ -121,7 +121,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
@ -163,7 +163,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0
@ -179,7 +179,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -220,7 +220,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -266,7 +266,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -314,7 +314,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095
@ -329,7 +329,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095

View File

@ -15,7 +15,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2
@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
@ -111,7 +111,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -152,7 +152,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -186,7 +186,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095

View File

@ -24,7 +24,7 @@ body: |
; CHECK: $sgpr1 = COPY killed $sgpr5
; CHECK: $sgpr4_sgpr5 = S_GETPC_B64
; CHECK: $sgpr4 = S_MOV_B32 $sgpr8
; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load 16, align 4, addrspace 4)
bb.0:
successors: %bb.1, %bb.2
liveins: $sgpr0
@ -39,7 +39,7 @@ body: |
bb.1:
renamable $vgpr0 = V_MOV_B32_e32 1065353216, implicit $exec
BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
bb.2:
S_ENDPGM 0

View File

@ -14,9 +14,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -33,9 +33,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -52,9 +52,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -71,9 +71,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -92,9 +92,9 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256 = COPY %3:sgpr_256
%4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -112,11 +112,11 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_512 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -134,15 +134,15 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_1024 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -160,9 +160,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_64 = IMPLICIT_DEF
%2:vreg_64 = COPY %3:areg_64
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -180,9 +180,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_128 = IMPLICIT_DEF
%2:vreg_128 = COPY %3:areg_128
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -200,12 +200,12 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_512 = IMPLICIT_DEF
%2:vreg_512 = COPY %3:areg_512
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -223,14 +223,14 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_1024 = IMPLICIT_DEF
%2:vreg_1024 = COPY %3:areg_1024
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

View File

@ -16,9 +16,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -35,9 +35,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -54,9 +54,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -73,9 +73,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -94,9 +94,9 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256_align2 = COPY %3:sgpr_256
%4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -114,11 +114,11 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_512_align2 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -136,15 +136,15 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_1024_align2 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -162,9 +162,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_64 = IMPLICIT_DEF
%2:vreg_64_align2 = COPY %3:areg_64
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -182,9 +182,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_128 = IMPLICIT_DEF
%2:vreg_128_align2 = COPY %3:areg_128
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -202,12 +202,12 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_512 = IMPLICIT_DEF
%2:vreg_512_align2 = COPY %3:areg_512
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
---
@ -225,14 +225,14 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_1024 = IMPLICIT_DEF
%2:vreg_1024_align2 = COPY %3:areg_1024
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

View File

@ -4,7 +4,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load 16 from %ir.13, addrspace 4)
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load 16 from %ir.13, addrspace 4)
define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
.entry:

View File

@ -83,14 +83,14 @@ body: |
successors: %bb.1(0x40000000), %bb.4(0x40000000)
liveins: $sgpr4_sgpr5
renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0, 0 :: (dereferenceable invariant load 4 from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4)
renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0 :: (dereferenceable invariant load 4 from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4)
S_WAITCNT 49279
renamable $vgpr0 = nofpexcept V_MUL_F32_e64 0, killed $sgpr4, 0, $sgpr4, 0, 0, implicit $mode, implicit $exec
DBG_VALUE renamable $sgpr6_sgpr7, $noreg, !11, !DIExpression(DW_OP_plus_uconst, 12, DW_OP_stack_value), debug-location !12
$vgpr1 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr6_sgpr7
$vgpr2 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit killed $sgpr6_sgpr7, implicit $exec
GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, 0, 0, 0, implicit $exec, debug-location !12 :: (store 4 into %ir.tmp2, addrspace 1)
GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, implicit $exec, debug-location !12 :: (store 4 into %ir.tmp2, addrspace 1)
renamable $sgpr4 = S_MOV_B32 8388608
renamable $sgpr4_sgpr5 = nofpexcept V_CMP_GT_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
@ -102,7 +102,7 @@ body: |
renamable $sgpr4_sgpr5 = IMPLICIT_DEF
$vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
$vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $exec
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
renamable $sgpr4 = S_MOV_B32 2139095040
S_WAITCNT 3952
renamable $sgpr4_sgpr5 = nofpexcept V_CMP_NEQ_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec

View File

@ -8,9 +8,9 @@ name: trivial_smem_clause_load_smrd4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -20,11 +20,11 @@ name: trivial_smem_clause_load_smrd4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -34,13 +34,13 @@ name: trivial_smem_clause_load_smrd4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
S_ENDPGM 0
...
---
@ -50,15 +50,15 @@ name: trivial_smem_clause_load_smrd4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
S_ENDPGM 0
...
---
@ -67,11 +67,11 @@ name: trivial_smem_clause_load_smrd4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -81,9 +81,9 @@ name: smrd_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -93,9 +93,9 @@ name: smrd_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -105,9 +105,9 @@ name: smrd_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: smrd_load8_overwrite_ptr
; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -119,46 +119,46 @@ name: break_smem_clause_at_max_smem_clause_size_smrd_load4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
; GCN-NEXT: S_ENDPGM 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
$sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
S_ENDPGM 0
...
@ -169,12 +169,12 @@ name: break_smem_clause_simple_load_smrd4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -184,11 +184,11 @@ name: break_smem_clause_simple_load_smrd4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -198,12 +198,12 @@ name: break_smem_clause_simple_load_smrd8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -213,11 +213,11 @@ name: break_smem_clause_simple_load_smrd16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
$sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
S_ENDPGM 0
...
---
@ -228,16 +228,16 @@ body: |
; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
bb.0:
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
bb.1:
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -248,11 +248,11 @@ name: break_smem_clause_store_load_into_ptr_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM 0
S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
S_ENDPGM 0
...
---
@ -264,11 +264,11 @@ name: break_smem_clause_store_load_into_data_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
$sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
$sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -278,13 +278,13 @@ name: valu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -294,13 +294,13 @@ name: salu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr8 = S_MOV_B32 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -309,13 +309,13 @@ name: ds_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -325,13 +325,13 @@ name: flat_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: flat_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -341,11 +341,11 @@ name: implicit_use_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_smem_clause
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
$sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
$sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
S_ENDPGM 0
...

View File

@ -9,10 +9,10 @@ name: trivial_clause_load_flat4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x1
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -22,12 +22,12 @@ name: trivial_clause_load_flat4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -37,14 +37,14 @@ name: trivial_clause_load_flat4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x3
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -54,16 +54,16 @@ name: trivial_clause_load_flat4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x4
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -73,12 +73,12 @@ name: trivial_clause_load_flat4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -88,10 +88,10 @@ name: flat_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -101,10 +101,10 @@ name: flat_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -114,10 +114,10 @@ name: flat_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: flat_load8_overwrite_ptr
; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -130,48 +130,48 @@ name: break_clause_at_max_clause_size_flat_load4
body: |
bb.0:
; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
; GCN-NEXT: S_ENDPGM 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
S_ENDPGM 0
...
@ -182,13 +182,13 @@ name: break_clause_simple_load_flat4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -198,13 +198,13 @@ name: break_clause_simple_load_flat4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -214,13 +214,13 @@ name: break_clause_simple_load_flat8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -231,12 +231,12 @@ name: break_clause_simple_load_flat16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -251,17 +251,17 @@ body: |
; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
bb.0:
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
bb.1:
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -272,12 +272,12 @@ name: break_clause_store_load_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -289,12 +289,12 @@ name: break_clause_store_load_into_data_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_data_flat4
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -305,14 +305,14 @@ name: valu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -323,14 +323,14 @@ name: salu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_MOV_B32 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -340,14 +340,14 @@ name: ds_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -357,14 +357,14 @@ name: smrd_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: smrd_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -374,13 +374,13 @@ name: implicit_use_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_clause
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
$vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
$vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -389,12 +389,12 @@ name: trivial_clause_load_mubuf4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@ -403,13 +403,13 @@ name: break_clause_simple_load_mubuf_offen_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@ -420,11 +420,11 @@ name: mubuf_load4_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: mubuf_load4_overwrite_ptr
; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
S_ENDPGM 0
@ -437,13 +437,13 @@ name: break_clause_flat_load_mubuf_load
body: |
bb.0:
; GCN-LABEL: name: break_clause_flat_load_mubuf_load
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# Break a clause from interference between mubuf and flat instructions
@ -458,8 +458,8 @@ name: break_clause_mubuf_load_flat_load
body: |
bb.0:
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@ -470,13 +470,13 @@ name: break_clause_atomic_rtn_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -485,12 +485,12 @@ name: break_clause_atomic_nortn_ptr_load_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -500,13 +500,13 @@ name: break_clause_atomic_rtn_into_ptr_mubuf4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, 0, implicit $exec
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
S_ENDPGM 0
...
---
@ -517,11 +517,11 @@ body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@ -532,11 +532,11 @@ name: no_break_clause_mubuf_load_novaddr
body: |
bb.0:
; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@ -546,16 +546,16 @@ name: mix_load_store_clause
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@ -565,15 +565,15 @@ name: mix_load_store_clause_same_address
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause_same_address
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...

View File

@ -9,27 +9,27 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 16, align 1, addrspace 4)
; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 16, align 1, addrspace 4)
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 32, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 32, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 48, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 48, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 64, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 64, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 80, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
@ -38,19 +38,19 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 96, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 96, align 1, addrspace 4)
; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
; GCN: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]].sub0
; GCN: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0
; GCN: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
; GCN: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
@ -60,23 +60,23 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 160, align 1, addrspace 4)
; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80
@ -88,77 +88,77 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
; GCN: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0
; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88
; GCN: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF5]]
; GCN: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY15]].sub0
; GCN: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176
; GCN: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF6]]
; GCN: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0
; GCN: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF7]]
; GCN: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0
; GCN: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF8]]
; GCN: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112
; GCN: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224
; GCN: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1
; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120
; GCN: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240
; GCN: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 256, align 1, addrspace 4)
@ -176,68 +176,68 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY42:%[0-9]+]]:vreg_64 = COPY [[DEF9]]
; GCN: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[COPY42]].sub0
; GCN: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136
; GCN: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY45:%[0-9]+]]:vreg_64 = COPY [[DEF10]]
; GCN: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[COPY45]].sub0
; GCN: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272
; GCN: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY48:%[0-9]+]]:vreg_64 = COPY [[DEF11]]
; GCN: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY48]].sub0
; GCN: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY50:%[0-9]+]]:vreg_64 = COPY [[DEF12]]
; GCN: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY50]].sub0
; GCN: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY54:%[0-9]+]]:vreg_64 = COPY [[DEF13]]
; GCN: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[COPY54]].sub0
; GCN: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY56:%[0-9]+]]:vreg_64 = COPY [[DEF14]]
; GCN: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[COPY56]].sub0
; GCN: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY58:%[0-9]+]]:vreg_64 = COPY [[DEF15]]
; GCN: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[COPY58]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY60:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: [[COPY61:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288
; GCN: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152
; GCN: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304
; GCN: [[COPY67:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: S_ENDPGM 0
bb.0:
%tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0

View File

@ -10,14 +10,14 @@ body: |
bb.0:
; GCN-LABEL: name: src_bundle_latency
; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
; GCN: }
; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
$vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
}
$vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
$vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
@ -32,13 +32,13 @@ body: |
; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, implicit $exec
; GCN: }
$vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
$vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
}
...

View File

@ -25,7 +25,7 @@ body: |
; GCN-LABEL: name: call_waw_waitcnt
; GCN: liveins: $sgpr4_sgpr5, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: S_WAITCNT 0
; GCN: $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0, 0
; GCN: $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0
; GCN: $sgpr33 = S_MOV_B32 killed $sgpr7
; GCN: $flat_scr_lo = S_ADD_U32 killed $sgpr4, $sgpr33, implicit-def $scc
; GCN: $flat_scr_hi = S_ADDC_U32 killed $sgpr5, 0, implicit-def $scc, implicit killed $scc
@ -38,7 +38,7 @@ body: |
; GCN: S_WAITCNT 49279
; GCN: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def dead $vgpr0
; GCN: S_ENDPGM 0
$sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0, 0
$sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0
$sgpr33 = S_MOV_B32 killed $sgpr7
$flat_scr_lo = S_ADD_U32 killed $sgpr4, $sgpr33, implicit-def $scc
$flat_scr_hi = S_ADDC_U32 killed $sgpr5, 0, implicit-def $scc, implicit killed $scc

View File

@ -43,8 +43,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0:: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@ -55,10 +55,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -105,8 +105,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@ -117,10 +117,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@ -168,8 +168,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@ -180,10 +180,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -233,8 +233,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@ -245,10 +245,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -310,8 +310,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@ -322,10 +322,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -375,8 +375,8 @@ body: |
%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@ -387,10 +387,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...

View File

@ -17,15 +17,15 @@ body: |
$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr4_vgpr5 = IMPLICIT_DEF
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr2 = IMPLICIT_DEF
$vgpr3 = IMPLICIT_DEF
$vgpr6 = IMPLICIT_DEF
$vgpr0 = V_ADD_CO_U32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec
FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0
...

View File

@ -14,7 +14,7 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
%3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4)
...

View File

@ -30,7 +30,7 @@ body: |
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
S_ENDPGM 0
bb.2:
@ -78,7 +78,7 @@ body: |
bb.8:
successors: %bb.10
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
%35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
%28:vgpr_32 = COPY %35

View File

@ -83,7 +83,7 @@ body: |
bb.9:
successors: %bb.10(0x80000000)
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec
%22:sreg_64 = COPY $exec, implicit-def $exec
%23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc
@ -125,7 +125,7 @@ body: |
%27.sub5:sgpr_256 = COPY %26
%27.sub6:sgpr_256 = COPY %26
%27.sub7:sgpr_256 = COPY killed %26
%28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%29:vgpr_32 = nofpexcept V_ADD_F32_e32 0, killed %28, implicit $mode, implicit $exec
$m0 = S_MOV_B32 -1
DS_WRITE_B32 undef %30:vgpr_32, killed %29, 0, 0, implicit $m0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`, addrspace 3)

View File

@ -68,7 +68,7 @@ body: |
%23:vreg_128 = COPY killed %17
%24:sreg_64 = COPY killed %16
%25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec
%29:vreg_128 = COPY killed %21
%29.sub0:vreg_128 = COPY %1
@ -257,7 +257,7 @@ body: |
%109.sub5:sgpr_256 = COPY %108
%109.sub6:sgpr_256 = COPY %108
%109.sub7:sgpr_256 = COPY killed %108
%110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%112:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %110, implicit $mode, implicit $exec
%113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %112, implicit $mode, implicit $exec
%114:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec

View File

@ -46,10 +46,10 @@ body: |
%0 = COPY $sgpr2_sgpr3
%1 = COPY $vgpr2
%2 = COPY $vgpr3
%3 = S_LOAD_DWORDX8_IMM %0, 0, 0, 0
%4 = S_LOAD_DWORDX4_IMM %0, 12, 0, 0
%5 = S_LOAD_DWORDX8_IMM %0, 16, 0, 0
%6 = S_LOAD_DWORDX4_IMM %0, 28, 0, 0
%3 = S_LOAD_DWORDX8_IMM %0, 0, 0
%4 = S_LOAD_DWORDX4_IMM %0, 12, 0
%5 = S_LOAD_DWORDX8_IMM %0, 16, 0
%6 = S_LOAD_DWORDX4_IMM %0, 28, 0
undef %7.sub0 = S_MOV_B32 212739
%20 = COPY %7
%11 = COPY %20
@ -61,7 +61,7 @@ body: |
%11.sub6 = COPY %1
%11.sub7 = COPY %1
%11.sub8 = COPY %1
dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
%20.sub1 = COPY %2
%20.sub2 = COPY %2
%20.sub3 = COPY %2
@ -70,6 +70,6 @@ body: |
%20.sub6 = COPY %2
%20.sub7 = COPY %2
%20.sub8 = COPY %2
dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
...

View File

@ -11,7 +11,7 @@
#
# GCN-LABEL: bb.6:
# GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}})
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit $exec
#
--- |
@ -69,7 +69,7 @@ body: |
%10:sreg_64 = COPY killed %5
undef %11.sub2:sgpr_128 = COPY %4
%11.sub3:sgpr_128 = COPY %3
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, implicit $exec
undef %13.sub1:vreg_128 = COPY %9.sub1
%13.sub2:vreg_128 = COPY %9.sub2
%14:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $mode, implicit $exec
@ -161,7 +161,7 @@ body: |
bb.18:
successors: %bb.7(0x80000000)
dead %59:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $mode, implicit $exec
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, 0, 0, 0, implicit $exec
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, implicit $exec
undef %66.sub1:vreg_128 = COPY %13.sub1
%66.sub2:vreg_128 = COPY %13.sub2
%67:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $mode, implicit $exec

View File

@ -47,7 +47,7 @@ body: |
%4.sub5:sgpr_256 = COPY %1
%4.sub6:sgpr_256 = COPY %1
%4.sub7:sgpr_256 = COPY killed %1
%5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%6:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %5, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%7:vgpr_32 = nofpexcept V_RCP_F32_e32 killed %6, implicit $mode, implicit $exec
%8:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %7, implicit $mode, implicit $exec
@ -145,10 +145,10 @@ body: |
%40:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
%41:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $mode, implicit $exec
%42:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 killed %41, implicit $mode, implicit $exec
%43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0, 0 :: (dereferenceable invariant load 4)
%43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0 :: (dereferenceable invariant load 4)
%45:vgpr_32 = V_MUL_LO_I32_e64 killed %42, killed %43, implicit $exec
%46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc

View File

@ -34,7 +34,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@ -42,7 +42,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@ -54,7 +54,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@ -83,7 +83,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3
undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@ -91,7 +91,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@ -103,7 +103,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc

View File

@ -27,7 +27,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@ -37,7 +37,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -71,7 +71,7 @@ body: |
%3:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%13:vgpr_32 = V_ASHRREV_I32_e64 31, %3, implicit $exec
%14:vreg_64 = REG_SEQUENCE %3, %subreg.hi16, %13, %subreg.lo16
%15:vreg_64 = V_LSHLREV_B64_e64 2, killed %14, implicit $exec
@ -85,19 +85,19 @@ body: |
%16:vgpr_32 = V_MOV_B32_e32 63, implicit $exec
%9:vgpr_32 = V_AND_B32_e64 %8, %6, implicit $exec
FLAT_STORE_DWORD %19, %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %9, 0, 0, implicit $exec, implicit $flat_scr
%10:vgpr_32 = V_AND_B32_e64 %6, %8, implicit $exec
FLAT_STORE_DWORD %19, %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %10, 0, 0, implicit $exec, implicit $flat_scr
%11:vgpr_32 = V_AND_B32_e32 %8, %6, implicit $exec
FLAT_STORE_DWORD %19, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_AND_B32_e64 %8, %8, implicit $exec
FLAT_STORE_DWORD %19, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_AND_B32_e64 %16, %16, implicit $exec
FLAT_STORE_DWORD %19, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %13, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@ -117,7 +117,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%5:sreg_32_xm0_xexec = S_MOV_B32 1
%6:sreg_32_xm0 = COPY %4.sub1
%7:sreg_32_xm0 = COPY %4.sub0
@ -126,7 +126,7 @@ body: |
%10:sgpr_128 = REG_SEQUENCE killed %7, %subreg.hi16, killed %6, %subreg.lo16, killed %9, %subreg.sub0, killed %8, %subreg.sub0_sub1
%12:sreg_32_xm0 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc
%13:vgpr_32 = COPY %12
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -144,7 +144,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sreg_32_xm0_xexec = S_MOV_B32 999123
%4:sreg_32_xm0 = COPY %2.sub1
%5:sreg_32_xm0 = COPY %2.sub0
@ -153,7 +153,7 @@ body: |
%8:sgpr_128 = REG_SEQUENCE killed %5, %subreg.hi16, killed %4, %subreg.lo16, killed %7, %subreg.sub0, killed %6, %subreg.sub0_sub1
%10:sreg_32_xm0 = S_ASHR_I32 killed %3, 12, implicit-def dead $scc
%11:vgpr_32 = COPY %10
BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -200,7 +200,7 @@ body: |
%2:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16
%17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec
@ -221,34 +221,34 @@ body: |
%32:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%11:vgpr_32 = V_ASHRREV_I32_e64 8, %10, implicit $exec
FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_ASHRREV_I32_e64 %8, %10, implicit $exec
FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_ASHR_I32_e64 %7, 3, implicit $exec
FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr
%14:vgpr_32 = V_ASHR_I32_e64 7, %29, implicit $exec
FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr
%15:vgpr_32 = V_ASHR_I32_e64 %27, %24, implicit $exec
FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr
%22:vgpr_32 = V_ASHR_I32_e64 %6, 4, implicit $exec
FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr
%23:vgpr_32 = V_ASHR_I32_e64 %6, %30, implicit $exec
FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr
%25:vgpr_32 = V_ASHR_I32_e32 %31, %31, implicit $exec
FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr
%26:vgpr_32 = V_ASHRREV_I32_e32 11, %10, implicit $exec
FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr
%28:vgpr_32 = V_ASHR_I32_e32 %27, %32, implicit $exec
FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
@ -267,7 +267,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sreg_32_xm0_xexec = S_MOV_B32 -999123
%4:sreg_32_xm0 = COPY %2.sub1
%5:sreg_32_xm0 = COPY %2.sub0
@ -276,7 +276,7 @@ body: |
%8:sgpr_128 = REG_SEQUENCE killed %5, %subreg.hi16, killed %4, %subreg.lo16, killed %7, %subreg.sub0, killed %6, %subreg.sub0_sub1
%10:sreg_32_xm0 = S_LSHR_B32 killed %3, 12, implicit-def dead $scc
%11:vgpr_32 = COPY %10
BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -324,7 +324,7 @@ body: |
%2:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16
%17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec
@ -345,34 +345,34 @@ body: |
%32:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
%11:vgpr_32 = V_LSHRREV_B32_e64 8, %10, implicit $exec
FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_LSHRREV_B32_e64 %8, %10, implicit $exec
FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_LSHR_B32_e64 %7, 3, implicit $exec
FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr
%14:vgpr_32 = V_LSHR_B32_e64 7, %29, implicit $exec
FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr
%15:vgpr_32 = V_LSHR_B32_e64 %27, %24, implicit $exec
FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr
%22:vgpr_32 = V_LSHR_B32_e64 %6, 4, implicit $exec
FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr
%23:vgpr_32 = V_LSHR_B32_e64 %6, %30, implicit $exec
FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr
%25:vgpr_32 = V_LSHR_B32_e32 %31, %31, implicit $exec
FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr
%26:vgpr_32 = V_LSHRREV_B32_e32 11, %10, implicit $exec
FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr
%28:vgpr_32 = V_LSHR_B32_e32 %27, %32, implicit $exec
FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@ -390,7 +390,7 @@ body: |
bb.0:
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%2:vgpr_32 = V_XOR_B32_e64 killed %0, undef %1:vgpr_32, implicit $exec
FLAT_STORE_DWORD undef %3:vreg_64, %2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD undef %3:vreg_64, %2, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@ -434,7 +434,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@ -444,7 +444,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_ANDN2_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -467,7 +467,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@ -477,7 +477,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_OR_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -516,7 +516,7 @@ body: |
%3:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%13:vgpr_32 = V_ASHRREV_I32_e64 31, %3, implicit $exec
%14:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %13, %subreg.sub1
%15:vreg_64 = V_LSHLREV_B64_e64 2, killed %14, implicit $exec
@ -529,15 +529,15 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 1234567
%16:vgpr_32 = V_MOV_B32_e32 63, implicit $exec
%9:vgpr_32 = V_OR_B32_e64 %8, %6, implicit $exec
FLAT_STORE_DWORD %19, %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %9, 0, 0, implicit $exec, implicit $flat_scr
%10:vgpr_32 = V_OR_B32_e64 %6, %8, implicit $exec
FLAT_STORE_DWORD %19, %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %10, 0, 0, implicit $exec, implicit $flat_scr
%11:vgpr_32 = V_OR_B32_e32 %8, %6, implicit $exec
FLAT_STORE_DWORD %19, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_OR_B32_e64 %8, %8, implicit $exec
FLAT_STORE_DWORD %19, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_OR_B32_e64 %16, %16, implicit $exec
FLAT_STORE_DWORD %19, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %13, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@ -560,7 +560,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@ -570,7 +570,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_ORN2_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -593,7 +593,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@ -603,7 +603,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_NAND_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -626,7 +626,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@ -636,7 +636,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_NOR_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -659,7 +659,7 @@ body: |
liveins: $sgpr0_sgpr1
%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@ -669,7 +669,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_XNOR_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
@ -723,7 +723,7 @@ body: |
%2:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
%15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %15, %subreg.sub1
%17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec
@ -738,25 +738,25 @@ body: |
%7:sreg_32_xm0 = S_MOV_B32 1
%27:sreg_32_xm0 = S_MOV_B32 -4
%11:vgpr_32 = V_LSHLREV_B32_e64 12, %10, implicit $exec
FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr
%12:vgpr_32 = V_LSHLREV_B32_e64 %7, 12, implicit $exec
FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr
%13:vgpr_32 = V_LSHL_B32_e64 %7, 12, implicit $exec
FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr
%14:vgpr_32 = V_LSHL_B32_e64 12, %7, implicit $exec
FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr
%15:vgpr_32 = V_LSHL_B32_e64 12, %24, implicit $exec
FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr
%22:vgpr_32 = V_LSHL_B32_e64 %6, 12, implicit $exec
FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr
%23:vgpr_32 = V_LSHL_B32_e64 %6, 32, implicit $exec
FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr
%25:vgpr_32 = V_LSHL_B32_e32 %6, %6, implicit $exec
FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr
%26:vgpr_32 = V_LSHLREV_B32_e32 11, %24, implicit $exec
FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr
%28:vgpr_32 = V_LSHL_B32_e32 %27, %6, implicit $exec
FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...

View File

@ -37,7 +37,7 @@ body: |
; GCN: S_BRANCH %bb.3
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
; GCN: dead %18:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
; GCN: dead %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
@ -89,7 +89,7 @@ body: |
S_BRANCH %bb.3
bb.3:
dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
%36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

View File

@ -66,9 +66,9 @@ body: |
; CHECK: dead %16:vgpr_32 = COPY %11.sub0
; CHECK: undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF4]].sub0, [[DEF6]].sub0, 0, implicit $exec
; CHECK: dead undef %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 [[DEF4]].sub1, [[DEF6]].sub1, %18, 0, implicit $exec
; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; CHECK: dead %20:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF7]], implicit $exec
; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]
@ -79,7 +79,7 @@ body: |
; CHECK: bb.4:
; CHECK: successors: %bb.5(0x80000000)
; CHECK: dead %21:sreg_64 = COPY $exec
; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, implicit $exec :: (load 16, addrspace 1)
; CHECK: DBG_VALUE %22, $noreg, <0x{{[0-9a-f]+}}>, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !DILocation(line: 0, scope: <0x{{[0-9a-f]+}}>)
; CHECK: bb.5:
; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)
@ -109,9 +109,9 @@ body: |
dead %16:vgpr_32 = COPY %11.sub0
undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 %6.sub0, %8.sub0, 0, implicit $exec
dead %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 %6.sub1, %8.sub1, %18, 0, implicit $exec
%6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
%6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, implicit $exec :: (load 8, addrspace 1)
dead %20:sreg_64 = V_CMP_GT_I32_e64 4, %9, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, implicit $exec :: (store 8, addrspace 1)
bb.2:
%5:vgpr_32 = COPY %13
@ -122,7 +122,7 @@ body: |
bb.4:
dead %21:sreg_64 = COPY $exec
%22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
%22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec :: (load 16, addrspace 1)
DBG_VALUE %22, $noreg, !16, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !21
bb.5:

View File

@ -11,7 +11,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1
%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0
%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0
S_NOP 0, implicit-def %4:sgpr_128, implicit %10.sub1:sgpr_128
S_CBRANCH_SCC0 %bb.3, implicit undef $scc
S_BRANCH %bb.1
@ -26,7 +26,7 @@ body: |
S_BRANCH %bb.4
bb.3:
%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0
%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0
%7:sreg_32_xm0 = COPY %10.sub1:sgpr_128
%8:sreg_32_xm0 = COPY %10.sub2:sgpr_128

Some files were not shown because too many files have changed in this diff Show More