[AMDGPU] Use single cache policy operand
Replace the individual GLC, SLC, and DLC operands with a single cache_policy bitmask operand. This reduces the number of operands in MIR and, hopefully, the amount of code, since these operands are mostly 0 anyway. An additional advantage is that the parser now accepts the cache-policy flags in any order, which it previously did not.

Differential Revision: https://reviews.llvm.org/D96469
This commit is contained in:
parent af2796c76d
commit 3bffb1cd0e
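Before the diff, a minimal sketch of the shape of the new operand. The bit positions come from the masks in the removed parseCachePolicy() (0x1, 0x2, 0x4, 0x10) and from the extract*/render* helpers below; the exact enum layout and the value of AMDGPU::CPol::ALL are not part of this diff, so treat them as assumptions.

// Sketch only -- names mirror AMDGPU::CPol as used in the diff; the values
// are inferred from the removed per-flag masks, not quoted from the tree.
namespace CPol {
enum CPol : unsigned {
  GLC = 1u << 0, // globally coherent
  SLC = 1u << 1, // system level coherent
  DLC = 1u << 2, // device level coherent (GFX10+ only)
  SCC = 1u << 4, // GFX90A cache control (bit 3 stays the separate swz flag)
  ALL = GLC | SLC | DLC | SCC
};
} // namespace CPol

// The old one-bit helpers collapse into a mask and a merge, mirroring
// extractCPol() and renderSetGLC() in the diff:
static unsigned extractCPol(unsigned CachePolicy) {
  return CachePolicy & CPol::ALL;   // keep only the cache-policy bits
}
static unsigned setGLC(unsigned CachePolicy) {
  return CachePolicy | CPol::GLC;   // force glc, e.g. for returning atomics
}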
@@ -328,20 +328,14 @@ def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastImm">,
 def gi_IMMPopCount : GICustomOperandRenderer<"renderPopcntImm">,
     GISDNodeXFormEquiv<IMMPopCount>;
 
-def gi_extract_glc : GICustomOperandRenderer<"renderExtractGLC">,
-    GISDNodeXFormEquiv<extract_glc>;
-
-def gi_extract_slc : GICustomOperandRenderer<"renderExtractSLC">,
-    GISDNodeXFormEquiv<extract_slc>;
-
-def gi_extract_dlc : GICustomOperandRenderer<"renderExtractDLC">,
-    GISDNodeXFormEquiv<extract_dlc>;
+def gi_extract_cpol : GICustomOperandRenderer<"renderExtractCPol">,
+    GISDNodeXFormEquiv<extract_cpol>;
 
 def gi_extract_swz : GICustomOperandRenderer<"renderExtractSWZ">,
     GISDNodeXFormEquiv<extract_swz>;
 
-def gi_extract_sccb : GICustomOperandRenderer<"renderExtractSCCB">,
-    GISDNodeXFormEquiv<extract_sccb>;
+def gi_set_glc : GICustomOperandRenderer<"renderSetGLC">,
+    GISDNodeXFormEquiv<set_glc>;
 
 def gi_frameindex_to_targetframeindex : GICustomOperandRenderer<"renderFrameIndex">,
     GISDNodeXFormEquiv<frameindex_to_targetframeindex>;

@@ -188,16 +188,13 @@ private:
                          SDValue &Offset1, unsigned Size) const;
   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
-                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
-                   SDValue &TFE, SDValue &DLC, SDValue &SWZ,
-                   SDValue &SCCB) const;
+                   SDValue &Idxen, SDValue &Addr64, SDValue &CPol, SDValue &TFE,
+                   SDValue &SWZ) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
-                         SDValue &SLC, SDValue &TFE, SDValue &DLC,
-                         SDValue &SWZ, SDValue &SCCB) const;
-  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
-                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
-                         SDValue &SLC) const;
+                         SDValue &SOffset, SDValue &Offset, SDValue &CPol,
+                         SDValue &TFE, SDValue &SWZ) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+                         SDValue &SOffset, SDValue &Offset) const;
   bool SelectMUBUFScratchOffen(SDNode *Parent,
                                SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                                SDValue &SOffset, SDValue &ImmOffset) const;
 
@@ -206,11 +203,8 @@ private:
                          SDValue &Offset) const;
 
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
-                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
-                         SDValue &TFE, SDValue &DLC, SDValue &SWZ,
-                         SDValue &SCCB) const;
-  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
-                         SDValue &Offset, SDValue &SLC) const;
+                         SDValue &Offset, SDValue &CPol, SDValue &TFE,
+                         SDValue &SWZ) const;
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+                         SDValue &Offset) const;
 
@@ -1393,13 +1387,11 @@ bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
   return true;
 }
 
-bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
-                                     SDValue &VAddr, SDValue &SOffset,
-                                     SDValue &Offset, SDValue &Offen,
-                                     SDValue &Idxen, SDValue &Addr64,
-                                     SDValue &GLC, SDValue &SLC,
-                                     SDValue &TFE, SDValue &DLC,
-                                     SDValue &SWZ, SDValue &SCCB) const {
+bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
+                                     SDValue &SOffset, SDValue &Offset,
+                                     SDValue &Offen, SDValue &Idxen,
+                                     SDValue &Addr64, SDValue &CPol,
+                                     SDValue &TFE, SDValue &SWZ) const {
   // Subtarget prefers to use flat instruction
   // FIXME: This should be a pattern predicate and not reach here
   if (Subtarget->useFlatForGlobal())
@@ -1407,14 +1399,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
 
   SDLoc DL(Addr);
 
-  if (!GLC.getNode())
-    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
-  if (!SLC.getNode())
-    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  if (!CPol)
+    CPol = CurDAG->getTargetConstant(0, DL, MVT::i32);
   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
-  DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
   SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
-  SCCB = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1492,10 +1480,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
-                                           SDValue &Offset, SDValue &GLC,
-                                           SDValue &SLC, SDValue &TFE,
-                                           SDValue &DLC, SDValue &SWZ,
-                                           SDValue &SCCB) const {
+                                           SDValue &Offset, SDValue &CPol,
+                                           SDValue &TFE, SDValue &SWZ) const {
   SDValue Ptr, Offen, Idxen, Addr64;
 
   // addr64 bit was removed for volcanic islands.
@@ -1504,7 +1490,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
     return false;
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   GLC, SLC, TFE, DLC, SWZ, SCCB))
+                   CPol, TFE, SWZ))
     return false;
 
   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1523,12 +1509,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
-                                           SDValue &Offset,
-                                           SDValue &SLC) const {
-  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
-  SDValue GLC, TFE, DLC, SWZ, SCCB;
+                                           SDValue &Offset) const {
+  SDValue CPol, TFE, SWZ;
 
-  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
+  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, CPol, TFE, SWZ);
 }
 
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
@@ -1650,15 +1634,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &SOffset, SDValue &Offset,
-                                           SDValue &GLC, SDValue &SLC,
-                                           SDValue &TFE, SDValue &DLC,
-                                           SDValue &SWZ, SDValue &SCCB) const {
+                                           SDValue &CPol, SDValue &TFE,
+                                           SDValue &SWZ) const {
   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
 
   if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   GLC, SLC, TFE, DLC, SWZ, SCCB))
+                   CPol, TFE, SWZ))
     return false;
 
   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1680,16 +1663,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset
                                            ) const {
-  SDValue GLC, SLC, TFE, DLC, SWZ, SCCB;
+  SDValue CPol, TFE, SWZ;
 
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
-}
-bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &Soffset, SDValue &Offset,
-                                           SDValue &SLC) const {
-  SDValue GLC, TFE, DLC, SWZ, SCCB;
-
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ, SCCB);
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, CPol, TFE, SWZ);
 }
 
 // Find a load or store from corresponding pattern root.
@@ -2383,35 +2359,32 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
 
   MachineSDNode *CmpSwap = nullptr;
   if (Subtarget->hasAddr64()) {
-    SDValue SRsrc, VAddr, SOffset, Offset, SLC;
+    SDValue SRsrc, VAddr, SOffset, Offset;
 
-    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
+    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
                                AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
       SDValue CmpVal = Mem->getOperand(2);
-      SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
+      SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
 
       // XXX - Do we care about glue operands?
 
-      SDValue Ops[] = {
-        CmpVal, VAddr, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
-      };
+      SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
+                       Mem->getChain()};
 
       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
     }
   }
 
   if (!CmpSwap) {
-    SDValue SRsrc, SOffset, Offset, SLC;
-    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
+    SDValue SRsrc, SOffset, Offset;
+    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
                                AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;
 
       SDValue CmpVal = Mem->getOperand(2);
-      SDValue GLC = CurDAG->getTargetConstant(1, SL, MVT::i1);
-      SDValue Ops[] = {
-        CmpVal, SRsrc, SOffset, Offset, GLC, SLC, Mem->getChain()
-      };
+      SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
+      SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};
 
       CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
     }

@@ -1460,28 +1460,6 @@ static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
   return TexFailCtrl == 0;
 }
 
-static bool parseCachePolicy(uint64_t Value,
-                             bool *GLC, bool *SLC, bool *DLC, bool *SCC) {
-  if (GLC) {
-    *GLC = (Value & 0x1) ? 1 : 0;
-    Value &= ~(uint64_t)0x1;
-  }
-  if (SLC) {
-    *SLC = (Value & 0x2) ? 1 : 0;
-    Value &= ~(uint64_t)0x2;
-  }
-  if (DLC) {
-    *DLC = (Value & 0x4) ? 1 : 0;
-    Value &= ~(uint64_t)0x4;
-  }
-  if (SCC) {
-    *SCC = (Value & 0x10) ? 1 : 0;
-    Value &= ~(uint64_t)0x10;
-  }
-
-  return Value == 0;
-}
-
 bool AMDGPUInstructionSelector::selectImageIntrinsic(
   MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
   MachineBasicBlock *MBB = MI.getParent();
@@ -1607,22 +1585,11 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
   // TODO: Check this in verifier.
   assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
 
-  bool GLC = false;
-  bool SLC = false;
-  bool DLC = false;
-  bool SCC = false;
-  if (BaseOpcode->Atomic) {
-    GLC = true; // TODO no-return optimization
-    if (!parseCachePolicy(
-            MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), nullptr,
-            &SLC, IsGFX10Plus ? &DLC : nullptr, &SCC))
-      return false;
-  } else {
-    if (!parseCachePolicy(
-            MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), &GLC,
-            &SLC, IsGFX10Plus ? &DLC : nullptr, &SCC))
-      return false;
-  }
+  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
+  if (BaseOpcode->Atomic)
+    CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+  if (CPol & ~AMDGPU::CPol::ALL)
+    return false;
 
   int NumVAddrRegs = 0;
   int NumVAddrDwords = 0;
@@ -1708,13 +1675,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
   if (IsGFX10Plus)
     MIB.addImm(DimInfo->Encoding);
   MIB.addImm(Unorm);
-  if (IsGFX10Plus)
-    MIB.addImm(DLC);
-  else
-    MIB.addImm(SCC);
 
-  MIB.addImm(GLC);
-  MIB.addImm(SLC);
+  MIB.addImm(CPol);
   MIB.addImm(IsA16 && // a16 or r128
              STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
   if (IsGFX10Plus)
@@ -2407,8 +2369,7 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
   MIB.addImm(0);
 
   MIB.addImm(Offset);
-  MIB.addImm(1); // glc
-  MIB.addImm(0); // slc
+  MIB.addImm(AMDGPU::CPol::GLC);
   MIB.cloneMemRefs(MI);
 
   BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), DstReg)
@@ -2991,7 +2952,7 @@ bool AMDGPUInstructionSelector::selectAMDGPU_BUFFER_ATOMIC_FADD(
   I.add(MI.getOperand(2)); // rsrc
   I.add(SOffset);
   I.addImm(Offset);
-  renderExtractSLC(I, MI, 7);
+  I.addImm(MI.getOperand(7).getImm()); // cpol
   I.cloneMemRefs(MI);
 
   MI.eraseFromParent();
@@ -3029,8 +2990,7 @@ bool AMDGPUInstructionSelector::selectGlobalAtomicFaddIntrinsic(
     .addReg(Addr.first)
     .addReg(Data)
     .addImm(Addr.second)
-    .addImm(0) // SLC
-    .addImm(0) // SSCB
+    .addImm(0) // cpol
    .cloneMemRefs(MI);
 
   MI.eraseFromParent();
@@ -4153,12 +4113,9 @@ AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
       [=](MachineInstrBuilder &MIB) { // offset
        MIB.addImm(Offset);
       },
-      addZeroImm, // glc
-      addZeroImm, // slc
+      addZeroImm, // cpol
      addZeroImm, // tfe
-      addZeroImm, // dlc
-      addZeroImm, // swz
-      addZeroImm  // scc
+      addZeroImm  // swz
    }};
 }
 
@@ -4182,12 +4139,9 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
        MIB.addImm(0);
       },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
-      addZeroImm, // glc
-      addZeroImm, // slc
+      addZeroImm, // cpol
      addZeroImm, // tfe
-      addZeroImm, // dlc
      addZeroImm, // swz
-      addZeroImm  // scc
    }};
 }
 
@@ -4219,7 +4173,9 @@ AMDGPUInstructionSelector::selectMUBUFAddr64Atomic(MachineOperand &Root) const {
      [=](MachineInstrBuilder &MIB) { // offset
        MIB.addImm(Offset);
      },
-      addZeroImm // slc
+      [=](MachineInstrBuilder &MIB) {
+        MIB.addImm(AMDGPU::CPol::GLC); // cpol
+      }
    }};
 }
 
@@ -4243,7 +4199,7 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
        MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
-      addZeroImm // slc
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(AMDGPU::CPol::GLC); } // cpol
    }};
 }
 
@@ -4333,25 +4289,11 @@ void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
   MIB.addImm(MI.getOperand(OpIdx).getImm());
 }
 
-void AMDGPUInstructionSelector::renderExtractGLC(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI,
-                                                 int OpIdx) const {
+void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
+                                                  const MachineInstr &MI,
+                                                  int OpIdx) const {
   assert(OpIdx >= 0 && "expected to match an immediate operand");
-  MIB.addImm(MI.getOperand(OpIdx).getImm() & 1);
-}
-
-void AMDGPUInstructionSelector::renderExtractSLC(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI,
-                                                 int OpIdx) const {
-  assert(OpIdx >= 0 && "expected to match an immediate operand");
-  MIB.addImm((MI.getOperand(OpIdx).getImm() >> 1) & 1);
-}
-
-void AMDGPUInstructionSelector::renderExtractDLC(MachineInstrBuilder &MIB,
-                                                 const MachineInstr &MI,
-                                                 int OpIdx) const {
-  assert(OpIdx >= 0 && "expected to match an immediate operand");
-  MIB.addImm((MI.getOperand(OpIdx).getImm() >> 2) & 1);
+  MIB.addImm(MI.getOperand(OpIdx).getImm() & AMDGPU::CPol::ALL);
 }
 
 void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
@@ -4361,11 +4303,11 @@ void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
   MIB.addImm((MI.getOperand(OpIdx).getImm() >> 3) & 1);
 }
 
-void AMDGPUInstructionSelector::renderExtractSCCB(MachineInstrBuilder &MIB,
-                                                  const MachineInstr &MI,
-                                                  int OpIdx) const {
+void AMDGPUInstructionSelector::renderSetGLC(MachineInstrBuilder &MIB,
+                                             const MachineInstr &MI,
+                                             int OpIdx) const {
   assert(OpIdx >= 0 && "expected to match an immediate operand");
-  MIB.addImm((MI.getOperand(OpIdx).getImm() >> 4) & 1);
+  MIB.addImm(MI.getOperand(OpIdx).getImm() | AMDGPU::CPol::GLC);
 }
 
 void AMDGPUInstructionSelector::renderFrameIndex(MachineInstrBuilder &MIB,

@@ -286,16 +286,12 @@ private:
 
   void renderPopcntImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                        int OpIdx) const;
-  void renderExtractGLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
-                        int OpIdx) const;
-  void renderExtractSLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
-                        int OpIdx) const;
-  void renderExtractDLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
-                        int OpIdx) const;
+  void renderExtractCPol(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                         int OpIdx) const;
   void renderExtractSWZ(MachineInstrBuilder &MIB, const MachineInstr &MI,
                         int OpIdx) const;
-  void renderExtractSCCB(MachineInstrBuilder &MIB, const MachineInstr &MI,
-                         int OpIdx) const;
+  void renderSetGLC(MachineInstrBuilder &MIB, const MachineInstr &MI,
+                    int OpIdx) const;
 
   void renderFrameIndex(MachineInstrBuilder &MIB, const MachineInstr &MI,
                         int OpIdx) const;

@@ -1702,26 +1702,14 @@ static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
   return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
 }
 
-static unsigned extractGLC(unsigned CachePolicy) {
-  return CachePolicy & 1;
-}
-
-static unsigned extractSLC(unsigned CachePolicy) {
-  return (CachePolicy >> 1) & 1;
-}
-
-static unsigned extractDLC(unsigned CachePolicy) {
-  return (CachePolicy >> 2) & 1;
+static unsigned extractCPol(unsigned CachePolicy) {
+  return CachePolicy & AMDGPU::CPol::ALL;
 }
 
 static unsigned extractSWZ(unsigned CachePolicy) {
   return (CachePolicy >> 3) & 1;
 }
 
-static unsigned extractSCCB(unsigned CachePolicy) {
-  return (CachePolicy >> 4) & 1;
-}
-
 
 MachineInstr *
 AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
@@ -1787,12 +1775,9 @@ AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
   MIB.addUse(RSrc)
      .addUse(SOffset)
      .addImm(ImmOffset)
-     .addImm(extractGLC(CachePolicy))
-     .addImm(extractSLC(CachePolicy))
+     .addImm(extractCPol(CachePolicy))
      .addImm(0) // tfe: FIXME: Remove from inst
-     .addImm(extractDLC(CachePolicy))
      .addImm(extractSWZ(CachePolicy))
-     .addImm(extractSCCB(CachePolicy))
      .cloneMemRefs(MI);
 
   // FIXME: We need a way to report failure from applyMappingImpl.

@@ -114,10 +114,7 @@ public:
     ImmTyInstOffset,
     ImmTyOffset0,
     ImmTyOffset1,
-    ImmTyDLC,
-    ImmTySCCB,
-    ImmTyGLC,
-    ImmTySLC,
+    ImmTyCPol,
     ImmTySWZ,
     ImmTyTFE,
     ImmTyD16,
@@ -340,13 +337,10 @@ public:
   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isLDS() const { return isImmTy(ImmTyLDS); }
-  bool isDLC() const { return isImmTy(ImmTyDLC); }
-  bool isSCCB() const { return isImmTy(ImmTySCCB); }
-  bool isGLC() const { return isImmTy(ImmTyGLC); }
-  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
-  // value of the GLC operand.
-  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
-  bool isSLC() const { return isImmTy(ImmTySLC); }
+  bool isCPol() const { return isImmTy(ImmTyCPol); }
+  // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and
+  // forced value of the GLC operand.
+  bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); }
   bool isSWZ() const { return isImmTy(ImmTySWZ); }
   bool isTFE() const { return isImmTy(ImmTyTFE); }
   bool isD16() const { return isImmTy(ImmTyD16); }
@@ -998,10 +992,7 @@ public:
     case ImmTyInstOffset: OS << "InstOffset"; break;
     case ImmTyOffset0: OS << "Offset0"; break;
     case ImmTyOffset1: OS << "Offset1"; break;
-    case ImmTyDLC: OS << "DLC"; break;
-    case ImmTySCCB: OS << "SCCB"; break;
-    case ImmTyGLC: OS << "GLC"; break;
-    case ImmTySLC: OS << "SLC"; break;
+    case ImmTyCPol: OS << "CPol"; break;
     case ImmTySWZ: OS << "SWZ"; break;
     case ImmTyTFE: OS << "TFE"; break;
     case ImmTyD16: OS << "D16"; break;
@@ -1183,6 +1174,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool ForcedDPP = false;
   bool ForcedSDWA = false;
   KernelScopeInfo KernelScope;
+  unsigned CPolSeen;
 
   /// @name Auto-generated Match Functions
   /// {
@@ -1267,7 +1259,7 @@ private:
   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                              unsigned RegWidth);
   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
-                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
+                    bool IsAtomic, bool IsLds = false);
   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                  bool IsGdsHardcoded);
 
@@ -1462,6 +1454,7 @@ public:
   OperandMatchResultTy
   parseNamedBit(StringRef Name, OperandVector &Operands,
                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+  OperandMatchResultTy parseCPol(OperandVector &Operands);
   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                              StringRef &Value,
                                              SMLoc &StringLoc);
@@ -1621,17 +1614,13 @@ public:
   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
   int64_t parseGPRIdxMacro();
 
-  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
-  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
-  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
-  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
+  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
+  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
+  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
 
-  AMDGPUOperand::Ptr defaultDLC() const;
-  AMDGPUOperand::Ptr defaultSCCB() const;
-  AMDGPUOperand::Ptr defaultGLC() const;
-  AMDGPUOperand::Ptr defaultGLC_1() const;
-  AMDGPUOperand::Ptr defaultSLC() const;
+  AMDGPUOperand::Ptr defaultCPol() const;
+  AMDGPUOperand::Ptr defaultCPol_GLC1() const;
 
   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   AMDGPUOperand::Ptr defaultSMEMOffset() const;
@@ -1653,6 +1642,8 @@ public:
   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
 
+  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
+
   bool parseDimId(unsigned &Encoding);
   OperandMatchResultTy parseDim(OperandVector &Operands);
   OperandMatchResultTy parseDPP8(OperandVector &Operands);
@@ -4096,15 +4087,44 @@ bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const SMLoc &IDLoc) {
-  int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
-                                          AMDGPU::OpName::glc1);
-  if (GLCPos != -1) {
-    // -1 is set by GLC_1 default operand. In all cases "glc" must be present
-    // in the asm string, and the default value means it is not present.
-    if (Inst.getOperand(GLCPos).getImm() == -1) {
+  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+                                           AMDGPU::OpName::cpol);
+  if (CPolPos == -1)
+    return true;
+
+  unsigned CPol = Inst.getOperand(CPolPos).getImm();
+
+  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+  if ((TSFlags & (SIInstrFlags::SMRD)) &&
+      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
+    Error(IDLoc, "invalid cache policy for SMRD instruction");
+    return false;
+  }
+
+  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
+    return true;
+
+  if (TSFlags & SIInstrFlags::IsAtomicRet) {
+    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
       Error(IDLoc, "instruction must use glc");
       return false;
     }
-  }
+  } else {
+    if (CPol & CPol::GLC) {
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+      StringRef CStr(S.getPointer());
+      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
+      Error(S, "instruction must not use glc");
+      return false;
+    }
+  }
+
+  if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) {
+    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+    StringRef CStr(S.getPointer());
+    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
+    Error(S, "instruction must not use scc");
+    return false;
+  }
 
   return true;
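Stripped of the SMLoc plumbing, the rules this validator enforces are: SMRD may only use glc/dlc; a returning non-MIMG atomic must set glc; a non-returning atomic must not; and gfx90a FP atomics must not set scc. A condensed sketch of the same checks (the helper name is hypothetical; the flag names are the ones used above):

static bool isCPolLegal(unsigned CPol, uint64_t TSFlags, bool IsGFX90A) {
  using namespace AMDGPU;
  if ((TSFlags & SIInstrFlags::SMRD) && (CPol & ~(CPol::GLC | CPol::DLC)))
    return false;                                // SMRD: only glc/dlc
  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC))
      return false;                              // returning atomic needs glc
  } else if (TSFlags & SIInstrFlags::IsAtomicNoRet) {
    if (CPol & CPol::GLC)
      return false;                              // no-return atomic: no glc
  }
  if (IsGFX90A && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic))
    return false;                                // gfx90a FP atomic: no scc
  return true;
}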
@@ -5280,6 +5300,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
   OperandMode Mode = OperandMode_Default;
   if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
     Mode = OperandMode_NSA;
+  CPolSeen = 0;
   OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
 
   if (Res != MatchOperand_Success) {
@@ -5404,12 +5425,6 @@ AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
     Error(S, "a16 modifier is not supported on this GPU");
     return MatchOperand_ParseFail;
   }
-  if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) {
-    Error(S, "dlc modifier is not supported on this GPU");
-    return MatchOperand_ParseFail;
-  }
-  if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB)
-    return MatchOperand_ParseFail;
 
   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
     ImmTy = AMDGPUOperand::ImmTyR128A16;
@@ -5418,6 +5433,62 @@ AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
   return MatchOperand_Success;
 }
 
+OperandMatchResultTy
+AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+  unsigned CPolOn = 0;
+  unsigned CPolOff = 0;
+  SMLoc S = getLoc();
+
+  if (trySkipId("glc"))
+    CPolOn = AMDGPU::CPol::GLC;
+  else if (trySkipId("noglc"))
+    CPolOff = AMDGPU::CPol::GLC;
+  else if (trySkipId("slc"))
+    CPolOn = AMDGPU::CPol::SLC;
+  else if (trySkipId("noslc"))
+    CPolOff = AMDGPU::CPol::SLC;
+  else if (trySkipId("dlc"))
+    CPolOn = AMDGPU::CPol::DLC;
+  else if (trySkipId("nodlc"))
+    CPolOff = AMDGPU::CPol::DLC;
+  else if (trySkipId("scc"))
+    CPolOn = AMDGPU::CPol::SCC;
+  else if (trySkipId("noscc"))
+    CPolOff = AMDGPU::CPol::SCC;
+  else
+    return MatchOperand_NoMatch;
+
+  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
+    Error(S, "dlc modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+
+  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
+    Error(S, "scc modifier is not supported on this GPU");
+    return MatchOperand_ParseFail;
+  }
+
+  if (CPolSeen & (CPolOn | CPolOff)) {
+    Error(S, "duplicate cache policy modifier");
+    return MatchOperand_ParseFail;
+  }
+
+  CPolSeen |= (CPolOn | CPolOff);
+
+  for (unsigned I = 1; I != Operands.size(); ++I) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+    if (Op.isCPol()) {
+      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
+      return MatchOperand_Success;
+    }
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
+                                              AMDGPUOperand::ImmTyCPol));
+
+  return MatchOperand_Success;
+}
+
 static void addOptionalImmOperand(
   MCInst& Inst, const OperandVector& Operands,
   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
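This loop-and-merge is what makes the flags order-independent: each modifier contributes one bit to CPolOn or CPolOff and then folds into the single existing cpol operand. A tiny sketch of that fold (helper name hypothetical):

// Mirrors Op.setImm((Op.getImm() | CPolOn) & ~CPolOff) above.
static unsigned applyCPolModifier(unsigned Imm, unsigned On, unsigned Off) {
  return (Imm | On) & ~Off;
}
// "glc noslc" and "noslc glc" produce the same immediate, CPol::GLC:
//   applyCPolModifier(applyCPolModifier(0, CPol::GLC, 0), 0, CPol::SLC)
//   == applyCPolModifier(applyCPolModifier(0, 0, CPol::SLC), CPol::GLC, 0)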
@@ -6835,36 +6906,43 @@ AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
 // mubuf
 //===----------------------------------------------------------------------===//
 
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
 }
 
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
-  return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
-  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const {
+  return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(),
+                                  AMDGPUOperand::ImmTyCPol);
 }
 
 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
-                                   const OperandVector &Operands,
-                                   bool IsAtomic,
-                                   bool IsAtomicReturn,
-                                   bool IsLds) {
+                                   const OperandVector &Operands,
+                                   bool IsAtomic,
+                                   bool IsLds) {
   bool IsLdsOpcode = IsLds;
   bool HasLdsModifier = false;
   OptionalImmIndexMap OptionalIdx;
-  assert(IsAtomicReturn ? IsAtomic : true);
   unsigned FirstOperandIdx = 1;
+  bool IsAtomicReturn = false;
+
+  if (IsAtomic) {
+    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
+      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+      if (!Op.isCPol())
+        continue;
+      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+      break;
+    }
+
+    if (!IsAtomicReturn) {
+      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+      if (NewOpc != -1)
+        Inst.setOpcode(NewOpc);
+    }
+
+    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+                     SIInstrFlags::IsAtomicRet;
+  }
 
   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -6915,19 +6993,12 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
   }
 
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
-  if (!IsAtomic || IsAtomicReturn) {
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
-                          IsAtomicReturn ? -1 : 0);
-  }
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
 
   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   }
 
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
 }
 
 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
@@ -6962,12 +7033,9 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
   addOptionalImmOperand(Inst, Operands, OptionalIdx,
                         AMDGPUOperand::ImmTyOffset);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB);
 }
 
 //===----------------------------------------------------------------------===//
@@ -7009,16 +7077,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
   if (IsGFX10Plus)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
-
-  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1)
-    addOptionalImmOperand(Inst, Operands, OptionalIdx,
-                          AMDGPUOperand::ImmTySCCB);
-
-  if (IsGFX10Plus)
-    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
-
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
@@ -7034,6 +7093,61 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
   cvtMIMG(Inst, Operands, true);
 }
 
+void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
+  OptionalImmIndexMap OptionalIdx;
+  bool IsAtomicReturn = false;
+
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+    if (!Op.isCPol())
+      continue;
+    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
+    break;
+  }
+
+  if (!IsAtomicReturn) {
+    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
+    if (NewOpc != -1)
+      Inst.setOpcode(NewOpc);
+  }
+
+  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
+                   SIInstrFlags::IsAtomicRet;
+
+  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+    // Add the register arguments
+    if (Op.isReg()) {
+      Op.addRegOperands(Inst, 1);
+      if (IsAtomicReturn && i == 1)
+        Op.addRegOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle the case where soffset is an immediate
+    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
+      Op.addImmOperands(Inst, 1);
+      continue;
+    }
+
+    // Handle tokens like 'offen' which are sometimes hard-coded into the
+    // asm string. There are no MCInst operands for these.
+    if (Op.isToken()) {
+      continue;
+    }
+    assert(Op.isImm());
+
+    // Handle optional arguments
+    OptionalIdx[Op.getImmTy()] = i;
+  }
+
+  if ((int)Inst.getNumOperands() <=
+      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
+    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
+}
+
 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                       const OperandVector &Operands) {
   for (unsigned I = 1; I < Operands.size(); ++I) {
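cvtSMEMAtomic and the reworked cvtMubufImpl share one idea: whether an atomic returns its old value is now recovered from the GLC bit of the parsed cpol operand rather than from a dedicated glc1 operand. The shared step, sketched (CPolImm stands for the parsed cpol immediate and is not a variable in the tree):

// No glc in the cache policy => rewrite the _RTN opcode to its no-return
// twin using the existing getAtomicNoRetOp() table, then re-read TSFlags.
if (!(CPolImm & AMDGPU::CPol::GLC)) {
  int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
  if (NewOpc != -1)
    Inst.setOpcode(NewOpc);
}
bool IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                      SIInstrFlags::IsAtomicRet;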
@@ -7127,10 +7241,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
-  {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
-  {"scc",     AMDGPUOperand::ImmTySCCB, true, nullptr},
-  {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
-  {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
+  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
@@ -7213,6 +7324,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
                                         Op.ConvertResult);
     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
       res = parseDim(Operands);
+    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
+      res = parseCPol(Operands);
     } else {
       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
     }
@@ -8078,8 +8191,6 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
   case MCK_lds:
     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
-  case MCK_glc:
-    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
   case MCK_idxen:
     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   case MCK_offen:

@@ -6,16 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 10, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
 
 def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
 def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
 
-def MUBUFOffset : ComplexPattern<i64, 9, "SelectMUBUFOffset">;
-def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
-def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
 
 def BUFAddrKind {
   int Offset = 0;
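The reduced ComplexPattern arities above are just the selector signatures from earlier in this commit restated: MUBUFOffset drops from 9 to 6 operands (srsrc, soffset, offset, cpol, tfe, swz) because glc, slc, and dlc merge into one cpol and sccb goes away; MUBUFAddr64 goes from 10 to 7 with vaddr added to that same list; and the atomic variants lose their single slc operand (5 to 4 and 4 to 3) because the patterns now supply the cache policy themselves via set_glc / $cachepolicy.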
@@ -124,20 +122,17 @@ class MTBUF_Real <MTBUF_Pseudo ps> :
   let TSFlags = ps.TSFlags;
 
   bits<12> offset;
-  bits<1>  glc;
-  bits<1>  dlc;
+  bits<5>  cpol;
   bits<7>  format;
   bits<8>  vaddr;
   bits<10> vdata;
   bits<7>  srsrc;
-  bits<1>  slc;
   bits<1>  tfe;
   bits<8>  soffset;
 
   bits<4> dfmt = format{3-0};
   bits<3> nfmt = format{6-4};
 
-  bits<1> sccb;
   // GFX90A+ only: instruction uses AccVGPR for data
   // Bit superceedes tfe.
   bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
@@ -150,17 +145,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
   RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
   dag InsNoData = !if(!empty(vaddrList),
     (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb),
+         offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz),
     (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb)
+         offset:$offset, FORMAT:$format, CPol:$cpol, TFE:$tfe, SWZ:$swz)
   );
   dag InsData = !if(!empty(vaddrList),
     (ins vdata_op:$vdata, SReg_128:$srsrc,
-         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb),
+         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
+         TFE:$tfe, SWZ:$swz),
     (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
-         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
-         SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb)
+         SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol,
+         TFE:$tfe, SWZ:$swz)
   );
   dag ret = !if(!empty(vdataList), InsNoData, InsData);
 }
@@ -212,7 +207,7 @@ class MTBUF_Load_Pseudo <string opName,
   : MTBUF_Pseudo<opName,
                  (outs getLdStRegisterOperand<vdataClass>.ret:$vdata),
                  getMTBUFIns<addrKindCopy>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -254,7 +249,7 @@ class MTBUF_Store_Pseudo <string opName,
   : MTBUF_Pseudo<opName,
                  (outs),
                  getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
-                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
+                 " $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
                  pattern>,
     MTBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -357,16 +352,13 @@ class MUBUF_Real <MUBUF_Pseudo ps> :
   let UseNamedOperandTable = ps.UseNamedOperandTable;
 
   bits<12> offset;
-  bits<1>  glc;
-  bits<1>  dlc;
+  bits<5>  cpol;
   bits<8>  vaddr;
   bits<10> vdata;
   bits<7>  srsrc;
-  bits<1>  slc;
   bits<1>  tfe;
   bits<8>  soffset;
 
-  bits<1> sccb;
   // GFX90A+ only: instruction uses AccVGPR for data
   // Bit superceedes tfe.
   bits<1> acc = !if(ps.has_vdata, vdata{9}, 0);
@@ -410,19 +402,19 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
   RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
   dag InsNoData = !if(!empty(vaddrList),
     (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, GLC:$glc, SLC:$slc),
+         offset:$offset, CPol:$cpol),
     (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, GLC:$glc, SLC:$slc)
+         offset:$offset, CPol:$cpol)
   );
   dag InsData = !if(!empty(vaddrList),
     (ins vdata_op:$vdata, SReg_128:$srsrc,
-         SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc),
+         SCSrc_b32:$soffset, offset:$offset, CPol:$cpol),
     (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
-         SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc)
+         SCSrc_b32:$soffset, offset:$offset, CPol:$cpol)
   );
   dag ret = !con(
     !if(!empty(vdataList), InsNoData, InsData),
-    !if(isLds, (ins DLC:$dlc, SWZ:$swz, SCCB_0:$sccb), (ins TFE:$tfe, DLC:$dlc, SWZ:$swz, SCCB_0:$sccb))
+    !if(isLds, (ins SWZ:$swz), (ins TFE:$tfe, SWZ:$swz))
   );
 }
 
@@ -495,8 +487,8 @@ class MUBUF_Load_Pseudo <string opName,
     (outs vdata_op:$vdata),
     !con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
          !if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))),
-    " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
-      !if(isLds, " lds", "$tfe") # "$dlc$swz$sccb",
+    " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol" #
+      !if(isLds, " lds", "$tfe") # "$swz",
     pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # !if(isLds, "_lds", "") #
@@ -514,15 +506,15 @@ class MUBUF_Load_Pseudo <string opName,
 }
 
 class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
-  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
-  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
+  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
+  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
 >;
 
 class MUBUF_Addr64_Load_Pat <Instruction inst,
                              ValueType load_vt = i32,
                              SDPatternOperator ld = null_frag> : Pat <
-  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
-  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
+  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
+  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
 >;
 
 multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
@@ -575,7 +567,7 @@ class MUBUF_Store_Pseudo <string opName,
   : MUBUF_Pseudo<opName,
                  (outs),
                  getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret,
-                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz$sccb",
+                 " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol$tfe$swz",
                  pattern>,
     MUBUF_SetupAddr<addrKindCopy> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -593,12 +585,12 @@ multiclass MUBUF_Pseudo_Stores<string opName,
 
   def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt,
     [(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                             i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
+                                             i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
     MUBUFAddr64Table<0, NAME>;
 
   def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt,
     [(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                             i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
+                                             i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
     MUBUFAddr64Table<1, NAME>;
 
   def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
@@ -616,8 +608,8 @@ multiclass MUBUF_Pseudo_Stores<string opName,
 class MUBUF_Pseudo_Store_Lds<string opName>
   : MUBUF_Pseudo<opName,
                  (outs),
-                 (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz, SCCB_0:$sccb),
-                 " $srsrc, $soffset$offset lds$glc$slc$swz$sccb"> {
+                 (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz),
+                 " $srsrc, $soffset$offset lds$cpol$swz"> {
   let mayLoad = 0;
   let mayStore = 1;
   let maybeAtomic = 1;
@@ -638,15 +630,15 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
   dag ret = !if(vdata_in,
     !if(!empty(vaddrList),
       (ins vdata_op:$vdata_in,
-           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc),
+           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol),
       (ins vdata_op:$vdata_in, vaddrClass:$vaddr,
-           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC_1:$glc1, SLC:$slc)
+           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_GLC1:$cpol)
     ),
     !if(!empty(vaddrList),
       (ins vdata_op:$vdata,
-           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc),
+           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol),
      (ins vdata_op:$vdata, vaddrClass:$vaddr,
-           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, SLC:$slc)
+           SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol)
    ));
 }
 
@@ -689,6 +681,7 @@ class MUBUF_Atomic_Pseudo<string opName,
   let has_tfe = 0;
   let has_sccb = 0;
   let maybeAtomic = 1;
+  let AsmMatchConverter = "cvtMubufAtomic";
 }
 
 class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
@@ -700,7 +693,7 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
   : MUBUF_Atomic_Pseudo<opName, addrKindCopy,
                         (outs),
                         getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret,
-                        " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$slc",
+                        " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
                         pattern>,
     AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
   let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
@@ -708,7 +701,6 @@ class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
   let dlc_value = 0;
   let sccb_value = 0;
   let IsAtomicNoRet = 1;
-  let AsmMatchConverter = "cvtMubufAtomic";
 }
 
 class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
@@ -721,7 +713,7 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
   : MUBUF_Atomic_Pseudo<opName, addrKindCopy,
                         (outs vdata_op:$vdata),
                         getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
-                        " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc1$slc",
+                        " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$cpol",
                         pattern>,
     AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
   let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
@@ -731,7 +723,6 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
   let IsAtomicRet = 1;
   let Constraints = "$vdata = $vdata_in";
   let DisableEncoding = "$vdata_in";
-  let AsmMatchConverter = "cvtMubufAtomicReturn";
 }
 
 multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
@@ -766,15 +757,15 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
   let FPAtomic = isFP in
   def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(set vdataType:$vdata,
-     (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
+     (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset),
              vdataType:$vdata_in))]>,
     MUBUFAddr64Table <0, NAME # "_RTN">;
 
   let FPAtomic = isFP in
   def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(set vdataType:$vdata,
-     (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
-             vdataType:$vdata_in))]>,
+     (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+             vdataType:$vdata_in))]>,
     MUBUFAddr64Table <1, NAME # "_RTN">;
 
   let FPAtomic = isFP in
@@ -1210,24 +1201,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
     (vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
              timm:$auxiliary, 0)),
     (!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
-      (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
   >;
 
   def : GCNPat<
     (vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
             timm:$auxiliary, 0)),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
-      (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
 
  def : GCNPat<
    (vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
            timm:$auxiliary, timm)),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
-      (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
 
  def : GCNPat<
@@ -1236,8 +1224,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
-      (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
 }
 
@@ -1296,26 +1283,21 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
    (st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
            timm:$auxiliary, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
-      (extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
 
  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
            timm:$auxiliary, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_glc $auxiliary),
-      (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
 
  def : GCNPat<
    (st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
            timm:$auxiliary, timm),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_glc $auxiliary),
-      (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      (as_i16timm $offset), (extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
  >;
 
  def : GCNPat<
@@ -1324,9 +1306,8 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
      getVregSrcForVT<vt>.ret:$vdata,
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
-      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_glc $auxiliary),
-      (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
-      (extract_swz $auxiliary), (extract_sccb $auxiliary))
+      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_cpol $auxiliary),
+      0, (extract_swz $auxiliary))
  >;
 }
 
@@ -1386,7 +1367,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
                    timm:$offset, timm:$cachepolicy, 0)),
    (!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN)
      getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_slc $cachepolicy))
+      (as_i16timm $offset), (set_glc $cachepolicy))
  >;
 
  def : GCNPat<
@@ -1394,7 +1375,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
                    timm:$offset, timm:$cachepolicy, timm)),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
      VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_slc $cachepolicy))
+      (as_i16timm $offset), (set_glc $cachepolicy))
  >;
 
  def : GCNPat<
@@ -1402,7 +1383,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
                    i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) getVregSrcForVT<vt>.ret:$vdata_in,
      VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_slc $cachepolicy))
+      (as_i16timm $offset), (set_glc $cachepolicy))
  >;
 
  def : GCNPat<
@@ -1412,7 +1393,7 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
      getVregSrcForVT<vt>.ret:$vdata_in,
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
-      (extract_slc $cachepolicy))
+      (set_glc $cachepolicy))
  >;
 }
 
@@ -1460,7 +1441,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
                    0, i32:$soffset, timm:$offset,
                    timm:$cachepolicy, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_slc $cachepolicy))
+      (as_i16timm $offset), $cachepolicy)
  >;
 
  def : GCNPat<
@@ -1468,7 +1449,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
                    0, i32:$soffset, timm:$offset,
                    timm:$cachepolicy, timm),
    (!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_slc $cachepolicy))
+      (as_i16timm $offset), $cachepolicy)
  >;
 
  def : GCNPat<
@@ -1476,7 +1457,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
                    i32:$voffset, i32:$soffset, timm:$offset,
                    timm:$cachepolicy, 0),
    (!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
-      (as_i16timm $offset), (extract_slc $cachepolicy))
+      (as_i16timm $offset), $cachepolicy)
  >;
 
  def : GCNPat<
@@ -1486,7 +1467,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
    (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
      getVregSrcForVT<vt>.ret:$vdata_in,
      (REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
-      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy))
+      SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), $cachepolicy)
  >;
 }
|
@ -1512,7 +1493,7 @@ def : GCNPat<
|
|||
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
|
||||
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
|
||||
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(extract_slc $cachepolicy)), VReg_64)), sub0)
|
||||
(set_glc $cachepolicy)), VReg_64)), sub0)
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
|
@ -1523,7 +1504,8 @@ def : GCNPat<
|
|||
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
|
||||
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
|
||||
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
|
||||
VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
|
||||
VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(set_glc $cachepolicy)), VReg_64)),
|
||||
sub0)
|
||||
>;
|
||||
|
||||
|
@ -1535,7 +1517,8 @@ def : GCNPat<
|
|||
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS
|
||||
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
|
||||
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
|
||||
VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
|
||||
VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(set_glc $cachepolicy)), VReg_64)),
|
||||
sub0)
|
||||
>;
|
||||
|
||||
|
@ -1548,28 +1531,28 @@ def : GCNPat<
|
|||
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
|
||||
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
|
||||
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
|
||||
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_slc $cachepolicy)), VReg_64)),
|
||||
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(set_glc $cachepolicy)), VReg_64)),
|
||||
sub0)
|
||||
>;
|
||||
|
||||
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
|
||||
PatFrag constant_ld> : GCNPat <
|
||||
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
|
||||
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
|
||||
>;
|
||||
|
||||
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
|
||||
ValueType vt, PatFrag atomic_ld> {
|
||||
def : GCNPat <
|
||||
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i1:$slc))),
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0)
|
||||
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
|
||||
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0)
|
||||
(vt (atomic_ld (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset))),
|
||||
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1590,8 +1573,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
|
|||
|
||||
def : GCNPat <
|
||||
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
|
||||
(Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
|
||||
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
|
||||
(Instr_OFFSET $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1614,12 +1597,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
|
|||
def : GCNPat <
|
||||
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
|
||||
i32:$soffset, u16imm:$offset))),
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1629,12 +1612,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
|
|||
ValueType vt, PatFrag ld_frag> {
|
||||
def : GCNPat <
|
||||
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1679,14 +1662,13 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
|
|||
ValueType vt, PatFrag atomic_st> {
|
||||
// Store follows atomic op convention so address is first
|
||||
def : GCNPat <
|
||||
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i1:$slc), vt:$val),
|
||||
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0, 0)
|
||||
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), vt:$val),
|
||||
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
|
||||
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0, 0)
|
||||
(atomic_st (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
|
||||
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
let SubtargetPredicate = isGFX6GFX7 in {
|
||||
|
@ -1700,8 +1682,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
|
|||
|
||||
def : GCNPat <
|
||||
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)),
|
||||
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
|
||||
i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)),
|
||||
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1715,13 +1697,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
|
|||
def : GCNPat <
|
||||
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
|
||||
i32:$soffset, u16imm:$offset)),
|
||||
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
|
||||
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
|
||||
u16imm:$offset)),
|
||||
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, 0)
|
||||
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1767,8 +1749,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
timm:$format, timm:$auxiliary, 0)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
|
@ -1776,8 +1757,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
timm:$format, timm:$auxiliary, timm)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
|
@ -1785,8 +1765,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
timm:$format, timm:$auxiliary, 0)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
|
@ -1796,8 +1775,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
|
||||
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
|
||||
(as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1835,8 +1813,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
timm:$format, timm:$auxiliary, 0),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
|
||||
(as_i16timm $offset), (as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
|
@ -1844,8 +1821,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
timm:$format, timm:$auxiliary, timm),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
|
||||
(as_i16timm $offset), (as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
|
@ -1853,8 +1829,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
timm:$format, timm:$auxiliary, 0),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
|
||||
(as_i16timm $offset), (as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
|
@ -1864,8 +1839,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
getVregSrcForVT<vt>.ret:$vdata,
|
||||
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
|
||||
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format),
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary), (extract_sccb $auxiliary))
|
||||
(extract_cpol $auxiliary), 0, (extract_swz $auxiliary))
|
||||
>;
|
||||
}
|
||||
|
||||
|
@ -1907,21 +1881,21 @@ class Base_MUBUF_Real_gfx6_gfx7_gfx10<bits<7> op, MUBUF_Pseudo ps, int ef> :
|
|||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
|
||||
let Inst{16} = ps.lds;
|
||||
let Inst{24-18} = op;
|
||||
let Inst{31-26} = 0x38;
|
||||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
|
||||
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
||||
let Inst{54} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
|
||||
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
||||
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
||||
}
|
||||
|
||||
class MUBUF_Real_gfx10<bits<8> op, MUBUF_Pseudo ps> :
|
||||
Base_MUBUF_Real_gfx6_gfx7_gfx10<op{6-0}, ps, SIEncodingFamily.GFX10> {
|
||||
let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
|
||||
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
|
||||
let Inst{25} = op{7};
|
||||
}
|
||||
|
||||
|
@ -1973,16 +1947,33 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
|
|||
}
|
||||
multiclass MUBUF_Real_Atomics_RTN_gfx10<bits<8> op> {
|
||||
def _BOTHEN_RTN_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>,
|
||||
AtomicNoRet<NAME # "_BOTHEN_gfx10", 1>;
|
||||
def _IDXEN_RTN_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>,
|
||||
AtomicNoRet<NAME # "_IDXEN_gfx10", 1>;
|
||||
def _OFFEN_RTN_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>,
|
||||
AtomicNoRet<NAME # "_OFFEN_gfx10", 1>;
|
||||
def _OFFSET_RTN_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>,
|
||||
AtomicNoRet<NAME # "_OFFSET_gfx10", 1>;
|
||||
}
|
||||
multiclass MUBUF_Real_Atomics_gfx10<bits<8> op> :
|
||||
MUBUF_Real_AllAddr_gfx10<op>, MUBUF_Real_Atomics_RTN_gfx10<op>;
|
||||
MUBUF_Real_Atomics_RTN_gfx10<op> {
|
||||
def _BOTHEN_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
|
||||
AtomicNoRet<NAME # "_BOTHEN_gfx10", 0>;
|
||||
def _IDXEN_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
|
||||
AtomicNoRet<NAME # "_IDXEN_gfx10", 0>;
|
||||
def _OFFEN_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
|
||||
AtomicNoRet<NAME # "_OFFEN_gfx10", 0>;
|
||||
def _OFFSET_gfx10 :
|
||||
MUBUF_Real_gfx10<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
|
||||
AtomicNoRet<NAME # "_OFFSET_gfx10", 0>;
|
||||
}
|
||||
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
|
||||
|
||||
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Real_AllAddr_gfx10<0x019>;
|
||||
|
@ -2062,18 +2053,38 @@ let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
|
|||
def _LDS_BOTHEN_gfx6_gfx7 : MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_LDS_BOTHEN")>,
|
||||
MUBUFLdsTable<1, NAME # "_BOTHEN_gfx6_gfx7">;
|
||||
}
|
||||
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> :
|
||||
MUBUF_Real_AllAddr_gfx6_gfx7<op> {
|
||||
multiclass MUBUF_Real_Atomics_gfx6_gfx7<bits<8> op> {
|
||||
def _ADDR64_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>,
|
||||
AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 0>;
|
||||
def _BOTHEN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>,
|
||||
AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 0>;
|
||||
def _IDXEN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>,
|
||||
AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 0>;
|
||||
def _OFFEN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>,
|
||||
AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 0>;
|
||||
def _OFFSET_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>,
|
||||
AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 0>;
|
||||
|
||||
def _ADDR64_RTN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>;
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64_RTN")>,
|
||||
AtomicNoRet<NAME # "_ADDR64_gfx6_gfx7", 1>;
|
||||
def _BOTHEN_RTN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>;
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN_RTN")>,
|
||||
AtomicNoRet<NAME # "_BOTHEN_gfx6_gfx7", 1>;
|
||||
def _IDXEN_RTN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>,
|
||||
AtomicNoRet<NAME # "_IDXEN_gfx6_gfx7", 1>;
|
||||
def _OFFEN_RTN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>,
|
||||
AtomicNoRet<NAME # "_OFFEN_gfx6_gfx7", 1>;
|
||||
def _OFFSET_RTN_gfx6_gfx7 :
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
|
||||
MUBUF_Real_gfx6_gfx7<op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>,
|
||||
AtomicNoRet<NAME # "_OFFSET_gfx6_gfx7", 1>;
|
||||
}
|
||||
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"
|
||||
|
||||
|
@ -2162,13 +2173,13 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
|
|||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
|
||||
let Inst{18-16} = op;
|
||||
let Inst{31-26} = 0x3a; //encoding
|
||||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
|
||||
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
||||
let Inst{54} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
|
||||
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
||||
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
||||
}
|
||||
|
@ -2179,7 +2190,7 @@ class Base_MTBUF_Real_gfx6_gfx7_gfx10<bits<3> op, MTBUF_Pseudo ps, int ef> :
|
|||
|
||||
class MTBUF_Real_gfx10<bits<4> op, MTBUF_Pseudo ps> :
|
||||
Base_MTBUF_Real_gfx6_gfx7_gfx10<op{2-0}, ps, SIEncodingFamily.GFX10> {
|
||||
let Inst{15} = !if(ps.has_dlc, dlc, ps.dlc_value);
|
||||
let Inst{15} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlc_value);
|
||||
let Inst{25-19} = format;
|
||||
let Inst{53} = op{3};
|
||||
}
|
||||
|
@ -2251,15 +2262,17 @@ defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Real_AllAddr_gfx6_gfx7_gfx10<0x007>;
|
|||
class MUBUF_Real_Base_vi <bits<7> op, MUBUF_Pseudo ps, int Enc> :
|
||||
MUBUF_Real<ps>,
|
||||
Enc64,
|
||||
SIMCInstr<ps.PseudoInstr, Enc> {
|
||||
SIMCInstr<ps.PseudoInstr, Enc>,
|
||||
AtomicNoRet<!subst("_RTN","",NAME), !if(ps.IsAtomicNoRet, 0,
|
||||
!if(ps.IsAtomicRet, 1, ?))> {
|
||||
|
||||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{15} = !if(ps.has_sccb, sccb, ps.sccb_value);
|
||||
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
|
||||
let Inst{15} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
|
||||
let Inst{16} = ps.lds;
|
||||
let Inst{17} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
|
||||
let Inst{24-18} = op;
|
||||
let Inst{31-26} = 0x38; //encoding
|
||||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
|
@ -2290,7 +2303,7 @@ multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps> {
|
|||
def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
|
||||
}
|
||||
|
||||
multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
|
||||
multiclass MUBUF_Real_AllAddr_vi<bits<7> op, bit isAtomic = 0, bit isAtomicRet = 0> {
|
||||
defm _OFFSET : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
|
||||
defm _OFFEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
|
||||
defm _IDXEN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
|
||||
|
@ -2346,9 +2359,9 @@ class MUBUF_Real_gfx80 <bits<7> op, MUBUF_Pseudo ps> :
|
|||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
|
||||
let Inst{16} = ps.lds;
|
||||
let Inst{17} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{17} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
|
||||
let Inst{24-18} = op;
|
||||
let Inst{31-26} = 0x38; //encoding
|
||||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
|
@ -2366,7 +2379,7 @@ multiclass MUBUF_Real_AllAddr_gfx80<bits<7> op> {
|
|||
}
|
||||
|
||||
multiclass MUBUF_Real_Atomic_vi<bits<7> op> :
|
||||
MUBUF_Real_AllAddr_vi<op> {
|
||||
MUBUF_Real_AllAddr_vi<op, 1, 0> {
|
||||
defm _OFFSET_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET_RTN")>;
|
||||
defm _OFFEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN_RTN")>;
|
||||
defm _IDXEN_RTN : MUBUF_Real_vi_gfx90a <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN_RTN")>;
|
||||
|
@ -2488,7 +2501,7 @@ class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
|
|||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
|
||||
let Inst{18-15} = op;
|
||||
let Inst{22-19} = dfmt;
|
||||
let Inst{25-23} = nfmt;
|
||||
|
@ -2496,8 +2509,9 @@ class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
|
|||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
|
||||
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
||||
let Inst{53} = !if(ps.has_sccb, sccb, ps.sccb_value);
|
||||
let Inst{54} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{53} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccb_value);
|
||||
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
|
||||
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
||||
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
||||
}
|
||||
|
||||
|
@ -2540,7 +2554,7 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
|
|||
let Inst{11-0} = !if(ps.has_offset, offset, ?);
|
||||
let Inst{12} = ps.offen;
|
||||
let Inst{13} = ps.idxen;
|
||||
let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
|
||||
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glc_value);
|
||||
let Inst{18-15} = op;
|
||||
let Inst{22-19} = dfmt;
|
||||
let Inst{25-23} = nfmt;
|
||||
|
@ -2548,7 +2562,7 @@ class MTBUF_Real_gfx80 <bits<4> op, MTBUF_Pseudo ps> :
|
|||
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
|
||||
let Inst{47-40} = !if(ps.has_vdata, vdata{7-0}, ?);
|
||||
let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
|
||||
let Inst{54} = !if(ps.has_slc, slc, ?);
|
||||
let Inst{54} = !if(ps.has_slc, cpol{CPolBit.SLC}, ?);
|
||||
let Inst{55} = !if(ps.has_tfe, tfe, ?);
|
||||
let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
|
||||
}
|
||||
|
|
|
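Note on the encoding changes above: the real-instruction classes now slice single bits out of the cpol operand (e.g. cpol{CPolBit.GLC}) instead of reading dedicated glc/slc/dlc/sccb fields. A minimal C++ sketch of the same bit selection, assuming CPolBit assigns indices GLC=0, SLC=1, DLC=2 and SCC=4 to match the CPol mask values introduced in SIDefines.h below; the helper and its names are illustrative, not part of this patch:

    #include <cstdint>

    // Assumed bit indices for the 5-bit cpol operand (GLC=0, SLC=1, DLC=2, SCC=4).
    constexpr unsigned GLCBit = 0, SLCBit = 1, DLCBit = 2, SCCBit = 4;

    // Mirrors how an encoding picks one cpol bit for a fixed instruction-word
    // position, e.g. Inst{14} = cpolBit(cpol, GLCBit).
    constexpr bool cpolBit(uint64_t Cpol, unsigned Bit) {
      return (Cpol >> Bit) & 1;
    }

    static_assert(cpolBit(/*glc|slc*/ 3, GLCBit) && cpolBit(3, SLCBit) &&
                  !cpolBit(3, DLCBit), "bit selection sanity check");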
@ -541,9 +541,20 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}

if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MUBUF | SIInstrFlags::FLAT)) &&
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::glc1) != -1) {
insertNamedMCOperand(MI, MCOperand::createImm(1), AMDGPU::OpName::glc1);
(SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD))) {
int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::cpol);
if (CPolPos != -1) {
unsigned CPol =
(MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
AMDGPU::CPol::GLC : 0;
if (MI.getNumOperands() <= (unsigned)CPolPos) {
insertNamedMCOperand(MI, MCOperand::createImm(CPol),
AMDGPU::OpName::cpol);
} else if (CPol) {
MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
}
}
}

if (Res && (MCII->get(MI.getOpcode()).TSFlags &

@ -559,20 +570,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}

if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::FLAT |
SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
if (!isGFX10()) {
int DLCOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dlc);
if (DLCOpIdx != -1) {
auto DLCIter = MI.begin();
std::advance(DLCIter, DLCOpIdx);
MI.insert(DLCIter, MCOperand::createImm(0));
}
}
}

if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))) {
int SWZOpIdx =
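The disassembler hunk above seeds the cpol operand and, for return-value atomics, forces the implied GLC bit on before merging with whatever was decoded. A hedged standalone sketch of just that defaulting rule (names are illustrative; the real code works on an MCInst via the generated operand tables):

    // Illustrative only: return-value atomics read back through the cache,
    // so GLC is implied even when the encoding carries no explicit flag.
    enum CPolMask : unsigned { GLC = 1, SLC = 2, DLC = 4, SCC = 16 };

    unsigned defaultCPol(bool IsAtomicRet, unsigned DecodedCPol) {
      unsigned Implied = IsAtomicRet ? GLC : 0;
      return DecodedCPol | Implied; // merge the implied bit into the operand
    }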
@ -94,12 +94,7 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
bits<7> saddr;
bits<10> vdst;

bits<1> slc;
bits<1> glc;
bits<1> dlc;

// Only valid on gfx90a+
bits<1> sccb;
bits<5> cpol;

// Only valid on gfx9
bits<1> lds = 0; // XXX - What does this actually do?

@ -122,8 +117,8 @@ class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
let Inst{13} = lds;
let Inst{15-14} = seg;

let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
let Inst{17} = slc;
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
let Inst{17} = cpol{CPolBit.SLC};
let Inst{24-18} = op;
let Inst{31-26} = 0x37; // Encoding.
let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);

@ -156,9 +151,9 @@ class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
(ins VReg_64:$vaddr)),
(ins flat_offset:$offset)),
// FIXME: Operands with default values do not work with following non-optional operands.
!if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, vdata_op:$vdst_in),
(ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
" $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> {
!if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in),
(ins CPol_0:$cpol))),
" $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = HasSaddr;

@ -178,8 +173,8 @@ class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
!if(EnableSaddr,
(ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
(ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc$sccb"> {
(ins flat_offset:$offset, CPol_0:$cpol)),
" $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;

@ -203,9 +198,9 @@ class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
opName,
(outs regClass:$vdst),
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
(ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
(ins flat_offset:$offset, CPol_0:$cpol),
!if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
" $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
" $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let is_flat_global = 1;
let has_data = 0;
let mayLoad = 1;

@ -241,8 +236,8 @@ class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
opName,
(outs),
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
(ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc, SCCB_0:$sccb)),
" $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
(ins flat_offset:$offset, CPol:$cpol)),
" $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let is_flat_global = 1;
let mayLoad = 0;
let mayStore = 1;

@ -280,9 +275,9 @@ class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
!if(EnableVaddr,
(ins VGPR_32:$vaddr, flat_offset:$offset),
(ins flat_offset:$offset))),
!if(HasTiedOutput, (ins GLC:$glc, SLC:$slc, DLC:$dlc, SCCB:$sccb, getLdStRegisterOperand<regClass>.ret:$vdst_in),
(ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
" $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
!if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
(ins CPol_0:$cpol))),
" $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let has_data = 0;
let mayLoad = 1;
let has_saddr = 1;

@ -301,11 +296,11 @@ class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit En
opName,
(outs),
!if(EnableSaddr,
(ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
(ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
!if(EnableVaddr,
(ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb),
(ins vdata_op:$vdata, flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc, SCCB_0:$sccb))),
" "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc$sccb"> {
(ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
(ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))),
" "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
let mayLoad = 0;
let mayStore = 1;
let has_vdst = 0;

@ -382,8 +377,8 @@ multiclass FLAT_Atomic_Pseudo<
RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
" $vaddr, $vdata$offset$slc$sccb">,
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata$offset$cpol">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let PseudoInstr = NAME;

@ -393,8 +388,8 @@ multiclass FLAT_Atomic_Pseudo<

def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
" $vdst, $vaddr, $vdata$offset$glc1$slc$sccb",
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata$offset$cpol",
[(set vt:$vdst,
(atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,

@ -416,8 +411,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<

def "" : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
" $vaddr, $vdata, off$offset$slc$sccb">,
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata, off$offset$cpol">,
GlobalSaddrTable<0, opName>,
AtomicNoRet <opName, 0> {
let has_saddr = 1;

@ -427,8 +422,8 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<

def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
(outs),
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc, SCCB_0:$sccb),
" $vaddr, $vdata, $saddr$offset$slc$sccb">,
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
" $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName>,
AtomicNoRet <opName#"_saddr", 0> {
let has_saddr = 1;

@ -451,8 +446,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<

def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
" $vdst, $vaddr, $vdata, off$offset$glc1$slc$sccb",
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata, off$offset$cpol",
[(set vt:$vdst,
(atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
GlobalSaddrTable<0, opName#"_rtn">,

@ -463,8 +458,8 @@ multiclass FLAT_Global_Atomic_Pseudo_RTN<

def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
(outs vdst_op:$vdst),
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc, SCCB_0:$sccb),
" $vdst, $vaddr, $vdata, $saddr$offset$glc1$slc$sccb">,
(ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
" $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
GlobalSaddrTable<1, opName#"_rtn">,
AtomicNoRet <opName#"_saddr", 1> {
let has_saddr = 1;

@ -827,17 +822,17 @@ class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCN

class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, 0, $in)
(inst $vaddr, $offset, 0, $in)
>;

class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, 0, $in)
(inst $vaddr, $offset, 0, $in)
>;

class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)),
(inst $saddr, $voffset, $offset, 0, 0, 0, 0, $in)
(inst $saddr, $voffset, $offset, 0, $in)
>;

class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <

@ -847,7 +842,7 @@ class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>

class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
(inst $saddr, $voffset, $offset, 0, 0, 0)
(inst $saddr, $voffset, $offset, 0)
>;

class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,

@ -928,7 +923,7 @@ class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType

class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
(inst $vaddr, $offset, 0, 0, 0, 0, $in)
(inst $vaddr, $offset, 0, $in)
>;

class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <

@ -943,7 +938,7 @@ class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType v

class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
(vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
(inst $saddr, $offset, 0, 0, 0, 0, $in)
(inst $saddr, $offset, 0, $in)
>;

class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,

@ -1390,7 +1385,7 @@ class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
let AssemblerPredicate = isGFX8GFX9;
let DecoderNamespace = "GFX8";

let Inst{25} = !if(has_sccb, sccb, ps.sccbValue);
let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
let AsmString = ps.Mnemonic #
!subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}

@ -1567,7 +1562,7 @@ class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
let DecoderNamespace = "GFX10";

let Inst{11-0} = offset{11-0};
let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
let Inst{55} = 0;
}
@ -202,26 +202,19 @@ void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "gds");
}

void AMDGPUInstPrinter::printDLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (AMDGPU::isGFX10Plus(STI))
printNamedBit(MI, OpNo, O, "dlc");
}

void AMDGPUInstPrinter::printSCCB(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (AMDGPU::isGFX90A(STI))
printNamedBit(MI, OpNo, O, "scc");
}

void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "glc");
}

void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "slc");
auto Imm = MI->getOperand(OpNo).getImm();
if (Imm & CPol::GLC)
O << " glc";
if (Imm & CPol::SLC)
O << " slc";
if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
O << " scc";
if (Imm & ~CPol::ALL)
O << " /* unexpected cache policy bit */";
}

void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
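printCPol above replaces four single-bit printers with one bitmask printer, which is what lets the assembler accept the flags in any order while printing them canonically. A small self-contained sketch of the same formatting logic (an illustrative helper, not the LLVM API; the constants mirror AMDGPU::CPol):

    #include <string>

    // Mirrors printCPol: append one mnemonic per set bit, in a fixed order.
    std::string formatCPol(unsigned Imm, bool IsGFX10Plus, bool IsGFX90A) {
      std::string S;
      if (Imm & 1)  S += " glc";
      if (Imm & 2)  S += " slc";
      if ((Imm & 4) && IsGFX10Plus) S += " dlc";
      if ((Imm & 16) && IsGFX90A)   S += " scc";
      return S; // e.g. formatCPol(3, false, false) == " glc slc"
    }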
@ -68,14 +68,8 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSCCB(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@ -263,10 +263,10 @@ class MIMG_NoSampler_Helper <mimgopc op, string asm,
string dns="">
: MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -276,10 +276,10 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
string dns="">
: MIMG_gfx90a <op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -288,10 +288,10 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -301,10 +301,10 @@ class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -389,10 +389,10 @@ class MIMG_Store_Helper <mimgopc op, string asm,
string dns = "">
: MIMG_gfx6789<op.BASE, (outs), dns> {
let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -403,10 +403,10 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
: MIMG_gfx90a<op.BASE, (outs), dns> {
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -415,10 +415,10 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs), dns> {
let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -429,10 +429,10 @@ class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe"
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -507,9 +507,9 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
let AsmMatchConverter = "cvtMIMGAtomic";

let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da";
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
}

class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,

@ -520,9 +520,9 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,

let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$sccb$glc$slc$r128$lwe$da";
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
}

class MIMG_Atomic_si<mimgopc op, string asm, RegisterClass data_rc,

@ -555,9 +555,9 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
let AsmMatchConverter = "cvtMIMGAtomic";

let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}

class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,

@ -571,9 +571,9 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$dlc$glc$slc$r128$a16$tfe$lwe";
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}

multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,

@ -660,10 +660,10 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.BASE, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$tfe$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$tfe$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -671,10 +671,10 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx90a<op.BASE, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, UNorm:$unorm, SCCB_0:$sccb, GLC:$glc, SLC:$slc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$sccb$glc$slc$r128$lwe$da"
let AsmString = asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$cpol$r128$lwe$da"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -683,11 +683,11 @@ class MIMG_Sampler_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_gfx10<op.BASE, (outs DataRC:$vdata), dns> {
let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, SReg_128:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, DLC:$dlc,
GLC:$glc, SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc, $ssamp$dmask$dim$unorm"
#"$dlc$glc$slc$r128$a16$tfe$lwe"
#"$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -697,11 +697,11 @@ class MIMG_Sampler_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.BASE, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
Dim:$dim, UNorm:$unorm, DLC:$dlc, GLC:$glc,
SLC:$slc, R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, GFX10A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc, $ssamp$dmask$dim$unorm"
#"$dlc$glc$slc$r128$a16$tfe$lwe"
#"$cpol$r128$a16$tfe$lwe"
#!if(BaseOpcode.HasD16, "$d16", "");
}

@ -890,9 +890,7 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16>
dmask = 0xf,
unorm = 1,
d16 = 0,
glc = 0,
slc = 0,
dlc = 0,
cpol = 0,
tfe = 0,
lwe = 0,
r128 = 1,
@ -276,6 +276,18 @@ enum : unsigned {
} // namespace AMDGPU

namespace AMDGPU {
namespace CPol {

enum CPol {
GLC = 1,
SLC = 2,
DLC = 4,
SCC = 16,
ALL = GLC | SLC | DLC | SCC
};

} // namespace CPol

namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.

enum Id { // Message ID, width(4) [3:0].
</enum>
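With the CPol enum in place, a cache policy travels as one immediate and each flag becomes a bit test. A minimal standalone usage sketch, using the same values as the enum above:

    #include <cassert>

    int main() {
      enum { GLC = 1, SLC = 2, DLC = 4, SCC = 16, ALL = GLC | SLC | DLC | SCC };
      unsigned CachePolicy = GLC | SLC;  // one operand instead of two flags
      assert(CachePolicy & GLC);         // test a single policy bit
      assert((CachePolicy & ~ALL) == 0); // no unexpected bits set
      return 0;
    }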
@@ -135,10 +135,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 .addReg(SpillReg, RegState::Kill)
 .addReg(SPReg)
 .addImm(Offset)
-.addImm(0) // glc
-.addImm(0) // slc
-.addImm(0) // dlc
-.addImm(0) // scc
+.addImm(0) // cpol
 .addMemOperand(MMO);
 return;
 }
@@ -148,12 +145,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 .addReg(ScratchRsrcReg)
 .addReg(SPReg)
 .addImm(Offset)
-.addImm(0) // glc
-.addImm(0) // slc
+.addImm(0) // cpol
 .addImm(0) // tfe
-.addImm(0) // dlc
 .addImm(0) // swz
-.addImm(0) // scc
 .addMemOperand(MMO);
 return;
 }
@@ -180,10 +174,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 .addReg(SpillReg, RegState::Kill)
 .addReg(OffsetReg, HasOffsetReg ? RegState::Kill : 0)
 .addImm(0) // offset
-.addImm(0) // glc
-.addImm(0) // slc
-.addImm(0) // dlc
-.addImm(0) // scc
+.addImm(0) // cpol
 .addMemOperand(MMO);

 if (!HasOffsetReg) {
@@ -205,12 +196,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 .addReg(ScratchRsrcReg)
 .addReg(SPReg)
 .addImm(0) // offset
-.addImm(0) // glc
-.addImm(0) // slc
+.addImm(0) // cpol
 .addImm(0) // tfe
-.addImm(0) // dlc
 .addImm(0) // swz
-.addImm(0) // scc
 .addMemOperand(MMO);
 } else {
 // No free register, use stack pointer and restore afterwards.
@@ -223,12 +211,9 @@ static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 .addReg(ScratchRsrcReg)
 .addReg(SPReg)
 .addImm(0) // offset
-.addImm(0) // glc
-.addImm(0) // slc
+.addImm(0) // cpol
 .addImm(0) // tfe
-.addImm(0) // dlc
 .addImm(0) // swz
-.addImm(0) // scc
 .addMemOperand(MMO);

 BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SUB_U32), SPReg)
@@ -259,10 +244,7 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg)
 .addReg(SPReg)
 .addImm(Offset)
-.addImm(0) // glc
-.addImm(0) // slc
-.addImm(0) // dlc
-.addImm(0) // scc
+.addImm(0) // cpol
 .addMemOperand(MMO);
 return;
 }
@@ -278,10 +260,7 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 SpillReg)
 .addReg(OffsetReg, RegState::Kill)
 .addImm(0)
-.addImm(0) // glc
-.addImm(0) // slc
-.addImm(0) // dlc
-.addImm(0) // scc
+.addImm(0) // cpol
 .addMemOperand(MMO);
 return;
 }
@@ -292,12 +271,9 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 .addReg(ScratchRsrcReg)
 .addReg(SPReg)
 .addImm(Offset)
-.addImm(0) // glc
-.addImm(0) // slc
+.addImm(0) // cpol
 .addImm(0) // tfe
-.addImm(0) // dlc
 .addImm(0) // swz
-.addImm(0) // scc
 .addMemOperand(MMO);
 return;
 }
@@ -316,12 +292,9 @@ static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
 .addReg(ScratchRsrcReg)
 .addReg(SPReg)
 .addImm(0)
-.addImm(0) // glc
-.addImm(0) // slc
+.addImm(0) // cpol
 .addImm(0) // tfe
-.addImm(0) // dlc
 .addImm(0) // swz
-.addImm(0) // scc
 .addMemOperand(MMO);
 }

@@ -416,8 +389,7 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
 BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
 .addReg(FlatScrInit)
 .addImm(EncodedOffset) // offset
-.addImm(0) // glc
-.addImm(0) // dlc
+.addImm(0) // cpol
 .addMemOperand(MMO);

 // Mask the offset in [47:0] of the descriptor
@@ -713,8 +685,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
 BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
 .addReg(Rsrc01)
 .addImm(EncodedOffset) // offset
-.addImm(0) // glc
-.addImm(0) // dlc
+.addImm(0) // cpol
 .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
 .addMemOperand(MMO);
 } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
@@ -748,8 +719,7 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
 BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
 .addReg(MFI->getImplicitBufferPtrUserSGPR())
 .addImm(0) // offset
-.addImm(0) // glc
-.addImm(0) // dlc
+.addImm(0) // cpol
 .addMemOperand(MMO)
 .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

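Each spill and reload builder above drops two to four zero immediates in favor of a single cpol immediate, which is where the reduction in MIR operand count comes from. A toy sketch of the effect; InstrBuilder is a stand-in just to count operands, not the LLVM MachineInstrBuilder API:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Toy stand-in for a machine-instruction builder: records immediates.
    struct InstrBuilder {
      std::vector<int64_t> Imms;
      InstrBuilder &addImm(int64_t V) {
        Imms.push_back(V);
        return *this;
      }
    };

    int main() {
      InstrBuilder Old, New;
      Old.addImm(0)   // glc
          .addImm(0)  // slc
          .addImm(0)  // dlc
          .addImm(0); // scc
      New.addImm(0);  // cpol: all four flags in one bitmask immediate
      std::cout << Old.Imms.size() << " immediates before, "
                << New.Imms.size() << " after\n";
      return 0;
    }
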
@@ -5768,28 +5768,6 @@ static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
 return DAG.getBuildVector(Type, DL, VecElts);
 }

-static bool parseCachePolicy(SDValue CachePolicy, SelectionDAG &DAG,
-SDValue *GLC, SDValue *SLC, SDValue *DLC) {
-auto CachePolicyConst = cast<ConstantSDNode>(CachePolicy.getNode());
-
-uint64_t Value = CachePolicyConst->getZExtValue();
-SDLoc DL(CachePolicy);
-if (GLC) {
-*GLC = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
-Value &= ~(uint64_t)0x1;
-}
-if (SLC) {
-*SLC = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
-Value &= ~(uint64_t)0x2;
-}
-if (DLC) {
-*DLC = DAG.getTargetConstant((Value & 0x4) ? 1 : 0, DL, MVT::i32);
-Value &= ~(uint64_t)0x4;
-}
-
-return Value == 0;
-}
-
 static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT,
 SDValue Src, int ExtraElts) {
 EVT SrcVT = Src.getValueType();
@@ -6184,19 +6162,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
 }
 }

-SDValue GLC;
-SDValue SLC;
-SDValue DLC;
-if (BaseOpcode->Atomic) {
-GLC = True; // TODO no-return optimization
-if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
-DAG, nullptr, &SLC, IsGFX10Plus ? &DLC : nullptr))
-return Op;
-} else {
-if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex),
-DAG, &GLC, &SLC, IsGFX10Plus ? &DLC : nullptr))
-return Op;
-}
+unsigned CPol = cast<ConstantSDNode>(
+Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
+if (BaseOpcode->Atomic)
+CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+if (CPol & ~AMDGPU::CPol::ALL)
+return Op;

 SmallVector<SDValue, 26> Ops;
 if (BaseOpcode->Store || BaseOpcode->Atomic)
@@ -6212,12 +6183,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
 if (IsGFX10Plus)
 Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
 Ops.push_back(Unorm);
-if (!IsGFX10Plus)
-Ops.push_back(DAG.getTargetConstant(0, SDLoc(), MVT::i1));
-if (IsGFX10Plus)
-Ops.push_back(DLC);
-Ops.push_back(GLC);
-Ops.push_back(SLC);
+Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
 Ops.push_back(IsA16 && // r128, a16 for gfx9
 ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
 if (IsGFX10Plus)
@@ -6526,11 +6492,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
 SDLoc(Op), MVT::i32);
 case Intrinsic::amdgcn_s_buffer_load: {
-bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
-SDValue GLC;
-SDValue DLC = DAG.getTargetConstant(0, DL, MVT::i1);
-if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
-IsGFX10Plus ? &DLC : nullptr))
+unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+if (CPol & ~AMDGPU::CPol::ALL)
 return Op;
 return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
 DAG);
@@ -11229,10 +11192,12 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
 int NoRetAtomicOp = AMDGPU::getAtomicNoRetOp(MI.getOpcode());
 if (NoRetAtomicOp != -1) {
 if (!Node->hasAnyUseOfValue(0)) {
-int Glc1Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
-AMDGPU::OpName::glc1);
-if (Glc1Idx != -1)
-MI.RemoveOperand(Glc1Idx);
+int CPolIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+AMDGPU::OpName::cpol);
+if (CPolIdx != -1) {
+MachineOperand &CPol = MI.getOperand(CPolIdx);
+CPol.setImm(CPol.getImm() & ~AMDGPU::CPol::GLC);
+}
 MI.RemoveOperand(0);
 MI.setDesc(TII->get(NoRetAtomicOp));
 return;

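The two parseCachePolicy calls that peeled GLC, SLC and DLC into separate SDValues collapse into a read of the raw immediate plus one mask test. A hedged sketch of the new control flow in lowerImage; validateCachePolicy is an illustrative name, the real code does this inline:

    #include <cstdint>
    #include <optional>

    enum CPol : unsigned { GLC = 1, SLC = 2, DLC = 4, SCC = 16,
                           ALL = GLC | SLC | DLC | SCC };

    // Returns the policy to attach as a single operand, or nullopt when an
    // unknown bit is set, in which case lowerImage returns Op unchanged.
    std::optional<unsigned> validateCachePolicy(uint64_t Value, bool IsAtomic) {
      unsigned Pol = static_cast<unsigned>(Value);
      if (IsAtomic)
        Pol |= GLC; // TODO no-return optimization, as in the hunk above
      if (Pol & ~ALL)
        return std::nullopt;
      return Pol;
    }
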
@@ -261,6 +261,13 @@ class Enc64 {
 int Size = 8;
 }

+def CPolBit {
+int GLC = 0;
+int SLC = 1;
+int DLC = 2;
+int SCC = 4;
+}
+
 class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;

 class VINTRPe <bits<2> op> : Enc32 {
@@ -281,21 +288,20 @@ class MIMGe : Enc64 {
 bits<10> vdata;
 bits<4> dmask;
 bits<1> unorm;
-bits<1> glc;
+bits<5> cpol;
 bits<1> r128;
 bits<1> tfe;
 bits<1> lwe;
-bits<1> slc;
 bit d16;
 bits<7> srsrc;
 bits<7> ssamp;

 let Inst{11-8} = dmask;
 let Inst{12} = unorm;
-let Inst{13} = glc;
+let Inst{13} = cpol{CPolBit.GLC};
 let Inst{15} = r128;
 let Inst{17} = lwe;
-let Inst{25} = slc;
+let Inst{25} = cpol{CPolBit.SLC};
 let Inst{31-26} = 0x3c;
 let Inst{47-40} = vdata{7-0};
 let Inst{52-48} = srsrc{6-2};
@@ -306,10 +312,9 @@ class MIMGe : Enc64 {
 class MIMGe_gfx6789 <bits<8> op> : MIMGe {
 bits<8> vaddr;
 bits<1> da;
-bits<1> sccb;

 let Inst{0} = op{7};
-let Inst{7} = sccb;
+let Inst{7} = cpol{CPolBit.SCC};
 let Inst{14} = da;
 let Inst{16} = tfe;
 let Inst{24-18} = op{6-0};
@@ -319,10 +324,9 @@ class MIMGe_gfx6789 <bits<8> op> : MIMGe {
 class MIMGe_gfx90a <bits<8> op> : MIMGe {
 bits<8> vaddr;
 bits<1> da;
-bits<1> sccb;

 let Inst{0} = op{7};
-let Inst{7} = sccb;
+let Inst{7} = cpol{CPolBit.SCC};
 let Inst{14} = da;
 let Inst{16} = vdata{9}; // ACC bit
 let Inst{24-18} = op{6-0};
@@ -333,13 +337,12 @@ class MIMGe_gfx10 <bits<8> op> : MIMGe {
 bits<8> vaddr0;
 bits<3> dim;
 bits<2> nsa;
-bits<1> dlc;
 bits<1> a16;

 let Inst{0} = op{7};
 let Inst{2-1} = nsa;
 let Inst{5-3} = dim;
-let Inst{7} = dlc;
+let Inst{7} = cpol{CPolBit.DLC};
 let Inst{16} = tfe;
 let Inst{24-18} = op{6-0};
 let Inst{39-32} = vaddr0;

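CPolBit records bit positions inside the 5-bit cpol operand, so each encoding class can route one bit of the operand to its fixed instruction field (the MIMG encoding above puts GLC at Inst{13} and SLC at Inst{25}, and gfx10 puts DLC at Inst{7}). A small sketch of that routing; the function is illustrative, with the field choices mirroring the MIMGe hunk:

    #include <cassert>
    #include <cstdint>

    // Bit positions within the cpol operand (mirrors def CPolBit above).
    enum CPolBit { GLCBit = 0, SLCBit = 1, DLCBit = 2, SCCBit = 4 };

    // Place selected cpol bits into a MIMG-style 64-bit encoding:
    // GLC -> Inst{13}, SLC -> Inst{25}.
    uint64_t encodeMIMGCachePolicy(uint64_t Inst, unsigned CPol) {
      Inst |= uint64_t((CPol >> GLCBit) & 1) << 13;
      Inst |= uint64_t((CPol >> SLCBit) & 1) << 25;
      return Inst;
    }

    int main() {
      uint64_t Inst = encodeMIMGCachePolicy(0, 0b11); // GLC | SLC
      assert(Inst == ((1ull << 13) | (1ull << 25)));
      return 0;
    }
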
@@ -5588,21 +5588,10 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
 .add(*SOffset)
 .add(*Offset);

 // Atomics do not have this operand.
-if (const MachineOperand *GLC =
-getNamedOperand(MI, AMDGPU::OpName::glc)) {
-MIB.addImm(GLC->getImm());
+if (const MachineOperand *CPol =
+getNamedOperand(MI, AMDGPU::OpName::cpol)) {
+MIB.addImm(CPol->getImm());
 }
-if (const MachineOperand *DLC =
-getNamedOperand(MI, AMDGPU::OpName::dlc)) {
-MIB.addImm(DLC->getImm());
-}
-if (const MachineOperand *SCCB =
-getNamedOperand(MI, AMDGPU::OpName::sccb)) {
-MIB.addImm(SCCB->getImm());
-}
-
-MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc));

 if (const MachineOperand *TFE =
 getNamedOperand(MI, AMDGPU::OpName::tfe)) {
@@ -5622,7 +5611,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
 .addReg(NewSRsrc)
 .add(*SOffset)
 .add(*Offset)
-.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
+.addImm(getNamedImmOperand(MI, AMDGPU::OpName::cpol))
 .cloneMemRefs(MI);
 }

@@ -818,24 +818,16 @@ def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
 // MUBUF/SMEM Patterns
 //===----------------------------------------------------------------------===//

-def extract_glc : SDNodeXForm<timm, [{
-return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
-}]>;
-
-def extract_slc : SDNodeXForm<timm, [{
-return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
-}]>;
-
-def extract_dlc : SDNodeXForm<timm, [{
-return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
+def extract_cpol : SDNodeXForm<timm, [{
+return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
 }]>;

 def extract_swz : SDNodeXForm<timm, [{
 return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
 }]>;

-def extract_sccb : SDNodeXForm<timm, [{
-return CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 1, SDLoc(N), MVT::i8);
+def set_glc : SDNodeXForm<timm, [{
+return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
 }]>;

 //===----------------------------------------------------------------------===//
@@ -1090,6 +1082,12 @@ class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
 let ParserMatchClass = MatchClass;
 }

+class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> :
+OperandWithDefaultOps<i32, (ops (i32 1))> {
+let PrintMethod = "print"#Name;
+let ParserMatchClass = MatchClass;
+}
+
 let OperandType = "OPERAND_IMMEDIATE" in {

 def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@@ -1113,18 +1111,9 @@ def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
 def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
 def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;

-def SCCB : NamedOperandBit<"SCCB", NamedMatchClass<"SCCB">>;
-def SCCB_0 : NamedOperandBit_0<"SCCB", NamedMatchClass<"SCCB">>;
-
-def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
-def DLC_0 : NamedOperandBit_0<"DLC", NamedMatchClass<"DLC">>;
-
-def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
-def GLC_0 : NamedOperandBit_0<"GLC", NamedMatchClass<"GLC">>;
-def GLC_1 : NamedOperandBit_1<"GLC", NamedMatchClass<"GLC_1">>;
-
-def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
-def SLC_0 : NamedOperandBit_0<"SLC", NamedMatchClass<"SLC">>;
+def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
+def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
+def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;

 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
 def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;

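The two transforms above keep the whole mask in one step: extract_cpol clamps the intrinsic's cachepolicy immediate to the defined bits (dropping, for example, the swz bit that extract_swz reads), and set_glc forces GLC on for patterns that need it. Equivalent plain C++, purely illustrative; the real transforms emit target constants via CurDAG:

    #include <cstdint>

    enum CPol : unsigned { GLC = 1, SLC = 2, DLC = 4, SCC = 16,
                           ALL = GLC | SLC | DLC | SCC };

    // extract_cpol: keep only the defined cache-policy bits.
    uint8_t extractCPol(uint64_t CachePolicy) {
      return static_cast<uint8_t>(CachePolicy & ALL);
    }

    // set_glc: force the GLC flag, e.g. for returning atomics.
    uint8_t setGLC(uint64_t CachePolicy) {
      return static_cast<uint8_t>(CachePolicy | GLC);
    }
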
@@ -104,10 +104,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
 unsigned BaseOff;
 unsigned DMask;
 InstClassEnum InstClass;
-bool GLC = 0;
-bool SLC = 0;
-bool DLC = 0;
-bool SCCB = 0; // vmem only.
+unsigned CPol;
 bool UseST64;
 int AddrIdx[MaxAddressRegs];
 const MachineOperand *AddrReg[MaxAddressRegs];
@@ -533,14 +530,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
 if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) {
 Offset &= 0xffff;
 } else if (InstClass != MIMG) {
-GLC = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm();
-if (InstClass != S_BUFFER_LOAD_IMM) {
-SLC = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm();
-}
-DLC = TII.getNamedOperand(*I, AMDGPU::OpName::dlc)->getImm();
-if (InstClass != S_BUFFER_LOAD_IMM) {
-SCCB = TII.getNamedOperand(*I, AMDGPU::OpName::sccb)->getImm();
-}
+CPol = TII.getNamedOperand(*I, AMDGPU::OpName::cpol)->getImm();
 }

 AddressRegs Regs = getRegs(Opc, TII);
@@ -690,10 +680,9 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
 return false;

 // Check other optional immediate operands for equality.
-unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc,
-AMDGPU::OpName::d16, AMDGPU::OpName::unorm,
-AMDGPU::OpName::da, AMDGPU::OpName::r128,
-AMDGPU::OpName::a16, AMDGPU::OpName::dlc};
+unsigned OperandsToMatch[] = {AMDGPU::OpName::cpol, AMDGPU::OpName::d16,
+AMDGPU::OpName::unorm, AMDGPU::OpName::da,
+AMDGPU::OpName::r128, AMDGPU::OpName::a16};

 for (auto op : OperandsToMatch) {
 int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op);
@@ -798,9 +787,8 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
 if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
 return (EltOffset0 + CI.Width == EltOffset1 ||
 EltOffset1 + Paired.Width == EltOffset0) &&
-CI.GLC == Paired.GLC && CI.DLC == Paired.DLC &&
-(CI.InstClass == S_BUFFER_LOAD_IMM ||
-(CI.SLC == Paired.SLC && CI.SCCB == Paired.SCCB));
+CI.CPol == Paired.CPol &&
+(CI.InstClass == S_BUFFER_LOAD_IMM || CI.CPol == Paired.CPol);
 }

 // If the offset in elements doesn't fit in 8-bits, we might be able to use
@@ -1301,8 +1289,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(
 BuildMI(*MBB, Paired.I, DL, TII->get(Opcode), DestReg)
 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase))
 .addImm(MergedOffset) // offset
-.addImm(CI.GLC) // glc
-.addImm(CI.DLC) // dlc
+.addImm(CI.CPol) // cpol
 .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

 std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
@@ -1361,12 +1348,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair(
 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
 .addImm(MergedOffset) // offset
-.addImm(CI.GLC) // glc
-.addImm(CI.SLC) // slc
+.addImm(CI.CPol) // cpol
 .addImm(0) // tfe
-.addImm(CI.DLC) // dlc
 .addImm(0) // swz
-.addImm(CI.SCCB) // scc
 .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

 std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
@@ -1429,12 +1413,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferLoadPair(
 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
 .addImm(MergedOffset) // offset
 .addImm(JoinedFormat) // format
-.addImm(CI.GLC) // glc
-.addImm(CI.SLC) // slc
+.addImm(CI.CPol) // cpol
 .addImm(0) // tfe
-.addImm(CI.DLC) // dlc
 .addImm(0) // swz
-.addImm(CI.SCCB) // scc
 .addMemOperand(
 combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

@@ -1510,12 +1491,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair(
 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
 .addImm(JoinedFormat) // format
-.addImm(CI.GLC) // glc
-.addImm(CI.SLC) // slc
+.addImm(CI.CPol) // cpol
 .addImm(0) // tfe
-.addImm(CI.DLC) // dlc
 .addImm(0) // swz
-.addImm(CI.SCCB) // scc
 .addMemOperand(
 combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

@@ -1665,12 +1643,9 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferStorePair(
 MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc))
 .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset))
 .addImm(std::min(CI.Offset, Paired.Offset)) // offset
-.addImm(CI.GLC) // glc
-.addImm(CI.SLC) // slc
+.addImm(CI.CPol) // cpol
 .addImm(0) // tfe
-.addImm(CI.DLC) // dlc
 .addImm(0) // swz
-.addImm(CI.SCCB) // scc
 .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));

 moveInstsAfter(MIB, InstsToMove);

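With one CPol field per CombineInfo, deciding whether two merge candidates agree on cache policy becomes a single integer compare instead of four flag compares. A minimal sketch, with the struct trimmed down to the relevant field:

    // Trimmed-down CombineInfo: only the cache-policy field matters here.
    struct CombineInfo {
      unsigned CPol = 0;
    };

    // One comparison now covers what GLC, SLC, DLC and SCCB used to need.
    bool cachePoliciesMatch(const CombineInfo &A, const CombineInfo &B) {
      return A.CPol == B.CPol;
    }
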
@@ -84,22 +84,6 @@ enum class SIAtomicAddrSpace {
 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
 };

-/// Sets named bit \p BitName to "true" if present in instruction \p MI.
-/// \returns Returns true if \p MI is modified, false otherwise.
-template <uint16_t BitName>
-bool enableNamedBit(const MachineBasicBlock::iterator &MI) {
-int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
-if (BitIdx == -1)
-return false;
-
-MachineOperand &Bit = MI->getOperand(BitIdx);
-if (Bit.getImm() != 0)
-return false;
-
-Bit.setImm(1);
-return true;
-}
-
 class SIMemOpInfo final {
 private:

@@ -288,6 +272,11 @@ protected:

 SICacheControl(const GCNSubtarget &ST);

+/// Sets named bit \p Bit to "true" if present in instruction \p MI.
+/// \returns Returns true if \p MI is modified, false otherwise.
+bool enableNamedBit(const MachineBasicBlock::iterator MI,
+AMDGPU::CPol::CPol Bit) const;
+
 public:

 /// Create a cache control for the subtarget \p ST.
@@ -369,13 +358,13 @@ protected:
 /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
 /// is modified, false otherwise.
 bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
-return enableNamedBit<AMDGPU::OpName::glc>(MI);
+return enableNamedBit(MI, AMDGPU::CPol::GLC);
 }

 /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
 /// is modified, false otherwise.
 bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
-return enableNamedBit<AMDGPU::OpName::slc>(MI);
+return enableNamedBit(MI, AMDGPU::CPol::SLC);
 }

 public:
@@ -436,7 +425,7 @@ protected:
 /// Sets SCC bit to "true" if present in \p MI. Returns true if \p MI
 /// is modified, false otherwise.
 bool enableSCCBit(const MachineBasicBlock::iterator &MI) const {
-return enableNamedBit<AMDGPU::OpName::sccb>(MI);
+return enableNamedBit(MI, AMDGPU::CPol::SCC);
 }

 public:
@@ -485,7 +474,7 @@ protected:
 /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
 /// is modified, false otherwise.
 bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
-return enableNamedBit<AMDGPU::OpName::dlc>(MI);
+return enableNamedBit(MI, AMDGPU::CPol::DLC);
 }

 public:
@@ -785,6 +774,16 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
 InsertCacheInv = !AmdgcnSkipCacheInvalidations;
 }

+bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
+AMDGPU::CPol::CPol Bit) const {
+MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
+if (!CPol)
+return false;
+
+CPol->setImm(CPol->getImm() | Bit);
+return true;
+}
+
 /* static */
 std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
 GCNSubtarget::Generation Generation = ST.getGeneration();

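The template-on-operand-name enableNamedBit becomes a regular method that ORs a flag into the shared cpol operand, instead of writing 1 into a dedicated one-bit operand. The core of it, reduced to plain C++ over an integer immediate; this is illustrative, and the real method first looks up the cpol machine operand and bails if the instruction has none:

    #include <cstdint>

    enum CPol : unsigned { GLC = 1, SLC = 2, DLC = 4, SCC = 16 };

    // OR the requested flag into the combined cache-policy immediate,
    // preserving whatever flags are already set.
    bool enableBit(int64_t &CPolImm, CPol Bit) {
      CPolImm |= Bit;
      return true; // instruction modified
    }
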
@@ -743,12 +743,9 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
 .addImm(Offset)
-.addImm(0) // glc
-.addImm(0) // slc
+.addImm(0) // cpol
 .addImm(0) // tfe
-.addImm(0) // dlc
 .addImm(0) // swz
-.addImm(0) // scc
 .cloneMemRefs(*MI);

 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
@@ -1010,13 +1007,10 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
 MIB.addReg(SOffset, SOffsetRegState);
 }
 MIB.addImm(Offset + RemRegOffset)
-.addImm(0) // glc
-.addImm(0) // slc
-.addImm(0); // tfe for MUBUF or dlc for FLAT
+.addImm(0); // cpol
 if (!IsFlat)
-MIB.addImm(0) // dlc
+MIB.addImm(0) // tfe
 .addImm(0); // swz
-MIB.addImm(0); // scc
 MIB.addMemOperand(NewMMO);

 if (!IsAGPR && NeedSuperRegDef)

@@ -71,6 +71,7 @@ class SM_Real <SM_Pseudo ps>
 bits<7> sdst;
 bits<32> offset;
 bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
+bits<5> cpol;
 }

 class SM_Probe_Pseudo <string opName, dag ins, bit isImm>
@@ -122,8 +123,8 @@ multiclass SM_Pseudo_Loads<string opName,
 RegisterClass dstClass> {
 def _IMM : SM_Load_Pseudo <opName,
 (outs dstClass:$sdst),
-(ins baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
-" $sdst, $sbase, $offset$glc$dlc", []> {
+(ins baseClass:$sbase, i32imm:$offset, CPol:$cpol),
+" $sdst, $sbase, $offset$cpol", []> {
 let offset_is_imm = 1;
 let BaseClass = baseClass;
 let PseudoInstr = opName # "_IMM";
@@ -133,8 +134,8 @@ multiclass SM_Pseudo_Loads<string opName,

 def _SGPR : SM_Load_Pseudo <opName,
 (outs dstClass:$sdst),
-(ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
-" $sdst, $sbase, $offset$glc$dlc", []> {
+(ins baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
+" $sdst, $sbase, $offset$cpol", []> {
 let BaseClass = baseClass;
 let PseudoInstr = opName # "_SGPR";
 let has_glc = 1;
@@ -146,8 +147,8 @@ multiclass SM_Pseudo_Stores<string opName,
 RegisterClass baseClass,
 RegisterClass srcClass> {
 def _IMM : SM_Store_Pseudo <opName,
-(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc, i1imm:$dlc),
-" $sdata, $sbase, $offset$glc$dlc", []> {
+(ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, CPol:$cpol),
+" $sdata, $sbase, $offset$cpol", []> {
 let offset_is_imm = 1;
 let BaseClass = baseClass;
 let SrcClass = srcClass;
@@ -155,8 +156,8 @@ multiclass SM_Pseudo_Stores<string opName,
 }

 def _SGPR : SM_Store_Pseudo <opName,
-(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc, i1imm:$dlc),
-" $sdata, $sbase, $offset$glc$dlc", []> {
+(ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, CPol:$cpol),
+" $sdata, $sbase, $offset$cpol", []> {
 let BaseClass = baseClass;
 let SrcClass = srcClass;
 let PseudoInstr = opName # "_SGPR";
@@ -232,6 +233,8 @@ class SM_Atomic_Pseudo <string opName,

 let IsAtomicNoRet = !not(isRet);
 let IsAtomicRet = isRet;
+
+let AsmMatchConverter = "cvtSMEMAtomic";
 }

 class SM_Pseudo_Atomic<string opName,
@@ -241,13 +244,14 @@ class SM_Pseudo_Atomic<string opName,
 bit isRet,
 string opNameWithSuffix = opName # !if(isImm,
 !if(isRet, "_IMM_RTN", "_IMM"),
-!if(isRet, "_SGPR_RTN", "_SGPR"))> :
+!if(isRet, "_SGPR_RTN", "_SGPR")),
+Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
 SM_Atomic_Pseudo<opName,
 !if(isRet, (outs dataClass:$sdst), (outs)),
 !if(isImm,
-(ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, DLC:$dlc),
-(ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, DLC:$dlc)),
-!if(isRet, " $sdst", " $sdata") # ", $sbase, $offset" # !if(isRet, " glc", "") # "$dlc",
+(ins dataClass:$sdata, baseClass:$sbase, smem_offset:$offset, CPolTy:$cpol),
+(ins dataClass:$sdata, baseClass:$sbase, SReg_32:$offset, CPolTy:$cpol)),
+!if(isRet, " $sdst", " $sdata") # ", $sbase, $offset$cpol",
 isRet>,
 AtomicNoRet <opNameWithSuffix, isRet> {
 let offset_is_imm = isImm;
@@ -463,13 +467,13 @@ multiclass SM_Real_Loads_si<bits<5> op, string ps,
 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {

 def _IMM_si : SMRD_Real_si <op, immPs> {
-let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
 }

 // FIXME: The operand name $offset is inconsistent with $soff used
 // in the pseudo
 def _SGPR_si : SMRD_Real_si <op, sgprPs> {
-let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
 }

 }
@@ -497,15 +501,13 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
 : SM_Real<ps>
 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
 , Enc64 {
-bit glc;
-
 let AssemblerPredicate = isGFX8GFX9;
 let DecoderNamespace = "GFX8";

 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);

-let Inst{16} = !if(ps.has_glc, glc, ?);
+let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
 let Inst{17} = imm;
 let Inst{25-18} = op;
 let Inst{31-26} = 0x30; //encoding
@@ -519,10 +521,10 @@ multiclass SM_Real_Loads_vi<bits<8> op, string ps,
 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
 def _IMM_vi : SMEM_Real_vi <op, immPs> {
-let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
 }
 def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
-let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
 }
 }

@@ -540,11 +542,11 @@ multiclass SM_Real_Stores_vi<bits<8> op, string ps,
 // FIXME: The operand name $offset is inconsistent with $soff used
 // in the pseudo
 def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
-let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
 }

 def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
-let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
 }
 }

@@ -604,8 +606,8 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
 let Constraints = ps.Constraints;
 let DisableEncoding = ps.DisableEncoding;

-let glc = ps.glc;
-let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
+let cpol{CPolBit.GLC} = ps.glc;
+let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
 }

 multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
@@ -694,7 +696,7 @@ class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :

 let AssemblerPredicate = isGFX7Only;
 let DecoderNamespace = "GFX7";
-let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);

 let LGKM_CNT = ps.LGKM_CNT;
 let mayLoad = ps.mayLoad;
@@ -772,26 +774,26 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
 // 1. IMM offset
 def : GCNPat <
 (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
-(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0, 0))
+(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
 >;

 // 2. 32-bit IMM offset on CI
 def : GCNPat <
 (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
-(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0, 0))> {
+(vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
 let OtherPredicates = [isGFX7Only];
 }

 // 3. SGPR offset
 def : GCNPat <
 (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
-(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0, 0))
+(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
 >;

 // 4. No offset
 def : GCNPat <
 (vt (smrd_load (i64 SReg_64:$sbase))),
-(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0, 0))
+(vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
 >;
 }

@@ -799,8 +801,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
 // 1. Offset as an immediate
 def : GCNPat <
 (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
-(vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_glc $cachepolicy),
-(extract_dlc $cachepolicy)))> {
+(vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
 let AddedComplexity = 2;
 }

@@ -808,7 +809,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
 def : GCNPat <
 (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
 (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
-(extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
+(extract_cpol $cachepolicy))> {
 let OtherPredicates = [isGFX7Only];
 let AddedComplexity = 1;
 }
@@ -816,8 +817,7 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
 // 3. Offset loaded in an 32bit SGPR
 def : GCNPat <
 (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
-(vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_glc $cachepolicy),
-(extract_dlc $cachepolicy)))
+(vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$offset, (extract_cpol $cachepolicy)))
 >;
 }

@@ -883,16 +883,13 @@ def : GCNPat <

 class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> :
 SM_Real<ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10>, Enc64 {
-bit glc;
-bit dlc;
-
 let AssemblerPredicate = isGFX10Plus;
 let DecoderNamespace = "GFX10";

 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
-let Inst{14} = !if(ps.has_dlc, dlc, ?);
-let Inst{16} = !if(ps.has_glc, glc, ?);
+let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
+let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
 let Inst{25-18} = op;
 let Inst{31-26} = 0x3d;
 let Inst{52-32} = !if(ps.offset_is_imm, !if(ps.has_offset, offset{20-0}, ?), ?);
@@ -904,10 +901,10 @@ multiclass SM_Real_Loads_gfx10<bits<8> op, string ps,
 SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
 SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
 def _IMM_gfx10 : SMEM_Real_gfx10<op, immPs> {
-let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
 }
 def _SGPR_gfx10 : SMEM_Real_gfx10<op, sgprPs> {
-let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
 }
 }

@@ -924,11 +921,11 @@ multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
 // FIXME: The operand name $offset is inconsistent with $soff used
 // in the pseudo
 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
-let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
 }

 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
-let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc, DLC:$dlc);
+let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, CPol:$cpol);
 }
 }

@@ -987,15 +984,14 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
 AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {

 bits<7> sdata;
-bit dlc;

 let Constraints = ps.Constraints;
 let DisableEncoding = ps.DisableEncoding;

-let glc = ps.glc;
+let cpol{CPolBit.GLC} = ps.glc;

-let Inst{14} = !if(ps.has_dlc, dlc, 0);
-let Inst{12-6} = !if(glc, sdst{6-0}, sdata{6-0});
+let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
+let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
 }

 multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {

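CPol_GLC1 gives returning SMEM atomics a cache-policy operand that defaults to 1 (GLC set), replacing the hardcoded " glc" suffix the old AsmString appended. A sketch of the default-value idea, with an illustrative helper name:

    #include <cstdint>

    enum CPol : unsigned { GLC = 1 };

    // Default value of the cpol operand when the assembly omits it:
    // CPol_GLC1 (returning atomics) defaults to GLC, CPol_0 to zero.
    int64_t defaultCPol(bool IsReturningAtomic) {
      return IsReturningAtomic ? GLC : 0;
    }
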
@@ -19,7 +19,7 @@ body: |
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -27,7 +27,7 @@ body: |
 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -35,7 +35,7 @@ body: |
 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
@@ -71,7 +71,7 @@ body: |
 ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
-; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -79,7 +79,7 @@ body: |
 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gep4
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -97,7 +97,7 @@ body: |
 ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
-; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
@@ -125,7 +125,7 @@ body: |
 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
-; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -133,7 +133,7 @@ body: |
 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
-; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -141,7 +141,7 @@ body: |
 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
-; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -177,7 +177,7 @@ body: |
 ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
-; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -185,7 +185,7 @@ body: |
 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
-; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_gep4
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -203,7 +203,7 @@ body: |
 ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
-; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -241,7 +241,7 @@ body: |
 ; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
-; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -259,7 +259,7 @@ body: |
 ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
-; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_gepm4
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -277,7 +277,7 @@ body: |
 ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
 ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
 ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
-; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
@@ -305,21 +305,21 @@ body: |
 ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
+; GFX10: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s32) = COPY $vgpr2
 %2:vgpr(s32) = COPY $vgpr3
@@ -343,21 +343,21 @@ body: |
 ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
 ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
-; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
 ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
 ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
-; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn
 ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
 ; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
 ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
 ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
-; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
+; GFX10: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
 %0:vgpr(p0) = COPY $vgpr0_vgpr1
 %1:vgpr(s64) = COPY $vgpr2_vgpr3
 %2:vgpr(s64) = COPY $vgpr4_vgpr5

@ -26,7 +26,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
@@ -40,7 +40,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
@@ -49,7 +49,7 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -57,7 +57,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -65,7 +65,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -73,7 +73,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -104,7 +104,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
@@ -118,7 +118,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
@@ -137,7 +137,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -155,7 +155,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -163,7 +163,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -171,7 +171,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -204,7 +204,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
@@ -218,7 +218,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
@@ -227,7 +227,7 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -235,7 +235,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -243,7 +243,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -251,7 +251,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -282,7 +282,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
@@ -296,7 +296,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
@@ -315,7 +315,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -333,7 +333,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -341,7 +341,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -349,7 +349,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -392,7 +392,7 @@ body: |
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY7]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
@@ -416,7 +416,7 @@ body: |
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE3]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE4]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY7]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
@@ -435,7 +435,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX7-FLAT: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -453,7 +453,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX8: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -461,7 +461,7 @@ body: |
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -469,7 +469,7 @@ body: |
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], -4, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -502,7 +502,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -515,7 +515,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
@@ -523,28 +523,28 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@@ -573,7 +573,7 @@ body: |
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -586,7 +586,7 @@ body: |
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
@@ -594,28 +594,28 @@ body: |
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = COPY $vgpr4_vgpr5
@@ -643,7 +643,7 @@ body: |
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
@@ -656,7 +656,7 @@ body: |
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
@@ -666,7 +666,7 @@ body: |
; GFX7-FLAT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX7-FLAT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -675,7 +675,7 @@ body: |
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY3]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -684,7 +684,7 @@ body: |
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -693,7 +693,7 @@ body: |
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -723,7 +723,7 @@ body: |
; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX6: $vgpr0 = COPY [[COPY3]]
; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
@@ -736,7 +736,7 @@ body: |
; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN]].sub0
; GFX7: $vgpr0 = COPY [[COPY3]]
; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
@@ -756,7 +756,7 @@ body: |
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX7-FLAT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX7-FLAT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX8: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -775,7 +775,7 @@ body: |
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY7]], [[REG_SEQUENCE2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX9: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -784,7 +784,7 @@ body: |
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_sgpr_ptr_offset_4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr2, $vgpr3
@@ -793,7 +793,7 @@ body: |
; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_CMPSWAP_SADDR_RTN]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -17,19 +17,19 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -51,17 +51,17 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 0)
@@ -91,13 +91,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@@ -113,7 +113,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@@ -147,12 +147,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -167,7 +167,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
@ -199,13 +199,13 @@ body: |
|
|||
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -221,7 +221,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -255,12 +255,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -275,7 +275,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
@ -307,13 +307,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -329,7 +329,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -363,12 +363,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -383,7 +383,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
@ -415,7 +415,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
@ -431,7 +431,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -447,7 +447,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -481,7 +481,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -496,7 +496,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX9: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -511,7 +511,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
; GFX10: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4097
@ -533,19 +533,19 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -567,17 +567,17 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 0)
@ -607,13 +607,13 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@ -629,7 +629,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@ -663,12 +663,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX9: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -683,7 +683,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
; GFX10: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8)
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095
@ -18,7 +18,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -29,19 +29,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -63,7 +63,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -73,17 +73,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 4, addrspace 1)
@ -113,7 +113,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -124,19 +124,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -170,7 +170,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -180,17 +180,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
@ -222,7 +222,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -233,13 +233,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -255,7 +255,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -289,7 +289,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -299,12 +299,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -319,7 +319,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2048
@ -351,7 +351,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -362,13 +362,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -384,7 +384,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -418,7 +418,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -428,12 +428,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -448,7 +448,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 4095
@ -480,7 +480,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
@ -492,7 +492,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
@ -508,7 +508,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
@ -524,7 +524,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
@ -558,7 +558,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -569,7 +569,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -584,7 +584,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX9: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -599,7 +599,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
|
||||
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
|
||||
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
; GFX10: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 4, addrspace 1)
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = COPY $vgpr2
|
||||
%2:vgpr(s64) = G_CONSTANT i64 4097
|
||||
|
@@ -621,7 +621,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s64
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -632,19 +632,19 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -666,7 +666,7 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -676,17 +676,17 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst 8, addrspace 1)
@@ -716,7 +716,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_ADD_X2_RTN]]
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -727,13 +727,13 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN]]
; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
@@ -749,7 +749,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_ATOMIC_ADD_X2_RTN]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
@@ -783,7 +783,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX7: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
; GFX6-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -793,12 +793,12 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@@ -813,7 +813,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, 0, 0, implicit $exec :: (load store seq_cst 8, addrspace 1)
; GFX10: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
%2:vgpr(s64) = G_CONSTANT i64 4095

@@ -17,12 +17,12 @@ body: |
; WAVE64: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy
; WAVE32: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[DEF]], [[COPY]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
@@ -46,7 +46,7 @@ body: |
; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -55,7 +55,7 @@ body: |
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@@ -85,7 +85,7 @@ body: |
; WAVE64: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_1]], implicit $exec
; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_sgpr_bank_2_uses
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
@@ -96,7 +96,7 @@ body: |
; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY3]], implicit-def $scc
; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
@@ -124,14 +124,14 @@ body: |
; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc
; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg
; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3

@@ -24,9 +24,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -92,9 +92,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

@@ -25,9 +25,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -91,9 +91,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MAX_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MAX_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

@@ -24,9 +24,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -92,9 +92,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

@@ -25,9 +25,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec
@@ -91,9 +91,9 @@ body: |
; GFX7: %7:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX7: %8:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX7: %9:vgpr_32 = nofpexcept V_MIN_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY3]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: %10:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY4]], 0, [[COPY5]], 0, 0, implicit $mode, implicit $exec
; GFX7: %11:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; GFX7: %12:vreg_64 = nofpexcept V_MIN_F64_e64 0, [[COPY5]], 0, [[COPY6]], 0, 0, implicit $mode, implicit $exec

@@ -18,9 +18,9 @@ body: |
; GCN: %4:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %5:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN: %6:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %5, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY3]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0
%2:vgpr(s32) = COPY $vgpr1
@@ -133,16 +133,16 @@ body: |
; GCN: %13:vgpr_32 = nofpexcept V_MUL_F32_e64 0, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 3, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %15:vgpr_32 = nofpexcept V_MUL_F32_e64 3, [[COPY]], 1, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %6, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %7, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %8, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %9, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %10, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %11, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %12, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %13, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %14, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY1]], %15, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = COPY $vgpr1
%2:vgpr(p1) = COPY $vgpr2_vgpr3

@@ -18,16 +18,16 @@ body: |
; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GCN: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI-LABEL: name: fptoui
; VI: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; VI: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; VI: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; VI: %3:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; VI: %4:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI: FLAT_STORE_DWORD [[COPY2]], %3, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; VI: FLAT_STORE_DWORD [[COPY2]], %4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s32) = COPY $vgpr0

@@ -14,10 +14,10 @@ body: |
; CHECK-LABEL: name: fract_f64_neg
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
@@ -26,7 +26,7 @@ body: |
; CHECK: %12:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 1, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; CHECK: %15:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %12, 0, 0, implicit $mode, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %15, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36
@@ -63,10 +63,10 @@ body: |
; CHECK-LABEL: name: fract_f64_neg_abs
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 36, 0 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub0_sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_64 = COPY [[S_LOAD_DWORDX4_IMM]].sub2_sub3
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY2]], 0, 0 :: (load 8, addrspace 1)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
@@ -75,7 +75,7 @@ body: |
; CHECK: %13:vreg_64 = nofpexcept V_ADD_F64_e64 0, [[COPY3]], 3, [[COPY4]], 0, 0, implicit $mode, implicit $exec
; CHECK: %16:vreg_64 = nofpexcept V_FRACT_F64_e64 0, %13, 0, 0, implicit $mode, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: GLOBAL_STORE_DWORDX2_SADDR [[V_MOV_B32_e32_]], %16, [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; CHECK: S_ENDPGM 0
%2:sgpr(p4) = COPY $sgpr0_sgpr1
%7:sgpr(s64) = G_CONSTANT i64 36

@@ -99,7 +99,7 @@ body: |
; GCN-LABEL: name: implicit_def_p1_vgpr
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -117,7 +117,7 @@ body: |
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: $m0 = S_MOV_B32 -1
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p3) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)
@@ -134,7 +134,7 @@ body: |
; GCN-LABEL: name: implicit_def_p4_vgpr
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GCN: FLAT_STORE_DWORD [[DEF]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
%0:vgpr(p4) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_CONSTANT i32 4
G_STORE %1, %0 :: (store 4, addrspace 1)

@@ -16,12 +16,12 @@ body: |
; GFX7-LABEL: name: load_atomic_flat_s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 0)
@@ -97,12 +97,12 @@ body: |
; GFX7-LABEL: name: load_atomic_flat_s64_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_flat_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 0)
@@ -242,7 +242,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
@@ -257,7 +257,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@@ -291,12 +291,12 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_flat_s32_seq_cst_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p0) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095

@@ -23,7 +23,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
@@ -33,17 +33,17 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load seq_cst 4, align 4, addrspace 1)
@@ -144,7 +144,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst
; GFX7: liveins: $vgpr0_vgpr1
@@ -154,17 +154,17 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load seq_cst 8, align 8, addrspace 1)
@@ -349,7 +349,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@@ -369,7 +369,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@@ -384,12 +384,12 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@@ -418,7 +418,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
@ -428,7 +428,7 @@ body: |
|
|||
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
|
||||
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
|
||||
; GFX7-FLAT-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
|
||||
; GFX7-FLAT: liveins: $vgpr0_vgpr1
|
||||
|
@ -443,12 +443,12 @@ body: |
|
|||
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 4, addrspace 1)
|
||||
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_atomic_global_s32_seq_cst_gep_4095
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, 0, 0, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
|
||||
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 4095, 0, implicit $exec :: (load seq_cst 4, addrspace 1)
|
||||
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 4095
|
||||
|
@ -487,7 +487,7 @@ body: |
|
|||
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
|
||||
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
|
||||
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
|
||||
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
|
||||
; GFX7-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
|
@ -507,7 +507,7 @@ body: |
|
|||
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
|
||||
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
|
||||
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
|
||||
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
|
||||
; GFX7-FLAT-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
|
||||
; GFX7-FLAT: liveins: $vgpr0_vgpr1
|
||||
|
@ -522,12 +522,12 @@ body: |
|
|||
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
|
||||
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
|
||||
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load seq_cst 8, addrspace 1)
|
||||
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
|
||||
; GFX9-LABEL: name: load_atomic_global_s64_seq_cst_gep_m2048
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
|
||||
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], -2048, 0, implicit $exec :: (load seq_cst 8, addrspace 1)
|
||||
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s64) = G_CONSTANT i64 -2048
|
||||
|
|
|
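In the updated checks above, every BUFFER/FLAT/GLOBAL load now carries one cache-policy immediate where several separate 0/1 flag operands used to sit, which is why the trailing run of zeros shrinks. As a rough stand-alone illustration of the idea (the flag names match the hardware modifiers, but the bit values and helper code below are assumptions for the sketch, not the backend's actual definitions):

#include <cassert>
#include <cstdint>

// Hypothetical bit assignments for a combined cache-policy immediate.
// The real values live in the target's definitions; these are placeholders.
namespace CPol {
enum : uint64_t {
  GLC = 1 << 0, // globally coherent
  SLC = 1 << 1, // system level coherent
  DLC = 1 << 2, // device level coherent
};
} // namespace CPol

int main() {
  // One operand now carries what used to be three separate 0/1 operands.
  uint64_t Policy = CPol::GLC | CPol::SLC;

  assert((Policy & CPol::GLC) && "glc is set");
  assert((Policy & CPol::SLC) && "slc is set");
  assert(!(Policy & CPol::DLC) && "dlc is clear");

  // The common case in the tests above: every flag off, so the whole
  // policy is a single 0 instead of "0, 0, 0".
  uint64_t Default = 0;
  assert(Default == 0);
  return 0;
}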
@ -19,22 +19,22 @@ body: |
; GFX6-LABEL: name: load_constant_s32_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 4)

@ -57,22 +57,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s16_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_v2s16_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_v2s16_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_v2s16_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4)

@ -94,22 +94,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v2s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v2s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v2s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 4)

@ -130,22 +130,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s32_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v2s32_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v2s32_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v2s32_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 4)

@ -166,22 +166,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s16_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v4s16_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v4s16_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v4s16_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 4, addrspace 4)

@ -203,22 +203,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s32_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX7-LABEL: name: load_constant_v4s32_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX8-LABEL: name: load_constant_v4s32_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX10-LABEL: name: load_constant_v4s32_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 4)

@ -240,22 +240,22 @@ body: |
; GFX6-LABEL: name: load_constant_s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 4)

@ -277,22 +277,22 @@ body: |
; GFX6-LABEL: name: load_constant_s64_align4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_s64_align4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_s64_align4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_s64_align4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 4)

@ -314,22 +314,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX7-LABEL: name: load_constant_v2s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX8-LABEL: name: load_constant_v2s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
; GFX10-LABEL: name: load_constant_v2s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load 16, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 4)

@ -425,22 +425,22 @@ body: |
; GFX6-LABEL: name: load_constant_p3_from_4
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_p3_from_4
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_p3_from_4
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_p3_from_4
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 4)

@ -462,22 +462,22 @@ body: |
; GFX6-LABEL: name: load_constant_p1_from_8
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_p1_from_8
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_p1_from_8
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_p1_from_8
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(p4) = G_LOAD %0 :: (load 8, align 8, addrspace 4)

@ -573,22 +573,22 @@ body: |
; GFX6-LABEL: name: load_constant_v2s16
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_v2s16
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_v2s16
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_v2s16
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 4)

@ -610,22 +610,22 @@ body: |
; GFX6-LABEL: name: load_constant_v4s16
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX6: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX6: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX7-LABEL: name: load_constant_v4s16
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX7: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX7: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX8-LABEL: name: load_constant_v4s16
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX8: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX8: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
; GFX10-LABEL: name: load_constant_v4s16
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0, 0 :: (load 8, addrspace 4)
; GFX10: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]], 0, 0 :: (load 8, addrspace 4)
; GFX10: $sgpr0_sgpr1 = COPY [[S_LOAD_DWORDX2_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 4)

@ -684,22 +684,22 @@ body: |
; GFX6-LABEL: name: load_constant_v8s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX7-LABEL: name: load_constant_v8s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX8-LABEL: name: load_constant_v8s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
; GFX10-LABEL: name: load_constant_v8s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0 :: (load 32, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, align 4, addrspace 4)

@ -721,22 +721,22 @@ body: |
; GFX6-LABEL: name: load_constant_v16s32
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX7-LABEL: name: load_constant_v16s32
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX8-LABEL: name: load_constant_v16s32
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX10-LABEL: name: load_constant_v16s32
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)

@ -758,22 +758,22 @@ body: |
; GFX6-LABEL: name: load_constant_v8s64
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX7-LABEL: name: load_constant_v8s64
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX8-LABEL: name: load_constant_v8s64
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
; GFX10-LABEL: name: load_constant_v8s64
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0 :: (load 64, align 4, addrspace 4)
; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(<8 x s64>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)

@ -799,22 +799,22 @@ body: |
; GFX6-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 255, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1020
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1020, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1020

@ -839,22 +839,22 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 256, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1024
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1024, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1024

@ -879,24 +879,24 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 1048575, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048575
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048575
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048575

@ -921,24 +921,24 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM_ci [[COPY]], 262144, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM_ci]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1048576
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1048576

@ -963,25 +963,25 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1
; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX7: liveins: $sgpr0_sgpr1
; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX8: liveins: $sgpr0_sgpr1
; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_1073741823
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741823
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_SGPR [[COPY]], [[S_MOV_B32_]], 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_SGPR]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 1073741823
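The S_LOAD checks above now end in a single trailing immediate where separate flag operands used to appear. Printing such a combined operand back as assembly only requires walking the known bits and emitting one token per set bit; the sketch below is a stand-alone illustration under the same assumed bit values as the earlier snippet, not the backend's actual instruction printer:

#include <cstdint>
#include <iostream>
#include <string>

// Assumed flag bits, mirroring the earlier sketch.
enum : uint64_t { GLC = 1, SLC = 2, DLC = 4 };

// Render a combined cache-policy immediate as assembly modifiers.
// An all-zero value prints nothing, which is the common case.
std::string printCachePolicy(uint64_t CPol) {
  std::string Out;
  if (CPol & GLC) Out += " glc";
  if (CPol & SLC) Out += " slc";
  if (CPol & DLC) Out += " dlc";
  return Out;
}

int main() {
  std::cout << "s_load_dword s0, s[0:1], 0x0" << printCachePolicy(0) << "\n";
  std::cout << "s_load_dword s0, s[0:1], 0x0" << printCachePolicy(GLC | DLC) << "\n";
  return 0;
}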
@ -1013,7 +1013,7 @@ body: |
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX7: liveins: $sgpr0_sgpr1

@ -1026,7 +1026,7 @@ body: |
; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX8: liveins: $sgpr0_sgpr1

@ -1039,12 +1039,12 @@ body: |
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_1
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -1, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1

@ -1078,7 +1078,7 @@ body: |
; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX6: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX6: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX7-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX7: liveins: $sgpr0_sgpr1

@ -1093,7 +1093,7 @@ body: |
; GFX7: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX7: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX7: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX7: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX8-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX8: liveins: $sgpr0_sgpr1

@ -1108,12 +1108,12 @@ body: |
; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], [[COPY2]], implicit-def $scc
; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0, 0 :: (load 4, addrspace 4)
; GFX8: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG_SEQUENCE1]], 0, 0 :: (load 4, addrspace 4)
; GFX8: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
; GFX10-LABEL: name: load_constant_s32_from_4_gep_negative_524288
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0, 0 :: (load 4, addrspace 4)
; GFX10: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], -524288, 0 :: (load 4, addrspace 4)
; GFX10: $sgpr0 = COPY [[S_LOAD_DWORD_IMM]]
%0:sgpr(p4) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -524288
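One practical consequence of folding the flags into a single operand is that an assembler can accept the modifiers in any order: each recognized token is simply OR-ed into the accumulated value. A minimal stand-alone sketch of that idea (hypothetical helper under the same assumed bit values, not the actual AMDGPU asm parser):

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

enum : uint64_t { GLC = 1, SLC = 2, DLC = 4 }; // assumed bit values

// Accumulate cache-policy tokens into one bitmask, in whatever order
// they appear; reject duplicate or unknown tokens.
bool parseCachePolicy(const std::string &Text, uint64_t &CPol) {
  std::istringstream In(Text);
  std::string Tok;
  CPol = 0;
  while (In >> Tok) {
    uint64_t Bit = 0;
    if (Tok == "glc") Bit = GLC;
    else if (Tok == "slc") Bit = SLC;
    else if (Tok == "dlc") Bit = DLC;
    if (!Bit || (CPol & Bit)) // unknown or repeated modifier
      return false;
    CPol |= Bit;
  }
  return true;
}

int main() {
  uint64_t A = 0, B = 0;
  // "slc glc" and "glc slc" yield the same operand value.
  bool OkA = parseCachePolicy("slc glc", A);
  bool OkB = parseCachePolicy("glc slc", B);
  std::cout << OkA << " " << OkB << " " << (A == B) << "\n"; // prints: 1 1 1
  return 0;
}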
@ -19,22 +19,22 @@ body: |
|
|||
; GFX7-LABEL: name: load_flat_s32_from_4
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX8-LABEL: name: load_flat_s32_from_4
|
||||
; GFX8: liveins: $vgpr0_vgpr1
|
||||
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX9-LABEL: name: load_flat_s32_from_4
|
||||
; GFX9: liveins: $vgpr0_vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
; GFX10-LABEL: name: load_flat_s32_from_4
|
||||
; GFX10: liveins: $vgpr0_vgpr1
|
||||
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
|
||||
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
|
||||
%0:vgpr(p1) = COPY $vgpr0_vgpr1
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
|
||||
|
@ -56,22 +56,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_2
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX7: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX8-LABEL: name: load_flat_s32_from_2
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX9-LABEL: name: load_flat_s32_from_2
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX9: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX10-LABEL: name: load_flat_s32_from_2
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX10: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 0)
@ -93,22 +93,22 @@ body: |
; GFX7-LABEL: name: load_flat_s32_from_1
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 0)
@ -129,19 +129,19 @@ body: |

; GFX7-LABEL: name: load_flat_v2s32
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_v2s32
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_v2s32
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_v2s32
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -163,22 +163,22 @@ body: |
; GFX7-LABEL: name: load_flat_v3s32
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX7: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX8-LABEL: name: load_flat_v3s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX9-LABEL: name: load_flat_v3s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX9: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX10-LABEL: name: load_flat_v3s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX10: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 0)
@ -200,22 +200,22 @@ body: |
; GFX7-LABEL: name: load_flat_v4s32
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_flat_v4s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_flat_v4s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_flat_v4s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
@ -237,22 +237,22 @@ body: |
; GFX7-LABEL: name: load_flat_s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -274,22 +274,22 @@ body: |
; GFX7-LABEL: name: load_flat_v2s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_flat_v2s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_flat_v2s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_flat_v2s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 0)
@ -422,22 +422,22 @@ body: |
; GFX7-LABEL: name: load_flat_p3_from_4
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_p3_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_p3_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_p3_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@ -459,22 +459,22 @@ body: |
; GFX7-LABEL: name: load_flat_p1_from_8
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_p1_from_8
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_p1_from_8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_p1_from_8
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -566,22 +566,22 @@ body: |
; GFX7-LABEL: name: load_flat_v2s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_flat_v2s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_flat_v2s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX10-LABEL: name: load_flat_v2s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 0)
@ -603,22 +603,22 @@ body: |
; GFX7-LABEL: name: load_flat_v4s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_flat_v4s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_flat_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_flat_v4s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8)
; GFX10: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 0)
@ -728,7 +728,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX8: liveins: $vgpr0_vgpr1
@ -743,12 +743,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_2047
; GFX10: liveins: $vgpr0_vgpr1
@ -763,7 +763,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2047
@ -797,7 +797,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX8: liveins: $vgpr0_vgpr1
@ -812,12 +812,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_2048
; GFX10: liveins: $vgpr0_vgpr1
@ -832,7 +832,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2048
@ -866,7 +866,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX8: liveins: $vgpr0_vgpr1
@ -881,7 +881,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0_vgpr1
@ -896,7 +896,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2047
; GFX10: liveins: $vgpr0_vgpr1
@ -911,7 +911,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2047
@ -945,7 +945,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX8: liveins: $vgpr0_vgpr1
@ -960,7 +960,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
@ -975,7 +975,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m2048
; GFX10: liveins: $vgpr0_vgpr1
@ -990,7 +990,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@ -1024,7 +1024,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1039,12 +1039,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1059,7 +1059,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@ -1093,7 +1093,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1108,7 +1108,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0_vgpr1
@ -1123,7 +1123,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1138,7 +1138,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4096
@ -1172,7 +1172,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1187,7 +1187,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0_vgpr1
@ -1202,7 +1202,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1217,7 +1217,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4095
@ -1251,7 +1251,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1266,7 +1266,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0_vgpr1
@ -1281,7 +1281,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1296,7 +1296,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4096
@ -1330,7 +1330,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1345,7 +1345,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1360,7 +1360,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1375,7 +1375,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8191
@ -1409,7 +1409,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX8: liveins: $vgpr0_vgpr1
@ -1424,7 +1424,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0_vgpr1
@ -1439,7 +1439,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_8192
; GFX10: liveins: $vgpr0_vgpr1
@ -1454,7 +1454,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8192
@ -1488,7 +1488,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1503,7 +1503,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1518,7 +1518,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1533,7 +1533,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8191
@ -1567,7 +1567,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX8: liveins: $vgpr0_vgpr1
@ -1582,7 +1582,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0_vgpr1
@ -1597,7 +1597,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX9: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX10-LABEL: name: load_flat_s32_from_1_gep_m8192
; GFX10: liveins: $vgpr0_vgpr1
@ -1612,7 +1612,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1)
; GFX10: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8192
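Each updated pair of CHECK lines above differs only in the trailing immediates: several separate flag operands on the old load line collapse into a single cache-policy immediate next to the offset on the new one. As a minimal C++ sketch of how such flags can be folded into one bitmask operand (the namespace, names, and bit positions here are illustrative assumptions for this sketch, not the backend's actual definitions):

// A minimal sketch, assuming GLC/SLC/DLC occupy the low three bits;
// the real encoding lives in the AMDGPU backend headers.
namespace CPol {
enum : unsigned {
  GLC = 1u << 0, // globally coherent
  SLC = 1u << 1, // system level coherent
  DLC = 1u << 2  // device level coherent
};
} // namespace CPol

// Hypothetical helper: fold the three boolean flags into the one
// immediate that now appears on the load instructions above.
static unsigned encodeCachePolicy(bool IsGLC, bool IsSLC, bool IsDLC) {
  return (IsGLC ? CPol::GLC : 0) | (IsSLC ? CPol::SLC : 0) |
         (IsDLC ? CPol::DLC : 0);
}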
@ -16,13 +16,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(p1) = COPY %0
@ -47,13 +47,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -81,13 +81,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -125,7 +125,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@ -141,7 +141,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -169,7 +169,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@ -195,7 +195,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -225,7 +225,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1, $vgpr0
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096
; GFX10: liveins: $sgpr0_sgpr1, $vgpr0
@ -251,7 +251,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec
; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:vgpr(s32) = COPY $vgpr0
@ -279,13 +279,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4096
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4096
@ -310,13 +310,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4097
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4097
@ -351,7 +351,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4097
; GFX10: liveins: $sgpr0_sgpr1
@ -367,7 +367,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4097
@ -392,13 +392,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2049, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_2049
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 1, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 2049
@ -423,7 +423,7 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], -2049, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg2049
; GFX10: liveins: $sgpr0_sgpr1
@ -439,7 +439,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -2049
@ -463,13 +463,13 @@ body: |
; GFX9: liveins: $sgpr0_sgpr1
; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 4095, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967295
; GFX10: liveins: $sgpr0_sgpr1
; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[V_MOV_B32_e32_]], 2047, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294967295
@ -503,7 +503,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294967296
; GFX10: liveins: $sgpr0_sgpr1
@ -519,7 +519,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294967296
@ -554,7 +554,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_4294971390
; GFX10: liveins: $sgpr0_sgpr1
@ -570,7 +570,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 4294971390
@ -605,7 +605,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967295
; GFX10: liveins: $sgpr0_sgpr1
@ -621,7 +621,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4294967295
@ -655,7 +655,7 @@ body: |
; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX9: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_sgpr_base_offset_neg4294967296
; GFX10: liveins: $sgpr0_sgpr1
@ -671,7 +671,7 @@ body: |
; GFX10: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY3]], [[COPY4]], implicit-def $scc, implicit $scc
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
; GFX10: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY5]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -4294967296
@ -693,12 +693,12 @@ body: |
; GFX9-LABEL: name: load_global_s32_from_copy_undef_sgpr
; GFX9: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_copy_undef_sgpr
; GFX10: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:sgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(p1) = COPY %0
@ -717,11 +717,11 @@ body: |
bb.0:
; GFX9-LABEL: name: load_global_s32_from_undef_vgpr
; GFX9: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_undef_vgpr
; GFX10: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[DEF]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = G_IMPLICIT_DEF
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
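The BUFFER_LOAD and remaining GLOBAL/FLAT tests below follow the same pattern: the old CHECK line carries one immediate per flag, the new line a single combined value, while unrelated trailing operands stay separate. Under the same assumed bit layout as the sketch above, an individual flag would be recovered by masking (a hypothetical accessor, for illustration only):

// Hypothetical accessor matching the assumed layout above:
// test whether the GLC bit is set in a combined cache-policy value.
static bool isGLC(unsigned CachePolicy) {
  return (CachePolicy & CPol::GLC) != 0;
}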
@ -27,7 +27,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_4
; GFX7: liveins: $vgpr0_vgpr1
@ -37,27 +37,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_s32_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_s32_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_s32_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@ -84,7 +84,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX6: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_2
; GFX7: liveins: $vgpr0_vgpr1
@ -94,27 +94,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX7: [[BUFFER_LOAD_USHORT_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_2
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX8-LABEL: name: load_global_s32_from_2
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX8: [[FLAT_LOAD_USHORT:%[0-9]+]]:vgpr_32 = FLAT_LOAD_USHORT [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 2, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_USHORT]]
; GFX9-LABEL: name: load_global_s32_from_2
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX9: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
; GFX10-LABEL: name: load_global_s32_from_2
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX10: [[GLOBAL_LOAD_USHORT:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_USHORT]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 1)
@ -141,7 +141,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1
; GFX7: liveins: $vgpr0_vgpr1
@ -151,27 +151,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 1)
@ -198,7 +198,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX6: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-LABEL: name: load_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1
@ -208,27 +208,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX2_ADDR64_:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[BUFFER_LOAD_DWORDX2_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v2s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_v2s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_v2s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_v2s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -255,7 +255,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX6: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-LABEL: name: load_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1
@ -265,27 +265,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX4_ADDR64_:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUFFER_LOAD_DWORDX4_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v4s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_global_v4s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_global_v4s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_global_v4s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
@ -312,27 +312,27 @@ body: |
; GFX7-LABEL: name: load_global_s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -359,27 +359,27 @@ body: |
; GFX7-LABEL: name: load_global_v2s64
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX7-FLAT-LABEL: name: load_global_v2s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX8-LABEL: name: load_global_v2s64
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 16, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
; GFX9-LABEL: name: load_global_v2s64
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
; GFX10-LABEL: name: load_global_v2s64
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load 16, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 1)
@ -500,27 +500,27 @@ body: |
|
|||
; GFX7-LABEL: name: load_global_p3_from_4
|
||||
; GFX7: liveins: $vgpr0_vgpr1
|
||||
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX7-FLAT-LABEL: name: load_global_p3_from_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_p3_from_4
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_p3_from_4
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_p3_from_4
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@ -547,27 +547,27 @@ body: |
; GFX7-LABEL: name: load_global_p1_from_8
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_p1_from_8
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_p1_from_8
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_p1_from_8
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_p1_from_8
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -688,27 +688,27 @@ body: |
; GFX7-LABEL: name: load_global_v2s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX7-FLAT-LABEL: name: load_global_v2s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX8-LABEL: name: load_global_v2s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 4, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_DWORD]]
; GFX9-LABEL: name: load_global_v2s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
; GFX10-LABEL: name: load_global_v2s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 1)
@ -735,27 +735,27 @@ body: |
; GFX7-LABEL: name: load_global_v4s16
; GFX7: liveins: $vgpr0_vgpr1
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX7-FLAT-LABEL: name: load_global_v4s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX8-LABEL: name: load_global_v4s16
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = FLAT_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 8, addrspace 1)
; GFX8: $vgpr0_vgpr1 = COPY [[FLAT_LOAD_DWORDX2_]]
; GFX9-LABEL: name: load_global_v4s16
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX9: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
; GFX10-LABEL: name: load_global_v4s16
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 1)
; GFX10: $vgpr0_vgpr1 = COPY [[GLOBAL_LOAD_DWORDX2_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 1)
@ -833,7 +833,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7: liveins: $vgpr0_vgpr1
@ -843,7 +843,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -858,7 +858,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_2047
; GFX8: liveins: $vgpr0_vgpr1
@ -873,17 +873,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_2047
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2047
@ -912,7 +912,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7: liveins: $vgpr0_vgpr1
@ -922,7 +922,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -937,7 +937,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_2048
; GFX8: liveins: $vgpr0_vgpr1
@ -952,12 +952,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_2048
; GFX10: liveins: $vgpr0_vgpr1
@ -972,7 +972,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 2048
@ -1011,7 +1011,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7: liveins: $vgpr0_vgpr1
@ -1031,7 +1031,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1046,7 +1046,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX8: liveins: $vgpr0_vgpr1
@ -1061,17 +1061,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m2047
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2047, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2047
@ -1110,7 +1110,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7: liveins: $vgpr0_vgpr1
@ -1130,7 +1130,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1145,7 +1145,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX8: liveins: $vgpr0_vgpr1
@ -1160,17 +1160,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m2048
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -2048, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -2048
@ -1199,7 +1199,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7: liveins: $vgpr0_vgpr1
@ -1209,7 +1209,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1224,7 +1224,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1239,12 +1239,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], 4095, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1259,7 +1259,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4095
@ -1289,7 +1289,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7: liveins: $vgpr0_vgpr1
@ -1300,7 +1300,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1315,7 +1315,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1330,7 +1330,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0_vgpr1
@ -1345,7 +1345,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1360,7 +1360,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 4096
@ -1399,7 +1399,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7: liveins: $vgpr0_vgpr1
@ -1419,7 +1419,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1434,7 +1434,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX8: liveins: $vgpr0_vgpr1
@ -1449,12 +1449,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4095, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m4095
; GFX10: liveins: $vgpr0_vgpr1
@ -1469,7 +1469,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4095
@ -1508,7 +1508,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7: liveins: $vgpr0_vgpr1
@ -1528,7 +1528,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1543,7 +1543,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX8: liveins: $vgpr0_vgpr1
@ -1558,12 +1558,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[COPY]], -4096, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m4096
; GFX10: liveins: $vgpr0_vgpr1
@ -1578,7 +1578,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -4096
@ -1608,7 +1608,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7: liveins: $vgpr0_vgpr1
@ -1619,7 +1619,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8191
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1634,7 +1634,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1649,7 +1649,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1664,7 +1664,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1679,7 +1679,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8191
@ -1709,7 +1709,7 @@ body: |
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7: liveins: $vgpr0_vgpr1
@ -1720,7 +1720,7 @@ body: |
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 8192
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1735,7 +1735,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
|
||||
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
|
||||
; GFX8-LABEL: name: load_global_s32_from_1_gep_8192
|
||||
; GFX8: liveins: $vgpr0_vgpr1
|
||||
|
@ -1750,7 +1750,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0_vgpr1
@ -1765,7 +1765,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_8192
; GFX10: liveins: $vgpr0_vgpr1
@ -1780,7 +1780,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 8192
@ -1819,7 +1819,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7: liveins: $vgpr0_vgpr1
@ -1839,7 +1839,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1854,7 +1854,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX8: liveins: $vgpr0_vgpr1
@ -1869,7 +1869,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0_vgpr1
@ -1884,7 +1884,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m8191
; GFX10: liveins: $vgpr0_vgpr1
@ -1899,7 +1899,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8191
@ -1938,7 +1938,7 @@ body: |
; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7: liveins: $vgpr0_vgpr1
@ -1958,7 +1958,7 @@ body: |
; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE2]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: [[BUFFER_LOAD_UBYTE_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_ADDR64 [[REG_SEQUENCE1]], [[REG_SEQUENCE3]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX7-FLAT: liveins: $vgpr0_vgpr1
@ -1973,7 +1973,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX7-FLAT: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX8-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX8: liveins: $vgpr0_vgpr1
@ -1988,7 +1988,7 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: [[FLAT_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = FLAT_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load 1, addrspace 1)
; GFX8: $vgpr0 = COPY [[FLAT_LOAD_UBYTE]]
; GFX9-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0_vgpr1
@ -2003,7 +2003,7 @@ body: |
; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX9: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
; GFX10-LABEL: name: load_global_s32_from_1_gep_m8192
; GFX10: liveins: $vgpr0_vgpr1
@ -2018,7 +2018,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY1]], [[COPY2]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY3]], [[COPY4]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load 1, addrspace 1)
; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_UBYTE]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = G_CONSTANT i64 -8192

@ -24,27 +24,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX7: [[BUFFER_LOAD_DWORDX3_ADDR64_:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_ADDR64 [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[BUFFER_LOAD_DWORDX3_ADDR64_]]
; GFX7-FLAT-LABEL: name: load_global_v3s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX7-FLAT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX7-FLAT: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX8-LABEL: name: load_global_v3s32
; GFX8: liveins: $vgpr0_vgpr1
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX8: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = FLAT_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load 12, align 4, addrspace 1)
; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[FLAT_LOAD_DWORDX3_]]
; GFX9-LABEL: name: load_global_v3s32
; GFX9: liveins: $vgpr0_vgpr1
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX9: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
; GFX10-LABEL: name: load_global_v3s32
; GFX10: liveins: $vgpr0_vgpr1
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, 0, 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX10: [[GLOBAL_LOAD_DWORDX3_:%[0-9]+]]:vreg_96 = GLOBAL_LOAD_DWORDX3 [[COPY]], 0, 0, implicit $exec :: (load 12, align 4, addrspace 1)
; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[GLOBAL_LOAD_DWORDX3_]]
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 1)

@ -19,12 +19,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -49,12 +49,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_2
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_2
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 5)
@ -79,12 +79,12 @@ body: |
; GFX6-LABEL: name: load_private_s32_from_1
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@ -109,12 +109,12 @@ body: |
; GFX6-LABEL: name: load_private_p3_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_p3_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -139,12 +139,12 @@ body: |
; GFX6-LABEL: name: load_private_p5_from_4
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_p5_from_4
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -170,12 +170,12 @@ body: |
; GFX6-LABEL: name: load_private_v2s16
; GFX6: liveins: $vgpr0
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_v2s16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -206,12 +206,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2047
@ -240,14 +240,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2047, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2147483647
@ -279,12 +279,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_2048
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 2048, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 2048
@ -313,14 +313,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2047, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -2047
@ -349,14 +349,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -2048, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -2048
@ -385,12 +385,12 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_4095
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 4095
@ -419,14 +419,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_4096
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 4096
@ -455,14 +455,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4095, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -4095
@ -491,14 +491,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -4096
@ -527,14 +527,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_8191
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 8191
@ -563,14 +563,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_8192
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 8192
@ -599,14 +599,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8191, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -8191
@ -635,14 +635,14 @@ body: |
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -8192, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(s32) = G_CONSTANT i32 -8192
@ -666,10 +666,10 @@ body: |
bb.0:

; GFX6-LABEL: name: load_private_s32_from_4_constant_0
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_4_constant_0
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
%0:vgpr(p5) = G_CONSTANT i32 0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -691,10 +691,10 @@ body: |
bb.0:

; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
%0:sgpr(p5) = G_CONSTANT i32 16
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -716,10 +716,10 @@ body: |
bb.0:

; GFX6-LABEL: name: load_private_s32_from_1_constant_4095
; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
; GFX9-LABEL: name: load_private_s32_from_1_constant_4095
; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@ -742,11 +742,11 @@ body: |

; GFX6-LABEL: name: load_private_s32_from_1_constant_4096
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_constant_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
@ -770,10 +770,10 @@ body: |
bb.0:

; GFX6-LABEL: name: load_private_s32_from_fi
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_fi
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
@ -796,10 +796,10 @@ body: |
bb.0:

; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_4095
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
@ -825,10 +825,10 @@ body: |
bb.0:

; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:sgpr(s32) = G_CONSTANT i32 4095
@ -858,13 +858,13 @@ body: |
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4096
@ -890,11 +890,11 @@ body: |

; GFX6-LABEL: name: load_private_s32_from_neg1
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX9-LABEL: name: load_private_s32_from_neg1
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
%0:vgpr(p5) = G_CONSTANT i32 -1
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)

@ -17,12 +17,12 @@ regBankSelected: true
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1

# Immediate offset:
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0, 0
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0

# Max immediate offset for SI
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0, 0
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0

# Immediate overflow for SI
# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
@ -52,8 +52,8 @@ regBankSelected: true
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0

# Immediate overflow for CI
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
@ -66,7 +66,7 @@ regBankSelected: true
# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0

# Max 32-bit byte offset
# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
@ -84,8 +84,8 @@ regBankSelected: true
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0

# Pointer loads
# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
@ -192,8 +192,8 @@ body: |
# GCN-LABEL: name: constant_address_positive{{$}}
# GCN: %0:sreg_64 = S_MOV_B64 44

# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load 4, addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load 4, addrspace 4)

---

@ -18,16 +18,16 @@ body: |
; WAVE64: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; WAVE64: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE64: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE64: FLAT_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; WAVE32-LABEL: name: sitofp
; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; WAVE32: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; WAVE32: [[V_CVT_F32_I32_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; WAVE32: [[V_CVT_F32_I32_e64_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e64 [[COPY1]], 0, 0, implicit $mode, implicit $exec
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; WAVE32: GLOBAL_STORE_DWORD [[COPY2]], [[V_CVT_F32_I32_e64_1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:sgpr(s32) = COPY $sgpr0

%1:vgpr(s32) = COPY $vgpr0

@ -17,12 +17,12 @@ body: |
; GFX7: liveins: $vgpr0, $vgpr1_vgpr2
; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
; GFX7: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
; GFX9-LABEL: name: atomic_store_flat_s32_seq_cst
; GFX9: liveins: $vgpr0, $vgpr1_vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr1_vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
; GFX9: FLAT_STORE_DWORD [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 4)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p0) = COPY $vgpr1_vgpr2
G_STORE %0, %1 :: (store seq_cst 4, align 4, addrspace 0)

@ -152,12 +152,12 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
; GFX9-LABEL: name: atomic_store_flat_s64_seq_cst
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY1]], [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store seq_cst 8)
%0:vgpr(s64) = COPY $vgpr0_vgpr1
%1:vgpr(p0) = COPY $vgpr2_vgpr3
G_STORE %0, %1 :: (store seq_cst 8, align 8, addrspace 0)

@ -19,22 +19,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_s32_to_4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_s32_to_4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_s32_to_4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)

@ -55,22 +55,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX7: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX8-LABEL: name: store_flat_s32_to_2
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX9-LABEL: name: store_flat_s32_to_2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX9: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX10-LABEL: name: store_flat_s32_to_2
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
; GFX10: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 2, align 2, addrspace 0)

@ -91,22 +91,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX7: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX8-LABEL: name: store_flat_s32_to_1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX9-LABEL: name: store_flat_s32_to_1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX9: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX10-LABEL: name: store_flat_s32_to_1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
; GFX10: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 1, align 1, addrspace 0)

@ -128,22 +128,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)

@ -237,22 +237,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_v2s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)

@ -273,22 +273,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX7: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX8-LABEL: name: store_flat_v3s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX9-LABEL: name: store_flat_v3s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX9: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX10-LABEL: name: store_flat_v3s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
; GFX10: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
G_STORE %1, %0 :: (store 12, align 16, addrspace 0)

@ -309,22 +309,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8-LABEL: name: store_flat_v4s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9-LABEL: name: store_flat_v4s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10-LABEL: name: store_flat_v4s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 0)

@ -346,22 +346,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_v2s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_v2s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_v2s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)

@ -383,22 +383,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_v4s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_v4s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_v4s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)

@ -493,22 +493,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8-LABEL: name: store_flat_v2s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9-LABEL: name: store_flat_v2s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX9: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10-LABEL: name: store_flat_v2s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
; GFX10: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 0)

@ -530,22 +530,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8-LABEL: name: store_flat_p1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9-LABEL: name: store_flat_p1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10-LABEL: name: store_flat_p1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 0)

@ -604,22 +604,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_p3
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_p3
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_p3
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 0)

@ -677,22 +677,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX8-LABEL: name: store_atomic_flat_s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX9-LABEL: name: store_atomic_flat_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX10-LABEL: name: store_atomic_flat_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
; GFX10: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 0)

@ -714,22 +714,22 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX8-LABEL: name: store_atomic_flat_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX9-LABEL: name: store_atomic_flat_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX9: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX10-LABEL: name: store_atomic_flat_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
; GFX10: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 0)

@ -761,7 +761,7 @@ body: |
; GFX7: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX7: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8-LABEL: name: store_flat_s32_gep_2047
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1

@ -776,12 +776,12 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9-LABEL: name: store_flat_s32_gep_2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX9: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10-LABEL: name: store_flat_s32_gep_2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1

@ -796,7 +796,7 @@ body: |
; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX10: %9:vgpr_32, dead %11:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
; GFX10: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047

@ -26,7 +26,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_4
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1

@ -36,27 +36,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_4
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_4
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_4
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_4
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)

@ -82,7 +82,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX6: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_2
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1

@ -92,27 +92,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7: BUFFER_STORE_SHORT_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_2
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX7-FLAT: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_2
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX8: FLAT_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 2, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_2
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX9: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_2
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 1)
; GFX10: GLOBAL_STORE_SHORT [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 2, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 2, align 2, addrspace 1)

@ -138,7 +138,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX6: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-LABEL: name: store_global_s32_to_1
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1

@ -148,27 +148,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7: BUFFER_STORE_BYTE_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_to_1
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX7-FLAT: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX8-LABEL: name: store_global_s32_to_1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX8: FLAT_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 1, addrspace 1)
; GFX9-LABEL: name: store_global_s32_to_1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX9: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX10-LABEL: name: store_global_s32_to_1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 1)
; GFX10: GLOBAL_STORE_BYTE [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 1, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store 1, align 1, addrspace 1)

@ -195,27 +195,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)

@ -288,7 +288,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX6: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-LABEL: name: store_global_v2s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1

@ -298,27 +298,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7: BUFFER_STORE_DWORDX2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_v2s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_v2s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_v2s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s32>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)

@ -344,7 +344,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX6: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-LABEL: name: store_global_v4s32
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1

@ -354,27 +354,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7: BUFFER_STORE_DWORDX4_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v4s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8-LABEL: name: store_global_v4s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX9-LABEL: name: store_global_v4s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10-LABEL: name: store_global_v4s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 1)

@ -401,27 +401,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_v2s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_v2s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_v2s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s16>) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)

@ -448,27 +448,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v4s16
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_v4s16
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_v4s16
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_v4s16
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<4 x s16>) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)

@ -542,27 +542,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v2s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8-LABEL: name: store_global_v2s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX8: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 16, addrspace 1)
; GFX9-LABEL: name: store_global_v2s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10-LABEL: name: store_global_v2s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 16, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
G_STORE %1, %0 :: (store 16, align 16, addrspace 1)
@ -589,27 +589,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_p1
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8-LABEL: name: store_global_p1
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 8, addrspace 1)
; GFX9-LABEL: name: store_global_p1
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10-LABEL: name: store_global_p1
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p1) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store 8, align 8, addrspace 1)
@ -683,27 +683,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_p3
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_p3
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_p3
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_p3
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(p3) = COPY $vgpr2
G_STORE %1, %0 :: (store 4, align 4, addrspace 1)
@ -776,27 +776,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX7: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX8-LABEL: name: store_atomic_global_s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 4, addrspace 1)
; GFX9-LABEL: name: store_atomic_global_s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
; GFX10-LABEL: name: store_atomic_global_s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
G_STORE %1, %0 :: (store monotonic 4, align 4, addrspace 1)
@ -823,27 +823,27 @@ body: |
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX7: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX7-FLAT-LABEL: name: store_atomic_global_s64
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX8-LABEL: name: store_atomic_global_s64
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX8: FLAT_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store monotonic 8, addrspace 1)
; GFX9-LABEL: name: store_atomic_global_s64
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
; GFX10-LABEL: name: store_atomic_global_s64
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store monotonic 8, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s64) = COPY $vgpr2_vgpr3
G_STORE %1, %0 :: (store monotonic 8, align 8, addrspace 1)
@ -870,7 +870,7 @@ body: |
; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX6: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX6: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-LABEL: name: store_global_s32_gep_2047
; GFX7: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -880,7 +880,7 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7: BUFFER_STORE_DWORD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_s32_gep_2047
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -895,7 +895,7 @@ body: |
; GFX7-FLAT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX7-FLAT: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX7-FLAT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8-LABEL: name: store_global_s32_gep_2047
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
@ -910,17 +910,17 @@ body: |
; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
; GFX8: %9:vgpr_32, dead %11:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %9, %subreg.sub1
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX8: FLAT_STORE_DWORD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; GFX9-LABEL: name: store_global_s32_gep_2047
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX9: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10-LABEL: name: store_global_s32_gep_2047
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GFX10: GLOBAL_STORE_DWORD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (store 4, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s64) = G_CONSTANT i64 2047
@ -1,3 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
# RUN: llc -march=amdgcn -mcpu=hawaii -mattr=+flat-for-global -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7-FLAT %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s
@ -26,27 +27,27 @@ body: |
; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
; GFX7: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3
; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX7: BUFFER_STORE_DWORDX3_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX7-FLAT-LABEL: name: store_global_v3s32
; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX7-FLAT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX7-FLAT: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX7-FLAT: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX8-LABEL: name: store_global_v3s32
; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX8: FLAT_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store 12, align 16, addrspace 1)
; GFX9-LABEL: name: store_global_v3s32
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX9: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX9: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX10-LABEL: name: store_global_v3s32
; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
; GFX10: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10: [[COPY1:%[0-9]+]]:vreg_96 = COPY $vgpr2_vgpr3_vgpr4
; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
; GFX10: GLOBAL_STORE_DWORDX3 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store 12, align 16, addrspace 1)
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
G_STORE %1, %0 :: (store 12, align 16, addrspace 1)
@ -21,12 +21,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_4
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -52,12 +52,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_2
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
@ -83,12 +83,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
@ -114,12 +114,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_v2s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -145,12 +145,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_p3
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -176,12 +176,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: function_store_private_p5
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -206,10 +206,10 @@ body: |

; GFX6-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_fi_offset_4095
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
%2:vgpr(p5) = G_PTR_ADD %0, %1
@ -236,10 +236,10 @@ body: |

; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4095
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4095
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@ -265,11 +265,11 @@ body: |
; GFX6-LABEL: name: function_store_private_s32_to_1_constant_4096
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: function_store_private_s32_to_1_constant_4096
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@ -294,12 +294,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_4
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -324,12 +324,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_2
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
@ -354,12 +354,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
@ -384,12 +384,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_v2s16
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(<2 x s16>) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -414,12 +414,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_p3
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p3) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -444,12 +444,12 @@ body: |
; GFX6: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_p5
; GFX9: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
%0:vgpr(p5) = COPY $vgpr0
%1:vgpr(p5) = COPY $vgpr1
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
@ -475,11 +475,11 @@ body: |
; GFX6-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_fi_offset_4095
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
%1:vgpr(s32) = G_CONSTANT i32 4095
%2:vgpr(p5) = G_PTR_ADD %0, %1
@ -507,11 +507,11 @@ body: |
; GFX6-LABEL: name: kernel_store_private_s32_to_1_constant_4095
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4095
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4095
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@ -538,12 +538,12 @@ body: |
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9-LABEL: name: kernel_store_private_s32_to_1_constant_4096
; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
%0:vgpr(p5) = G_CONSTANT i32 4096
%1:vgpr(s32) = G_CONSTANT i32 0
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc__vg
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -56,7 +56,7 @@ define amdgpu_ps <2 x float> @raw_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc__vg
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub0
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN]].sub1
; CHECK: $vgpr0 = COPY [[COPY8]]
@ -81,7 +81,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__vgp
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_vof
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -170,7 +170,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__sgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -195,7 +195,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -217,7 +217,7 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_OFFEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
@ -16,7 +16,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY8]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -40,7 +40,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: S_ENDPGM 0
%ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -84,7 +84,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -137,7 +137,7 @@ define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY10]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -165,7 +165,7 @@ define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY8]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -28,7 +28,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
|
|||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
@ -59,7 +59,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
|
|||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
@ -89,7 +89,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
|
|||
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
|
||||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
@ -119,7 +119,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_v
|
|||
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
|
||||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
@ -200,7 +200,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__sgp
|
|||
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
|
||||
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
@ -284,7 +284,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__0_v
|
|||
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
|
||||
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
|
||||
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
|
|||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%voffset = add i32 %voffset.base, 4095
|
||||
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
@ -342,7 +342,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
|
|||
; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX908: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX908: S_ENDPGM 0
|
||||
; GFX90A-LABEL: name: raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
|
||||
; GFX90A: bb.1 (%ir-block.0):
|
||||
|
@ -355,7 +355,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
|
|||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
|
||||
ret void
|
||||
|
@ -386,7 +386,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__v
|
|||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
@ -415,7 +415,7 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0
|
|||
; GFX90A: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
|
||||
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
|
||||
; GFX90A: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
|
||||
; GFX90A: S_ENDPGM 0
|
||||
%ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
|
||||
ret void
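
A note on what the two immediates now mean in the atomic checks above: the old separate glc and slc operands are folded into one bitmask, so the slc variant selects cpol 2 for the no-return GFX908 form and cpol 3 for the returning GFX90A form, since a returning buffer atomic also needs GLC set to get the old value back. Below is a minimal C++ sketch of that packing, with bit values read off these checks (GLC = 1, SLC = 2, DLC = 4); the names are illustrative assumptions, not the patch's own definitions.

#include <cstdint>

// Cache-policy bits as implied by the CHECK lines above: glc=1, slc=2, dlc=4.
enum CPolBits : uint32_t { CPOL_GLC = 1, CPOL_SLC = 2, CPOL_DLC = 4 };

// Fold the former separate flags into the single cpol immediate. Returning
// atomics force GLC, which is why the slc test selects 2 for the no-return
// form but 3 for the _RTN form.
uint32_t packCPol(bool Glc, bool Slc, bool Dlc, bool IsAtomicRtn) {
  uint32_t CPol = (Glc ? CPOL_GLC : 0) | (Slc ? CPOL_SLC : 0) |
                  (Dlc ? CPOL_DLC : 0);
  if (IsAtomicRtn)
    CPol |= CPOL_GLC;
  return CPol;
}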
@ -14,7 +14,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@ -27,7 +27,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__sgpr_rsrc__vgpr_voffset__sgpr
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -45,7 +45,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@ -58,7 +58,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_format_v2f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@ -93,7 +93,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@ -109,7 +109,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
@ -169,7 +169,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -209,7 +209,7 @@ define amdgpu_ps half @raw_buffer_load_format_f16__vgpr_rsrc__sgpr_voffset__vgpr
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -234,7 +234,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@ -250,7 +250,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
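
For the load patterns the shrink is easiest to count in the trailing immediates: the seven values that used to follow the soffset operand collapse to four. From the single-flag tests further down, the old order puts glc in the second slot, slc in the third and dlc in the fifth, so a plausible reading of the two layouts is the sketch below; the tfe, swz and sccb names are assumptions inferred from that pattern, not taken from the patch:

#include <cstdint>

// Assumed shape of the trailing MUBUF load immediates in these checks.
struct MUBUFImmsOld { // "..., 0, 0, 0, 0, 0, 0, 0, implicit $exec"
  int64_t Offset, GLC, SLC, TFE, DLC, SWZ, SCCB;
};
struct MUBUFImmsNew { // "..., 0, 0, 0, 0, implicit $exec"
  int64_t Offset, CPol, TFE, SWZ;
};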
@ -13,7 +13,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__sgpr_rsrc__vgpr_voffset__sgp
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -31,7 +31,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@ -52,7 +52,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_format_v3f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
@ -75,7 +75,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
@ -121,7 +121,7 @@ define amdgpu_ps float @raw_buffer_load_format_f32__vgpr_rsrc__sgpr_voffset__vgp
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -146,7 +146,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2

@ -14,7 +14,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -34,7 +34,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__sgpr_voffset__sgpr_soffs
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -69,7 +69,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -114,7 +114,7 @@ define amdgpu_ps float @raw_buffer_load_f32__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -140,7 +140,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
@ -159,7 +159,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
@ -178,7 +178,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
@ -197,7 +197,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 6, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
@ -216,7 +216,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 5, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
@ -235,7 +235,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 7, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
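
The cachepolicy sweep above (1, 2, 4, then 6, 5, 7) shows the new operand is a plain bitmask: each intrinsic argument value lands on the cpol immediate unchanged instead of being scattered across three 0/1 operands. A tiny self-check of that mapping, under the same assumed bit values as before:

#include <cassert>

int main() {
  const unsigned GLC = 1, SLC = 2, DLC = 4; // assumed bit values
  // intrinsic cachepolicy argument -> cpol operand, per the checks above
  assert(GLC == 1);               // i32 1 selects "..., 0, 1, 0, 0, ..."
  assert(SLC == 2);               // i32 2 selects "..., 0, 2, 0, 0, ..."
  assert(DLC == 4);               // i32 4 selects "..., 0, 4, 0, 0, ..."
  assert((SLC | DLC) == 6);       // i32 6 selects cpol 6
  assert((GLC | DLC) == 5);       // i32 5 selects cpol 5
  assert((GLC | SLC | DLC) == 7); // i32 7 selects cpol 7
  return 0;
}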
|
||||
|
@ -254,7 +254,7 @@ define amdgpu_ps <2 x float> @raw_buffer_load_v2f32__sgpr_rsrc__vgpr_voffset__sg
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
|
||||
; CHECK: $vgpr0 = COPY [[COPY6]]
|
||||
|
@ -275,7 +275,7 @@ define amdgpu_ps <3 x float> @raw_buffer_load_v3f32__sgpr_rsrc__vgpr_voffset__sg
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX3_OFFEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub0
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub1
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_OFFEN]].sub2
|
||||
|
@ -298,7 +298,7 @@ define amdgpu_ps <4 x float> @raw_buffer_load_v4f32__sgpr_rsrc__vgpr_voffset__sg
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
|
||||
|
@ -323,7 +323,7 @@ define amdgpu_ps half @raw_buffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffse
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
@ -341,7 +341,7 @@ define amdgpu_ps <2 x half> @raw_buffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sgp
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
@ -365,7 +365,7 @@ define amdgpu_ps <4 x half> @raw_buffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sgp
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1
|
||||
; CHECK: $vgpr0 = COPY [[COPY6]]
|
||||
|
@ -386,7 +386,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
@ -406,7 +406,7 @@ define amdgpu_ps float @raw_buffer_load_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffse
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_OFFEN]], 0, 8, implicit $exec
|
||||
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
|
@ -444,7 +444,7 @@ define amdgpu_ps half @raw_buffer_load_f16__vgpr_rsrc__vgpr_voffset__sgpr_soffse
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
@ -486,7 +486,7 @@ define amdgpu_ps float @raw_buffer_load_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffse
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
@ -512,7 +512,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vdpr_voffset__sgpr_soffs
|
|||
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
|
||||
|
@ -529,7 +529,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE]], [[COPY4]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
@ -548,7 +548,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE]], [[COPY4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
@ -566,7 +566,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 16
@ -585,7 +585,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -607,7 +607,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %10:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4096
@ -626,7 +626,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
@ -644,7 +644,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
@ -664,7 +664,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 16
@ -685,7 +685,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4095
@ -706,7 +706,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%soffset = add i32 %soffset.base, 4096
@ -744,7 +744,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -790,7 +790,7 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %13, [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
@ -27,7 +27,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -44,7 +44,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
@ -56,7 +56,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@ -78,7 +78,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; PACKED: bb.1 (%ir-block.0):
@ -91,7 +91,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -116,7 +116,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; PACKED: bb.1 (%ir-block.0):
@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -211,7 +211,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -240,7 +240,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; PACKED: bb.1 (%ir-block.0):
@ -253,7 +253,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@ -275,7 +275,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; PACKED: bb.1 (%ir-block.0):
@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@ -310,7 +310,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
; PACKED: bb.1 (%ir-block.0):
@ -323,7 +323,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -346,7 +346,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
; PACKED: bb.1 (%ir-block.0):
@ -359,7 +359,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -385,7 +385,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
; PACKED: bb.1 (%ir-block.0):
@ -401,7 +401,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -448,7 +448,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -489,7 +489,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@ -51,7 +51,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -94,7 +94,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -132,7 +132,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@ -199,7 +199,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -220,7 +220,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -244,7 +244,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -103,7 +103,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -148,7 +148,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
ret void
@ -191,7 +191,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
ret void
@ -209,7 +209,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
ret void
@ -227,7 +227,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
ret void
@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 6, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
ret void
@ -263,7 +263,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 5, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
ret void
@ -281,7 +281,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 7, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
ret void
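
The hunks above are the heart of the change: the separate glc, slc, and dlc immediates on each buffer instruction are folded into a single cache_policy value, so the intrinsics' last argument maps straight through (glc -> 1, slc -> 2, glc+slc -> 3, dlc -> 4, and so on up to 7 for all three) while the trailing operand list shrinks from seven immediates to four. A minimal C++ sketch of that encoding, with the bit assignments inferred from these test updates rather than copied from the patch:

#include <cassert>

// Assumed bit layout: bit 0 = GLC, bit 1 = SLC, bit 2 = DLC.
enum CPolBits : unsigned {
  CPOL_GLC = 1u << 0,
  CPOL_SLC = 1u << 1,
  CPOL_DLC = 1u << 2
};

// Fold the three legacy flag immediates into one cache_policy immediate.
unsigned encodeCPol(bool glc, bool slc, bool dlc) {
  unsigned cpol = 0;
  if (glc) cpol |= CPOL_GLC;
  if (slc) cpol |= CPOL_SLC;
  if (dlc) cpol |= CPOL_DLC;
  return cpol;
}

int main() {
  assert(encodeCPol(true, false, false) == 1);  // glc         -> cpol 1
  assert(encodeCPol(false, true, false) == 2);  // slc         -> cpol 2
  assert(encodeCPol(true, true, false) == 3);   // glc|slc     -> cpol 3
  assert(encodeCPol(false, false, true) == 4);  // dlc         -> cpol 4
  assert(encodeCPol(true, true, true) == 7);    // glc|slc|dlc -> cpol 7
  return 0;
}
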
@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -344,7 +344,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -362,7 +362,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -381,7 +381,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -418,7 +418,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -474,7 +474,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -498,7 +498,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFSET_exact [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
@ -517,7 +517,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
ret void
@ -535,7 +535,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -554,7 +554,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -576,7 +576,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -595,7 +595,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
@ -613,7 +613,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
@ -631,7 +631,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -650,7 +650,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -672,7 +672,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %15:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
@ -711,7 +711,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %13, [[REG_SEQUENCE3]], [[COPY6]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -755,7 +755,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY6]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -13,7 +13,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
@ -26,7 +26,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -44,7 +44,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_OFFEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@ -68,7 +68,7 @@ define amdgpu_ps <2 x half> @raw_tbuffer_load_v2f16__sgpr_rsrc__vgpr_voffset__sg
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -92,7 +92,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub0
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub1
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_OFFEN]].sub2
@ -126,7 +126,7 @@ define amdgpu_ps <4 x half> @raw_tbuffer_load_v4f16__sgpr_rsrc__vgpr_voffset__sg
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub0
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_OFFEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY6]]
@ -167,7 +167,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -207,7 +207,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -232,7 +232,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
@ -245,7 +245,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@ -263,7 +263,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
@ -276,7 +276,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@ -294,7 +294,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
@ -307,7 +307,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@ -325,7 +325,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_OFFEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
@ -338,7 +338,7 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_OFFEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
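
The tbuffer hunks above follow the same scheme with the format immediate (78 here) keeping its own slot: the slc variant, for example, goes from 0, 78, 0, 1, 0, 0, 0, 0 to 0, 78, 2, 0, 0. For reading these tests, a hypothetical helper that splits the combined immediate back into the legacy flags, under the same assumed bit layout as the sketch above:

// Decode a cache_policy immediate into the legacy flags (assumed layout).
struct LegacyFlags { bool glc, slc, dlc; };

LegacyFlags decodeCPol(unsigned cpol) {
  return {(cpol & 1u) != 0,   // GLC in bit 0
          (cpol & 2u) != 0,   // SLC in bit 1
          (cpol & 4u) != 0};  // DLC in bit 2
}
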
@ -12,7 +12,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
@ -30,7 +30,7 @@ define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_OFFEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_OFFEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY6]]
@ -51,7 +51,7 @@ define amdgpu_ps <3 x float> @raw_tbuffer_load_v3f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_OFFEN]].sub2
@ -74,7 +74,7 @@ define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32__sgpr_rsrc__vgpr_voffset__s
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub0
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub1
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_OFFEN]].sub2
@ -119,7 +119,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY5]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY6]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -144,7 +144,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
@ -162,7 +162,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
@ -180,7 +180,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
@ -198,7 +198,7 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_OFFEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -49,7 +49,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -93,7 +93,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY1]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -108,7 +108,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_so
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -179,7 +179,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -224,7 +224,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -263,7 +263,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__vgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -349,7 +349,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__vgpr_rsrc__sgpr_voffset__vgpr_soff
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -374,7 +374,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_glc
; PACKED: bb.1 (%ir-block.0):
@ -387,7 +387,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
ret void
@ -405,7 +405,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; PACKED: bb.1 (%ir-block.0):
@ -418,7 +418,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
ret void
@ -436,7 +436,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc_glc
; PACKED: bb.1 (%ir-block.0):
@ -449,7 +449,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
ret void
@ -467,7 +467,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_D16_X_gfx80_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc
; PACKED: bb.1 (%ir-block.0):
@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
ret void
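The f16 tests above exercise each cache flag in turn, and the paired old/new lines show the encoding of the new operand: glc becomes cpol 1, slc becomes 2, slc+glc becomes 3, and dlc becomes 4. Below is a minimal C++ sketch of that bitmask, assuming the flag names and bit positions implied by these tests rather than quoting the final header:

#include <cstdint>

// Hypothetical mirror of the cache-policy bits these tests encode.
namespace CPol {
enum : uint8_t {
  GLC = 1 << 0, // globally coherent
  SLC = 1 << 1, // system level coherent
  DLC = 1 << 2, // device level coherent
};
} // namespace CPol

// Folds the three former operands into the single cache_policy value.
constexpr uint8_t makeCPol(bool Glc, bool Slc, bool Dlc) {
  return (Glc ? CPol::GLC : 0) | (Slc ? CPol::SLC : 0) | (Dlc ? CPol::DLC : 0);
}

static_assert(makeCPol(true, false, false) == 1, "_glc test above");
static_assert(makeCPol(false, true, false) == 2, "_slc test above");
static_assert(makeCPol(true, true, false) == 3, "_slc_glc test above");
static_assert(makeCPol(false, false, true) == 4, "_dlc test above");

This also explains the parser benefit mentioned in the commit message: flags OR into one value, so their textual order no longer matters.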
@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):
@ -27,7 +27,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__sgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
ret void
@ -62,7 +62,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -98,7 +98,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__sgpr_soffs
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -143,7 +143,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -182,7 +182,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__vgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -228,7 +228,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; UNPACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -268,7 +268,7 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
; PACKED: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -14,7 +14,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -35,7 +35,7 @@ define amdgpu_ps void @raw_tbuffer_store_v2f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -57,7 +57,7 @@ define amdgpu_ps void @raw_tbuffer_store_v3f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE]], [[COPY7]], [[REG_SEQUENCE1]], [[COPY8]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -80,7 +80,7 @@ define amdgpu_ps void @raw_tbuffer_store_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_so
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE]], [[COPY8]], [[REG_SEQUENCE1]], [[COPY9]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 0)
ret void
@ -100,7 +100,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__sgpr_voffset__sgpr_soff
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr7
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
ret void
@ -135,7 +135,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 94, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -180,7 +180,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__vgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -226,7 +226,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__vgpr_rsrc__sgpr_voffset__vgpr_soff
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U32_e64_]], [[S_AND_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY7]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -252,7 +252,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 1)
ret void
@ -271,7 +271,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 2, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 2)
ret void
@ -290,7 +290,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 1, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 3, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 3)
ret void
@ -309,7 +309,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 4, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
ret void
@ -328,7 +328,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vdpr_voffset__sgpr_soff
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 94, i32 0)
ret void
@ -345,7 +345,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFSET_exact [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 94, i32 0)
ret void
@ -364,7 +364,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY6]], [[REG_SEQUENCE]], [[COPY5]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 94, i32 0)
ret void
@ -382,7 +382,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 16, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 16, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 16
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -401,7 +401,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4095
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -423,7 +423,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE]], [[COPY6]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset = add i32 %voffset.base, 4096
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -442,7 +442,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 94, i32 0)
ret void
@ -460,7 +460,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 94, i32 0)
ret void
@ -480,7 +480,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 16
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -501,7 +501,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 4095
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -522,7 +522,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY6]], [[S_MOV_B32_]], implicit-def $scc
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%soffset = add i32 %soffset.base, 4096
call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
@ -560,7 +560,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[COPY5]], [[REG_SEQUENCE3]], [[S_ADD_I32_]], 0, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -606,7 +606,7 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: TBUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY]], [[V_ADD_U32_e64_]], [[REG_SEQUENCE3]], [[COPY6]], 904, 94, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource" + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
File diff suppressed because it is too large
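The struct-buffer-atomic hunks that follow show the same folding on a shorter operand list: the old trailing 0, 1, 0 (offset, glc, slc) becomes 0, 1 (offset, cpol), so the returning atomics keep GLC set via a cpol value of 1.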
@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -38,7 +38,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32_noret__vgpr_val__sgpr_rsrc_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -62,7 +62,7 @@ define amdgpu_ps <2 x float> @struct_buffer_atomic_add_i64__vgpr_val__sgpr_rsrc_
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub0
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN]].sub1
; CHECK: $vgpr0 = COPY [[COPY9]]
@ -89,7 +89,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i64_noret__vgpr_val__sgpr_rsrc__
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 8 on custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -132,7 +132,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__sgpr_val__vgpr_rsrc__sgpr_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -184,7 +184,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_i32_noret__sgpr_val__vgpr_rsrc__
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -212,7 +212,7 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_BOTHEN_RTN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
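Editorial note on the merged operand (a sketch inferred from the checks above, not an authoritative definition): the old separate glc/slc immediates combine into one cache-policy bitmask, which is why the atomic's previous "0, 1, 1," operand tail becomes "0, 3," here. A minimal C++ illustration, assuming GLC is bit 0, SLC is bit 1, and DLC is bit 2 (the DLC position is an assumption; the GLC/SLC values follow from the visible operand changes):

// Sketch of the assumed cache_policy bitmask; values inferred from this
// diff (glc=1,slc=1 -> 3; slc alone -> 2), not copied from the source.
enum CachePolicyBits : unsigned {
  CP_GLC = 1u << 0, // bit 0: globally coherent
  CP_SLC = 1u << 1, // bit 1: system-level coherent
  CP_DLC = 1u << 2  // bit 2: device-level coherent (assumed position)
};
// Example from this hunk: the intrinsic passes i32 2 (slc), and the
// value-returning atomic also sets glc, giving CP_GLC | CP_SLC == 3.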
@ -19,7 +19,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY9]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -45,7 +45,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_noret_i32__vgpr_val__vgpr_cm
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: S_ENDPGM 0
%ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -92,7 +92,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vg
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -148,7 +148,7 @@ define amdgpu_ps void @struct_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cm
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
@ -178,7 +178,7 @@ define amdgpu_ps float @struct_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sg
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0
; CHECK: $vgpr0 = COPY [[COPY9]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
@ -32,7 +32,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -67,7 +67,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
@ -99,7 +99,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 4095, align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0)
ret void
@ -131,7 +131,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
@ -218,7 +218,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; GFX90A: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -306,7 +306,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__sgpr_val__vgpr_rsrc__
; GFX90A: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; GFX90A: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; GFX90A: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; GFX90A: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GFX90A: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -334,7 +334,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX908: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX908: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX908: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@ -349,7 +349,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
@ -367,7 +367,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX908: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX908: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX908: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX908: S_ENDPGM 0
; GFX90A-LABEL: name: struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset_slc
; GFX90A: bb.1 (%ir-block.0):
@ -380,7 +380,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2)
ret void
@ -415,7 +415,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
; GFX90A: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@ -446,7 +446,7 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
; GFX90A: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
; GFX90A: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX90A: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GFX90A: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
@ -15,7 +15,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@ -30,7 +30,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -50,7 +50,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
@ -76,7 +76,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_format_v2f16__sgpr_rsrc__vgpr_vi
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_XY_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -102,7 +102,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2
@ -138,7 +138,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__sgpr_rsrc__vgpr_vi
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY7]]
@ -183,7 +183,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -247,7 +247,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_format_v4f16__vpr_rsrc__sgpr_vin
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -277,7 +277,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffsset_add_4095
@ -292,7 +292,7 @@ define amdgpu_ps half @struct_buffer_load_format_f16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -313,7 +313,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; PACKED-LABEL: name: struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
@ -328,7 +328,7 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -15,7 +15,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
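For the plain buffer loads, the same change shows up as the trailing immediate list shrinking from seven values to four in the checks above. A hedged decoder sketch for the merged operand (bit positions are assumptions consistent with the "0, 3," and "0, 2," operands elsewhere in this diff, not a verified definition):

#include <cstdint>
// Hypothetical helper: recover individual flags from the merged
// cache-policy immediate. Assumed bits: GLC=1, SLC=2, DLC=4.
constexpr uint32_t kGLC = 1, kSLC = 2, kDLC = 4;
constexpr bool hasGLC(uint32_t cpol) { return (cpol & kGLC) != 0; }
constexpr bool hasSLC(uint32_t cpol) { return (cpol & kSLC) != 0; }
constexpr bool hasDLC(uint32_t cpol) { return (cpol & kDLC) != 0; }
static_assert(hasGLC(3) && hasSLC(3) && !hasDLC(3), "3 == glc|slc");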
@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_format_v3f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2
@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__sgpr_rsrc__vgpr_v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2
@ -132,7 +132,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32__vpr_rsrc__sgpr_vi
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -166,7 +166,7 @@ define amdgpu_ps float @struct_buffer_load_format_f32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -187,7 +187,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -16,7 +16,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -37,7 +37,7 @@ define amdgpu_ps <2 x float> @struct_buffer_load_v2f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]
@ -61,7 +61,7 @@ define amdgpu_ps <3 x float> @struct_buffer_load_v3f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX3_BOTHEN:%[0-9]+]]:vreg_96 = BUFFER_LOAD_DWORDX3_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX3_BOTHEN]].sub2
@ -87,7 +87,7 @@ define amdgpu_ps <4 x float> @struct_buffer_load_v4f32__sgpr_rsrc__vgpr_vindex__
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_BOTHEN]].sub2
@ -116,7 +116,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 0)
@ -137,7 +137,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@ -158,7 +158,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 64
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 64, i32 0)
@ -200,7 +200,7 @@ define amdgpu_ps float @struct_buffer_load_f32__vgpr_rsrc__sgpr_vindex__sgpr_vof
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
@ -227,7 +227,7 @@ define amdgpu_ps float @struct_buffer_load_i8_zext__sgpr_rsrc__vgpr_vindex__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
@ -249,7 +249,7 @@ define amdgpu_ps float @struct_buffer_load_i8_sext__sgpr_rsrc__vgpr_vindex__vgpr
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_UBYTE_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 1 from custom "BufferResource", addrspace 4)
|
||||
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_UBYTE_BOTHEN]], 0, 8, implicit $exec
|
||||
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
|
@ -272,7 +272,7 @@ define amdgpu_ps float @struct_buffer_load_i16_zext__sgpr_rsrc__vgpr_vindex__vgp
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
@ -294,7 +294,7 @@ define amdgpu_ps float @struct_buffer_load_i16_sext__sgpr_rsrc__vgpr_vindex__vgp
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[BUFFER_LOAD_USHORT_BOTHEN]], 0, 16, implicit $exec
|
||||
; CHECK: $vgpr0 = COPY [[V_BFE_I32_e64_]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
|
@ -318,7 +318,7 @@ define amdgpu_ps half @struct_buffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voff
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_USHORT_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_BOTHEN]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
@ -339,7 +339,7 @@ define amdgpu_ps <2 x half> @struct_buffer_load_v2f16__sgpr_rsrc__vgpr_vindex__v
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
@ -366,7 +366,7 @@ define amdgpu_ps <4 x half> @struct_buffer_load_v4f16__sgpr_rsrc__vgpr_vindex__v
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORDX2_BOTHEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub0
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_BOTHEN]].sub1
|
||||
; CHECK: $vgpr0 = COPY [[COPY7]]
|
||||
|
@ -390,7 +390,7 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: [[BUFFER_LOAD_DWORD_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
|
||||
; CHECK: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_BOTHEN]]
|
||||
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
|
||||
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)
@@ -16,7 +16,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):

@@ -31,7 +31,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__vgpr_val__sgpr_rsrc__vgpr
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -55,7 +55,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):

@@ -70,7 +70,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f16__vgpr_val__sgpr_rsrc__vg
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XY_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -103,7 +103,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY1]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_BOTHEN_exact [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):

@@ -120,7 +120,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f16__vgpr_val__sgpr_rsrc__vg
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; PACKED: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -162,7 +162,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@@ -206,7 +206,7 @@ define amdgpu_ps void @struct_buffer_store_format_f16__sgpr_val__vgpr_rsrc__sgpr
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@@ -233,7 +233,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_gfx80_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset
; PACKED: bb.1 (%ir-block.0):

@@ -248,7 +248,7 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: BUFFER_STORE_FORMAT_D16_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -15,7 +15,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__vgpr_val__sgpr_rsrc__vgpr
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -37,7 +37,7 @@ define amdgpu_ps void @struct_buffer_store_format_v2f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XY_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -60,7 +60,7 @@ define amdgpu_ps void @struct_buffer_store_format_v3f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZ_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -84,7 +84,7 @@ define amdgpu_ps void @struct_buffer_store_format_v4f32__vgpr_val__sgpr_rsrc__vg
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -126,7 +126,7 @@ define amdgpu_ps void @struct_buffer_store_format_f32__sgpr_val__vgpr_rsrc__sgpr
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY8]], [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@@ -153,7 +153,7 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_FORMAT_X_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -17,7 +17,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -39,7 +39,7 @@ define amdgpu_ps void @struct_buffer_store_v2f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -62,7 +62,7 @@ define amdgpu_ps void @struct_buffer_store_v3f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX3_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -86,7 +86,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY10:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -132,7 +132,7 @@ define amdgpu_ps void @struct_buffer_store_v4f32_vgpr_rsrc__sgpr_val__sgpr_vinde
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY10]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY12]], %subreg.sub0, [[COPY13]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX4_BOTHEN_exact [[COPY11]], [[REG_SEQUENCE5]], [[REG_SEQUENCE4]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@@ -159,7 +159,7 @@ define amdgpu_ps void @struct_buffer_store_i8_sgpr_rsrc__vgpr_val__vgpr_vindex__
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: BUFFER_STORE_BYTE_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom "BufferResource", addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.struct.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)

@@ -180,7 +180,7 @@ define amdgpu_ps void @struct_buffer_store_i16_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_SHORT_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.struct.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)

@@ -201,7 +201,7 @@ define amdgpu_ps void @struct_buffer_store_f32_sgpr_rsrc__vgpr_val__vgpr_vindex_
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 1)
ret void

@@ -221,7 +221,7 @@ define amdgpu_ps void @struct_buffer_store_v2f16_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORD_BOTHEN_exact [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void

@@ -249,7 +249,7 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: BUFFER_STORE_DWORDX2_BOTHEN_exact [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom "BufferResource", align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
@@ -16,7 +16,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset

@@ -31,7 +31,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)

@@ -51,7 +51,7 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset

@@ -66,7 +66,7 @@ define amdgpu_ps <2 x half> @struct_tbuffer_load_v2f16__sgpr_rsrc__vgpr_vindex__
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XY_gfx80_BOTHEN]].sub1
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535

@@ -103,7 +103,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub0
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN]].sub1
; PACKED: $vgpr0 = COPY [[COPY7]]

@@ -121,7 +121,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__sgpr_rsrc__vgpr_vindex__
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub0
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub1
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN]].sub2

@@ -163,7 +163,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_vindex0

@@ -179,7 +179,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0)

@@ -220,7 +220,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; PACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; PACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_D16_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@@ -266,7 +266,7 @@ define amdgpu_ps <4 x half> @struct_tbuffer_load_v4f16__vgpr_rsrc__sgpr_vindex__
; UNPACKED: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; UNPACKED: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@@ -314,7 +314,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; PACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_BOTHEN]]
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
; UNPACKED-LABEL: name: struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_voffset__sgpr_soffset_voffset_add4095

@@ -329,7 +329,7 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 2 from custom "BufferResource" + 4095, align 1, addrspace 4)
; UNPACKED: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_D16_X_gfx80_BOTHEN]]
; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@@ -15,7 +15,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 78, i32 0)

@@ -35,7 +35,7 @@ define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XY_BOTHEN:%[0-9]+]]:vreg_64 = TBUFFER_LOAD_FORMAT_XY_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XY_BOTHEN]].sub1
; CHECK: $vgpr0 = COPY [[COPY7]]

@@ -58,7 +58,7 @@ define amdgpu_ps <3 x float> @struct_tbuffer_load_v3f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN:%[0-9]+]]:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 12 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZ_BOTHEN]].sub2

@@ -83,7 +83,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__sgpr_rsrc__vgpr_vindex_
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub0
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub1
; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN]].sub2

@@ -111,7 +111,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY4]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY5]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 0, i32 %voffset, i32 %soffset, i32 78, i32 0)

@@ -152,7 +152,7 @@ define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32__vgpr_rsrc__sgpr_vindex_
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE4]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec

@@ -186,7 +186,7 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: [[TBUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = TBUFFER_LOAD_FORMAT_X_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY6]], 4095, 78, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource" + 4095, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[TBUFFER_LOAD_FORMAT_X_BOTHEN]]
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%voffset = add i32 %voffset.base, 4095
@@ -24,7 +24,7 @@ body: |
; CHECK: $sgpr1 = COPY killed $sgpr5
; CHECK: $sgpr4_sgpr5 = S_GETPC_B64
; CHECK: $sgpr4 = S_MOV_B32 $sgpr8
; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load 16, align 4, addrspace 4)
; CHECK: $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM $sgpr4_sgpr5, 0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 :: (dereferenceable invariant load 16, align 4, addrspace 4)
bb.0:
successors: %bb.1, %bb.2
liveins: $sgpr0

@@ -39,7 +39,7 @@ body: |

bb.1:
renamable $vgpr0 = V_MOV_B32_e32 1065353216, implicit $exec
BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
BUFFER_STORE_DWORD_OFFEN killed renamable $vgpr0, undef renamable $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)

bb.2:
S_ENDPGM 0
|
||||
|
|
|
@ -14,9 +14,9 @@ body: |

%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -33,9 +33,9 @@ body: |

%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_96 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -52,9 +52,9 @@ body: |

%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -71,9 +71,9 @@ body: |

%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_160 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -92,9 +92,9 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256 = COPY %3:sgpr_256
%4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -112,11 +112,11 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_512 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -134,15 +134,15 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_1024 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -160,9 +160,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_64 = IMPLICIT_DEF
%2:vreg_64 = COPY %3:areg_64
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -180,9 +180,9 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_128 = IMPLICIT_DEF
%2:vreg_128 = COPY %3:areg_128
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -200,12 +200,12 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_512 = IMPLICIT_DEF
%2:vreg_512 = COPY %3:areg_512
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -223,14 +223,14 @@ body: |
%0:vreg_64 = COPY $vgpr0_vgpr1
%3:areg_1024 = IMPLICIT_DEF
%2:vreg_1024 = COPY %3:areg_1024
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

@ -16,9 +16,9 @@ body: |

%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -35,9 +35,9 @@ body: |

%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -54,9 +54,9 @@ body: |

%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -73,9 +73,9 @@ body: |

%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %1, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -94,9 +94,9 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256_align2 = COPY %3:sgpr_256
%4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
%4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -114,11 +114,11 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_512_align2 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -136,15 +136,15 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%1:vgpr_32 = COPY $vgpr2
%2:vreg_1024_align2 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -162,9 +162,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_64 = IMPLICIT_DEF
%2:vreg_64_align2 = COPY %3:areg_64
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -182,9 +182,9 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_128 = IMPLICIT_DEF
%2:vreg_128_align2 = COPY %3:areg_128
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -202,12 +202,12 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_512 = IMPLICIT_DEF
%2:vreg_512_align2 = COPY %3:areg_512
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

---
@ -225,14 +225,14 @@ body: |
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
%3:areg_1024 = IMPLICIT_DEF
%2:vreg_1024_align2 = COPY %3:areg_1024
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
%1:vgpr_32 = COPY $agpr0
GLOBAL_STORE_DWORD %0, %1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...

@ -4,7 +4,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s

; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load 16 from %ir.13, addrspace 4)
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0 :: (invariant load 16 from %ir.13, addrspace 4)

define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
.entry:

@ -83,14 +83,14 @@ body: |
successors: %bb.1(0x40000000), %bb.4(0x40000000)
liveins: $sgpr4_sgpr5

renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0, 0 :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0, 0 :: (dereferenceable invariant load 4 from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4)
renamable $sgpr6_sgpr7 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load 8 from %ir.arg.kernarg.offset.cast, align 16, addrspace 4)
renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 8, 0 :: (dereferenceable invariant load 4 from %ir.arg1.kernarg.offset.cast, align 8, addrspace 4)
S_WAITCNT 49279
renamable $vgpr0 = nofpexcept V_MUL_F32_e64 0, killed $sgpr4, 0, $sgpr4, 0, 0, implicit $mode, implicit $exec
DBG_VALUE renamable $sgpr6_sgpr7, $noreg, !11, !DIExpression(DW_OP_plus_uconst, 12, DW_OP_stack_value), debug-location !12
$vgpr1 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr6_sgpr7
$vgpr2 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit killed $sgpr6_sgpr7, implicit $exec
GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, 0, 0, 0, implicit $exec, debug-location !12 :: (store 4 into %ir.tmp2, addrspace 1)
GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, renamable $vgpr0, 12, 0, implicit $exec, debug-location !12 :: (store 4 into %ir.tmp2, addrspace 1)
renamable $sgpr4 = S_MOV_B32 8388608
renamable $sgpr4_sgpr5 = nofpexcept V_CMP_GT_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec
renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
@ -102,7 +102,7 @@ body: |
renamable $sgpr4_sgpr5 = IMPLICIT_DEF
$vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $sgpr4_sgpr5
$vgpr1 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit killed $sgpr4_sgpr5, implicit $exec
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`, addrspace 1)
renamable $sgpr4 = S_MOV_B32 2139095040
S_WAITCNT 3952
renamable $sgpr4_sgpr5 = nofpexcept V_CMP_NEQ_F32_e64 0, killed $sgpr4, 0, killed $vgpr0, 0, implicit $mode, implicit $exec

@ -8,9 +8,9 @@ name: trivial_smem_clause_load_smrd4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -20,11 +20,11 @@ name: trivial_smem_clause_load_smrd4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -34,13 +34,13 @@ name: trivial_smem_clause_load_smrd4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
S_ENDPGM 0
...
---
@ -50,15 +50,15 @@ name: trivial_smem_clause_load_smrd4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0
S_ENDPGM 0
...
---
@ -67,11 +67,11 @@ name: trivial_smem_clause_load_smrd4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -81,9 +81,9 @@ name: smrd_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -93,9 +93,9 @@ name: smrd_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -105,9 +105,9 @@ name: smrd_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: smrd_load8_overwrite_ptr
; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
S_ENDPGM 0
...
---
@ -119,46 +119,46 @@ name: break_smem_clause_at_max_smem_clause_size_smrd_load4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
; GCN-NEXT: S_ENDPGM 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

$sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

$sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

$sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0

$sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0
$sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
S_ENDPGM 0
...
@ -169,12 +169,12 @@ name: break_smem_clause_simple_load_smrd4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -184,11 +184,11 @@ name: break_smem_clause_simple_load_smrd4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -198,12 +198,12 @@ name: break_smem_clause_simple_load_smrd8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -213,11 +213,11 @@ name: break_smem_clause_simple_load_smrd16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
$sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0
S_ENDPGM 0
...
---
@ -228,16 +228,16 @@ body: |
; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
bb.0:
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0

bb.1:
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -248,11 +248,11 @@ name: break_smem_clause_store_load_into_ptr_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
; GCN-NEXT: S_ENDPGM 0
S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0
S_ENDPGM 0
...
---
@ -264,11 +264,11 @@ name: break_smem_clause_store_load_into_data_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
$sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0
$sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -278,13 +278,13 @@ name: valu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -294,13 +294,13 @@ name: salu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$sgpr8 = S_MOV_B32 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -309,13 +309,13 @@ name: ds_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -325,13 +325,13 @@ name: flat_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: flat_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0
S_ENDPGM 0
...
---
@ -341,11 +341,11 @@ name: implicit_use_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_smem_clause
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
$sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, implicit $sgpr12_sgpr13
$sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0
S_ENDPGM 0
...

@ -9,10 +9,10 @@ name: trivial_clause_load_flat4_x1
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: trivial_clause_load_flat4_x1
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -22,12 +22,12 @@ name: trivial_clause_load_flat4_x2
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: trivial_clause_load_flat4_x2
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -37,14 +37,14 @@ name: trivial_clause_load_flat4_x3
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: trivial_clause_load_flat4_x3
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr5_vgpr6, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr2 = FLAT_LOAD_DWORD $vgpr7_vgpr8, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -54,16 +54,16 @@ name: trivial_clause_load_flat4_x4
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: trivial_clause_load_flat4_x4
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr2 = FLAT_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr3 = FLAT_LOAD_DWORD $vgpr10_vgpr11, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -73,12 +73,12 @@ name: trivial_clause_load_flat4_x2_sameptr
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: trivial_clause_load_flat4_x2_sameptr
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -88,10 +88,10 @@ name: flat_load4_overwrite_ptr_lo
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: flat_load4_overwrite_ptr_lo
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -101,10 +101,10 @@ name: flat_load4_overwrite_ptr_hi
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: flat_load4_overwrite_ptr_hi
|
||||
; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -114,10 +114,10 @@ name: flat_load8_overwrite_ptr
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: flat_load8_overwrite_ptr
|
||||
; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
@ -130,48 +130,48 @@ name: break_clause_at_max_clause_size_flat_load4
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: break_clause_at_max_clause_size_flat_load4
|
||||
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
; GCN-NEXT: S_ENDPGM 0

$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr5 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

$vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr6 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr7 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr9 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

$vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr12 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr13 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

$vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr14 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr15 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr16 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr17 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr0 = S_MOV_B32 $sgpr0, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18
S_ENDPGM 0
...
@@ -182,13 +182,13 @@ name: break_clause_simple_load_flat4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_lo_ptr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -198,13 +198,13 @@ name: break_clause_simple_load_flat4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat4_hi_ptr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -214,13 +214,13 @@ name: break_clause_simple_load_flat8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat8_ptr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -231,12 +231,12 @@ name: break_clause_simple_load_flat16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_flat16_ptr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3_vgpr4_vgpr5 = FLAT_LOAD_DWORDX4 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -251,17 +251,17 @@ body: |
; GCN-LABEL: name: break_clause_block_boundary_load_flat8_ptr
; GCN: bb.0:
; GCN-NEXT: successors: %bb.1(0x80000000)
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

bb.0:
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr

bb.1:
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2_vgpr3 = FLAT_LOAD_DWORDX2 $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -272,12 +272,12 @@ name: break_clause_store_load_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_ptr_flat4
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -289,12 +289,12 @@ name: break_clause_store_load_into_data_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_store_load_into_data_flat4
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -305,14 +305,14 @@ name: valu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -323,14 +323,14 @@ name: salu_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_MOV_B32 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -340,14 +340,14 @@ name: ds_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -357,14 +357,14 @@ name: smrd_inst_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: smrd_inst_breaks_clause
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$sgpr8 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -374,13 +374,13 @@ name: implicit_use_breaks_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_clause
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
; GCN: $vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
$vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0_vgpr1 = FLAT_LOAD_DWORDX2 $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr4_vgpr5
$vgpr4_vgpr5 = FLAT_LOAD_DWORDX2 $vgpr6_vgpr7, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -389,12 +389,12 @@ name: trivial_clause_load_mubuf4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0

$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -403,13 +403,13 @@ name: break_clause_simple_load_mubuf_offen_ptr
body: |
bb.0:
; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0

$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -420,11 +420,11 @@ name: mubuf_load4_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: mubuf_load4_overwrite_ptr
; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
S_ENDPGM 0
@@ -437,13 +437,13 @@ name: break_clause_flat_load_mubuf_load
body: |
bb.0:
; GCN-LABEL: name: break_clause_flat_load_mubuf_load
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0

$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
# Break a clause from interference between mubuf and flat instructions
@@ -458,8 +458,8 @@ name: break_clause_mubuf_load_flat_load

body: |
bb.0:
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr

S_ENDPGM 0
...
@@ -470,13 +470,13 @@ name: break_clause_atomic_rtn_into_ptr_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_flat4
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: $vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr4 = FLAT_ATOMIC_ADD_RTN $vgpr5_vgpr6, $vgpr7, 0, 1, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -485,12 +485,12 @@ name: break_clause_atomic_nortn_ptr_load_flat4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_flat4
; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: S_ENDPGM 0

FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec, implicit $flat_scr
$vgpr2 = FLAT_LOAD_DWORD $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -500,13 +500,13 @@ name: break_clause_atomic_rtn_into_ptr_mubuf4
body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, 0, implicit $exec
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
; GCN-NEXT: S_ENDPGM 0

$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 1, implicit $exec
S_ENDPGM 0
...
---
@@ -517,11 +517,11 @@ body: |
bb.0:
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0

BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -532,11 +532,11 @@ name: no_break_clause_mubuf_load_novaddr
body: |
bb.0:
; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -546,16 +546,16 @@ name: mix_load_store_clause
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr

FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr2_vgpr3, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
---
@@ -565,15 +565,15 @@ name: mix_load_store_clause_same_address
body: |
bb.0:
; GCN-LABEL: name: mix_load_store_clause_same_address
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr

FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $exec, implicit $flat_scr
$vgpr10 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr6, 0, 0, implicit $exec, implicit $flat_scr
$vgpr11 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
@@ -9,27 +9,27 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 16, align 1, addrspace 4)
; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load 16 from %ir.arg0, addrspace 6)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 16, align 1, addrspace 4)
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 32, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 32, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 48, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 48, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 64, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 64, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 80, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
@@ -38,19 +38,19 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 96, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 96, align 1, addrspace 4)
; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]]
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0
; GCN: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]]
; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]].sub0
; GCN: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[DEF2]]
; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0
; GCN: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[DEF3]]
; GCN: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
@@ -60,23 +60,23 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 128, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 144, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 160, align 1, addrspace 4)
; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80
@@ -88,77 +88,77 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[DEF4]]
; GCN: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY13]].sub0
; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 88
; GCN: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_7]], 88, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF5]]
; GCN: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY15]].sub0
; GCN: [[S_MOV_B32_8:%[0-9]+]]:sreg_32 = S_MOV_B32 176
; GCN: [[DEF6:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 176, align 1, addrspace 4)
; GCN: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF6]]
; GCN: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0
; GCN: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF7]]
; GCN: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0
; GCN: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF8]]
; GCN: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: [[S_MOV_B32_9:%[0-9]+]]:sreg_32 = S_MOV_B32 96
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 192, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 208, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[S_MOV_B32_13:%[0-9]+]]:sreg_32 = S_MOV_B32 112
; GCN: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY27]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_13]], 112, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[S_MOV_B32_14:%[0-9]+]]:sreg_32 = S_MOV_B32 224
; GCN: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 224, align 1, addrspace 4)
; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1
; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
|
||||
; GCN: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
|
||||
; GCN: [[S_MOV_B32_15:%[0-9]+]]:sreg_32 = S_MOV_B32 120
|
||||
; GCN: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY32]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_15]], 120, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
|
||||
; GCN: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240
|
||||
; GCN: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource" + 240, align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; GCN: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[COPY]]
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom "BufferResource", align 1, addrspace 4)
|
||||
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
|
||||
; GCN: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 256, align 1, addrspace 4)
|
||||
|
@@ -176,68 +176,68 @@ define amdgpu_cs void @mmo_offsets0(<4 x i32> addrspace(6)* inreg noalias derefe
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY41]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY42:%[0-9]+]]:vreg_64 = COPY [[DEF9]]
; GCN: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[COPY42]].sub0
; GCN: [[S_MOV_B32_18:%[0-9]+]]:sreg_32 = S_MOV_B32 136
; GCN: [[COPY44:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF10:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY44]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_18]], 136, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY45:%[0-9]+]]:vreg_64 = COPY [[DEF10]]
; GCN: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[COPY45]].sub0
; GCN: [[S_MOV_B32_19:%[0-9]+]]:sreg_32 = S_MOV_B32 272
; GCN: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[DEF11:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY47]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource" + 272, align 1, addrspace 4)
; GCN: [[COPY48:%[0-9]+]]:vreg_64 = COPY [[DEF11]]
; GCN: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY48]].sub0
; GCN: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY50:%[0-9]+]]:vreg_64 = COPY [[DEF12]]
; GCN: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY50]].sub0
; GCN: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY54:%[0-9]+]]:vreg_64 = COPY [[DEF13]]
; GCN: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[COPY54]].sub0
; GCN: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY56:%[0-9]+]]:vreg_64 = COPY [[DEF14]]
; GCN: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[COPY56]].sub0
; GCN: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, -1, 0, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store 4 on custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY58:%[0-9]+]]:vreg_64 = COPY [[DEF15]]
; GCN: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[COPY58]].sub0
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY60:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN2]], [[COPY60]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: [[COPY61:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN3]], [[COPY61]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 144, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288
; GCN: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 288, align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: INLINEASM &"", 1 /* sideeffect attdialect */
; GCN: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: [[S_MOV_B32_21:%[0-9]+]]:sreg_32 = S_MOV_B32 152
; GCN: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN3]], [[COPY66]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_21]], 152, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304
; GCN: [[COPY67:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource" + 304, align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GCN: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[COPY]]
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "BufferResource", align 1, addrspace 4)
; GCN: S_ENDPGM 0
bb.0:
%tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0

@@ -10,14 +10,14 @@ body: |
bb.0:
; GCN-LABEL: name: src_bundle_latency
; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
; GCN: }
; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
$vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, 0, implicit $exec
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, implicit $exec
}
$vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
$vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
@@ -32,13 +32,13 @@ body: |
; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, implicit $exec
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, implicit $exec
; GCN: }
$vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
$vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, implicit $exec
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, implicit $exec
}
...

@@ -25,7 +25,7 @@ body: |
; GCN-LABEL: name: call_waw_waitcnt
; GCN: liveins: $sgpr4_sgpr5, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3
; GCN: S_WAITCNT 0
; GCN: $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0, 0
; GCN: $sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0
; GCN: $sgpr33 = S_MOV_B32 killed $sgpr7
; GCN: $flat_scr_lo = S_ADD_U32 killed $sgpr4, $sgpr33, implicit-def $scc
; GCN: $flat_scr_hi = S_ADDC_U32 killed $sgpr5, 0, implicit-def $scc, implicit killed $scc
@@ -38,7 +38,7 @@ body: |
; GCN: S_WAITCNT 49279
; GCN: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def dead $vgpr0
; GCN: S_ENDPGM 0
$sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0, 0
$sgpr30_sgpr31 = S_LOAD_DWORDX2_IMM $sgpr4_sgpr5, 0, 0
$sgpr33 = S_MOV_B32 killed $sgpr7
$flat_scr_lo = S_ADD_U32 killed $sgpr4, $sgpr33, implicit-def $scc
$flat_scr_hi = S_ADDC_U32 killed $sgpr5, 0, implicit-def $scc, implicit killed $scc

@@ -43,8 +43,8 @@ body: |

%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -55,10 +55,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
@@ -105,8 +105,8 @@ body: |

%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -117,10 +117,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
---
@@ -168,8 +168,8 @@ body: |

%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -180,10 +180,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
@@ -233,8 +233,8 @@ body: |

%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -245,10 +245,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
@@ -310,8 +310,8 @@ body: |

%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -322,10 +322,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
@@ -375,8 +375,8 @@ body: |

%3 = COPY $vgpr0
%0 = COPY $sgpr0_sgpr1
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%5 = S_LOAD_DWORDX2_IMM %0, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
%24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
%25 = REG_SEQUENCE %3, 1, %24, 2
%10 = S_MOV_B32 61440
@@ -387,10 +387,10 @@ body: |
%26 = V_LSHL_B64_e64 killed %25, 2, implicit $exec
%16 = REG_SEQUENCE killed %4, 17, %12, 18
%18 = COPY %26
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, implicit $exec
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $mode, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...

@@ -17,15 +17,15 @@ body: |

$vgpr0_vgpr1 = IMPLICIT_DEF
$vgpr4_vgpr5 = IMPLICIT_DEF
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr4 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
$vgpr2 = IMPLICIT_DEF
$vgpr3 = IMPLICIT_DEF
$vgpr6 = IMPLICIT_DEF
$vgpr0 = V_ADD_CO_U32_e32 16, $vgpr2, implicit-def $vcc, implicit $exec
$vgpr1 = V_ADDC_U32_e32 $vgpr3, killed $vgpr6, implicit-def dead $vcc, implicit $vcc, implicit $exec
FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr2_vgpr3, killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
FLAT_STORE_DWORD $vgpr0_vgpr1, killed $vgpr4, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
S_ENDPGM 0

...

@@ -14,7 +14,7 @@ registers:
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%1 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%2 = V_ADD_F32_e64 0, killed %1, 0, 1, 0, 0, implicit $mode, implicit $exec
%3 = FLAT_LOAD_DWORD %0, 4, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
%3 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4)
...

@@ -30,7 +30,7 @@ body: |
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
S_ENDPGM 0

bb.2:
@@ -78,7 +78,7 @@ body: |

bb.8:
successors: %bb.10
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
%35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
%28:vgpr_32 = COPY %35

@@ -83,7 +83,7 @@ body: |

bb.9:
successors: %bb.10(0x80000000)
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec
%22:sreg_64 = COPY $exec, implicit-def $exec
%23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc
@@ -125,7 +125,7 @@ body: |
%27.sub5:sgpr_256 = COPY %26
%27.sub6:sgpr_256 = COPY %26
%27.sub7:sgpr_256 = COPY killed %26
%28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%28:vgpr_32 = IMAGE_LOAD_V1_V4 killed %25, killed %27, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%29:vgpr_32 = nofpexcept V_ADD_F32_e32 0, killed %28, implicit $mode, implicit $exec
$m0 = S_MOV_B32 -1
DS_WRITE_B32 undef %30:vgpr_32, killed %29, 0, 0, implicit $m0, implicit $exec :: (store 4 into `i32 addrspace(3)* undef`, addrspace 3)

@@ -68,7 +68,7 @@ body: |
%23:vreg_128 = COPY killed %17
%24:sreg_64 = COPY killed %16
%25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
%28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec
%29:vreg_128 = COPY killed %21
%29.sub0:vreg_128 = COPY %1
@@ -257,7 +257,7 @@ body: |
%109.sub5:sgpr_256 = COPY %108
%109.sub6:sgpr_256 = COPY %108
%109.sub7:sgpr_256 = COPY killed %108
%110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%112:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %110, implicit $mode, implicit $exec
%113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %112, implicit $mode, implicit $exec
%114:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec

@@ -46,10 +46,10 @@ body: |
%0 = COPY $sgpr2_sgpr3
%1 = COPY $vgpr2
%2 = COPY $vgpr3
%3 = S_LOAD_DWORDX8_IMM %0, 0, 0, 0
%4 = S_LOAD_DWORDX4_IMM %0, 12, 0, 0
%5 = S_LOAD_DWORDX8_IMM %0, 16, 0, 0
%6 = S_LOAD_DWORDX4_IMM %0, 28, 0, 0
%3 = S_LOAD_DWORDX8_IMM %0, 0, 0
%4 = S_LOAD_DWORDX4_IMM %0, 12, 0
%5 = S_LOAD_DWORDX8_IMM %0, 16, 0
%6 = S_LOAD_DWORDX4_IMM %0, 28, 0
undef %7.sub0 = S_MOV_B32 212739
%20 = COPY %7
%11 = COPY %20
@@ -61,7 +61,7 @@ body: |
%11.sub6 = COPY %1
%11.sub7 = COPY %1
%11.sub8 = COPY %1
dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
%20.sub1 = COPY %2
%20.sub2 = COPY %2
%20.sub3 = COPY %2
@@ -70,6 +70,6 @@ body: |
%20.sub6 = COPY %2
%20.sub7 = COPY %2
%20.sub8 = COPY %2
dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)
dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load 4)

...

@@ -11,7 +11,7 @@
#
# GCN-LABEL: bb.6:
# GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}})
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, implicit $exec
#

--- |
@@ -69,7 +69,7 @@ body: |
%10:sreg_64 = COPY killed %5
undef %11.sub2:sgpr_128 = COPY %4
%11.sub3:sgpr_128 = COPY %3
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, implicit $exec
undef %13.sub1:vreg_128 = COPY %9.sub1
%13.sub2:vreg_128 = COPY %9.sub2
%14:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $mode, implicit $exec
@@ -161,7 +161,7 @@ body: |
bb.18:
successors: %bb.7(0x80000000)
dead %59:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $mode, implicit $exec
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, 0, 0, 0, implicit $exec
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sgpr_128, undef %65:sreg_32, 0, 0, 0, 0, implicit $exec
undef %66.sub1:vreg_128 = COPY %13.sub1
%66.sub2:vreg_128 = COPY %13.sub2
%67:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $mode, implicit $exec

@@ -47,7 +47,7 @@ body: |
%4.sub5:sgpr_256 = COPY %1
%4.sub6:sgpr_256 = COPY %1
%4.sub7:sgpr_256 = COPY killed %1
%5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%5:vgpr_32 = IMAGE_LOAD_V1_V4 killed %3, killed %4, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, addrspace 4)
%6:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %5, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
%7:vgpr_32 = nofpexcept V_RCP_F32_e32 killed %6, implicit $mode, implicit $exec
%8:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %7, implicit $mode, implicit $exec
@@ -145,10 +145,10 @@ body: |
%40:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %39, 0, -1090519040, 0, 1056964608, 0, 0, implicit $mode, implicit $exec
%41:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %40, 0, 0, 0, -1090519040, 0, 0, implicit $mode, implicit $exec
%42:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 killed %41, implicit $mode, implicit $exec
%43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0, 0 :: (dereferenceable invariant load 4)
%43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sgpr_128, 12, 0 :: (dereferenceable invariant load 4)
%45:vgpr_32 = V_MUL_LO_I32_e64 killed %42, killed %43, implicit $exec
%46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec
%50:sreg_64 = COPY $exec, implicit-def $exec
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc

@@ -34,7 +34,7 @@ body: |
; GCN: S_BRANCH %bb.1
; GCN: bb.1:
; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000)
; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
; GCN: undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, [[COPY1]], implicit $exec
; GCN: %6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %5.sub1
@@ -42,7 +42,7 @@ body: |
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
@@ -54,7 +54,7 @@ body: |
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; GCN: bb.3:
; GCN: successors: %bb.4(0x80000000)
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
@@ -83,7 +83,7 @@ body: |
bb.1:
successors: %bb.2, %bb.3

undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %5.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %1, 9, 0 :: (dereferenceable invariant load 8, align 4, addrspace 4)
undef %6.sub0:vreg_64 = V_LSHLREV_B32_e32 2, %0, implicit $exec
%6.sub1:vreg_64 = V_MOV_B32_e32 0, implicit $exec
%7:vgpr_32 = COPY %5.sub1
@@ -91,7 +91,7 @@ body: |
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
%5.sub3:sgpr_128 = S_MOV_B32 61440
%5.sub2:sgpr_128 = S_MOV_B32 0
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
%12:sreg_64 = COPY $exec, implicit-def $exec
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
@@ -103,7 +103,7 @@ body: |
%5.sub0:sgpr_128 = COPY %5.sub2
%5.sub1:sgpr_128 = COPY %5.sub2
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)

bb.3:
$exec = S_OR_B64 $exec, %12, implicit-def $scc

@@ -27,7 +27,7 @@ body: |
liveins: $sgpr0_sgpr1

%0:sgpr_64 = COPY $sgpr0_sgpr1
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%2:sreg_32_xm0 = COPY %1.sub1
%3:sreg_32_xm0 = COPY %1.sub0
%4:sreg_32_xm0 = S_MOV_B32 61440
@@ -37,7 +37,7 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 9999
%9:sreg_32_xm0 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc
%10:vgpr_32 = COPY %9
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
@@ -71,7 +71,7 @@ body: |

%3:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%13:vgpr_32 = V_ASHRREV_I32_e64 31, %3, implicit $exec
%14:vreg_64 = REG_SEQUENCE %3, %subreg.hi16, %13, %subreg.lo16
%15:vreg_64 = V_LSHLREV_B64_e64 2, killed %14, implicit $exec
@@ -85,19 +85,19 @@ body: |
%16:vgpr_32 = V_MOV_B32_e32 63, implicit $exec

%9:vgpr_32 = V_AND_B32_e64 %8, %6, implicit $exec
FLAT_STORE_DWORD %19, %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %9, 0, 0, implicit $exec, implicit $flat_scr

%10:vgpr_32 = V_AND_B32_e64 %6, %8, implicit $exec
FLAT_STORE_DWORD %19, %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %10, 0, 0, implicit $exec, implicit $flat_scr

%11:vgpr_32 = V_AND_B32_e32 %8, %6, implicit $exec
FLAT_STORE_DWORD %19, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %11, 0, 0, implicit $exec, implicit $flat_scr

%12:vgpr_32 = V_AND_B32_e64 %8, %8, implicit $exec
FLAT_STORE_DWORD %19, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %12, 0, 0, implicit $exec, implicit $flat_scr

%13:vgpr_32 = V_AND_B32_e64 %16, %16, implicit $exec
FLAT_STORE_DWORD %19, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %19, %13, 0, 0, implicit $exec, implicit $flat_scr

S_ENDPGM 0

@@ -117,7 +117,7 @@ body: |
liveins: $sgpr0_sgpr1

%0:sgpr_64 = COPY $sgpr0_sgpr1
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%5:sreg_32_xm0_xexec = S_MOV_B32 1
%6:sreg_32_xm0 = COPY %4.sub1
%7:sreg_32_xm0 = COPY %4.sub0
@@ -126,7 +126,7 @@ body: |
%10:sgpr_128 = REG_SEQUENCE killed %7, %subreg.hi16, killed %6, %subreg.lo16, killed %9, %subreg.sub0, killed %8, %subreg.sub0_sub1
%12:sreg_32_xm0 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc
%13:vgpr_32 = COPY %12
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
@@ -144,7 +144,7 @@ body: |
liveins: $sgpr0_sgpr1

%0:sgpr_64 = COPY $sgpr0_sgpr1
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%3:sreg_32_xm0_xexec = S_MOV_B32 999123
%4:sreg_32_xm0 = COPY %2.sub1
%5:sreg_32_xm0 = COPY %2.sub0
@@ -153,7 +153,7 @@ body: |
%8:sgpr_128 = REG_SEQUENCE killed %5, %subreg.hi16, killed %4, %subreg.lo16, killed %7, %subreg.sub0, killed %6, %subreg.sub0_sub1
%10:sreg_32_xm0 = S_ASHR_I32 killed %3, 12, implicit-def dead $scc
%11:vgpr_32 = COPY %10
BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0

...
@@ -200,7 +200,7 @@ body: |

%2:vgpr_32 = COPY $vgpr0
%0:sgpr_64 = COPY $sgpr0_sgpr1
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
%15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
%16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16
%17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec
@@ -221,34 +221,34 @@ body: |
%32:vgpr_32 = V_MOV_B32_e32 2, implicit $exec

%11:vgpr_32 = V_ASHRREV_I32_e64 8, %10, implicit $exec
FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr

%12:vgpr_32 = V_ASHRREV_I32_e64 %8, %10, implicit $exec
FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr

%13:vgpr_32 = V_ASHR_I32_e64 %7, 3, implicit $exec
FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr

%14:vgpr_32 = V_ASHR_I32_e64 7, %29, implicit $exec
FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr

%15:vgpr_32 = V_ASHR_I32_e64 %27, %24, implicit $exec
FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr

%22:vgpr_32 = V_ASHR_I32_e64 %6, 4, implicit $exec
FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr

%23:vgpr_32 = V_ASHR_I32_e64 %6, %30, implicit $exec
FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr

%25:vgpr_32 = V_ASHR_I32_e32 %31, %31, implicit $exec
FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr

%26:vgpr_32 = V_ASHRREV_I32_e32 11, %10, implicit $exec
FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr

%28:vgpr_32 = V_ASHR_I32_e32 %27, %32, implicit $exec
FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr

S_ENDPGM 0

@ -267,7 +267,7 @@ body: |
|
|||
liveins: $sgpr0_sgpr1
|
||||
|
||||
%0:sgpr_64 = COPY $sgpr0_sgpr1
|
||||
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
|
||||
%2:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
|
||||
%3:sreg_32_xm0_xexec = S_MOV_B32 -999123
|
||||
%4:sreg_32_xm0 = COPY %2.sub1
|
||||
%5:sreg_32_xm0 = COPY %2.sub0
|
||||
|
@@ -276,7 +276,7 @@ body: |
 %8:sgpr_128 = REG_SEQUENCE killed %5, %subreg.hi16, killed %4, %subreg.lo16, killed %7, %subreg.sub0, killed %6, %subreg.sub0_sub1
 %10:sreg_32_xm0 = S_LSHR_B32 killed %3, 12, implicit-def dead $scc
 %11:vgpr_32 = COPY %10
-BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, 0, 0, 0, 0, 0, implicit $exec
 S_ENDPGM 0

 ...

@@ -324,7 +324,7 @@ body: |

 %2:vgpr_32 = COPY $vgpr0
 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
 %16:vreg_64 = REG_SEQUENCE %2, %subreg.hi16, %15, %subreg.lo16
 %17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec

@@ -345,34 +345,34 @@ body: |
 %32:vgpr_32 = V_MOV_B32_e32 2, implicit $exec

 %11:vgpr_32 = V_LSHRREV_B32_e64 8, %10, implicit $exec
-FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr

 %12:vgpr_32 = V_LSHRREV_B32_e64 %8, %10, implicit $exec
-FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr

 %13:vgpr_32 = V_LSHR_B32_e64 %7, 3, implicit $exec
-FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr

 %14:vgpr_32 = V_LSHR_B32_e64 7, %29, implicit $exec
-FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr

 %15:vgpr_32 = V_LSHR_B32_e64 %27, %24, implicit $exec
-FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr

 %22:vgpr_32 = V_LSHR_B32_e64 %6, 4, implicit $exec
-FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr

 %23:vgpr_32 = V_LSHR_B32_e64 %6, %30, implicit $exec
-FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr

 %25:vgpr_32 = V_LSHR_B32_e32 %31, %31, implicit $exec
-FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr

 %26:vgpr_32 = V_LSHRREV_B32_e32 11, %10, implicit $exec
-FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr

 %28:vgpr_32 = V_LSHR_B32_e32 %27, %32, implicit $exec
-FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr

 S_ENDPGM 0

 ...

@@ -390,7 +390,7 @@ body: |
 bb.0:
 %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
 %2:vgpr_32 = V_XOR_B32_e64 killed %0, undef %1:vgpr_32, implicit $exec
-FLAT_STORE_DWORD undef %3:vreg_64, %2, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD undef %3:vreg_64, %2, 0, 0, implicit $exec, implicit $flat_scr
 S_ENDPGM 0

 ...

@@ -434,7 +434,7 @@ body: |
 liveins: $sgpr0_sgpr1

 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %2:sreg_32_xm0 = COPY %1.sub1
 %3:sreg_32_xm0 = COPY %1.sub0
 %4:sreg_32_xm0 = S_MOV_B32 61440

@@ -444,7 +444,7 @@ body: |
 %8:sreg_32_xm0 = S_MOV_B32 9999
 %9:sreg_32_xm0 = S_ANDN2_B32 killed %7, killed %8, implicit-def dead $scc
 %10:vgpr_32 = COPY %9
-BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
 S_ENDPGM 0

 ...

@@ -467,7 +467,7 @@ body: |
 liveins: $sgpr0_sgpr1

 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %2:sreg_32_xm0 = COPY %1.sub1
 %3:sreg_32_xm0 = COPY %1.sub0
 %4:sreg_32_xm0 = S_MOV_B32 61440

@@ -477,7 +477,7 @@ body: |
 %8:sreg_32_xm0 = S_MOV_B32 9999
 %9:sreg_32_xm0 = S_OR_B32 killed %7, killed %8, implicit-def dead $scc
 %10:vgpr_32 = COPY %9
-BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
 S_ENDPGM 0

 ...

@@ -516,7 +516,7 @@ body: |

 %3:vgpr_32 = COPY $vgpr0
 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %13:vgpr_32 = V_ASHRREV_I32_e64 31, %3, implicit $exec
 %14:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %13, %subreg.sub1
 %15:vreg_64 = V_LSHLREV_B64_e64 2, killed %14, implicit $exec

@@ -529,15 +529,15 @@ body: |
 %8:sreg_32_xm0 = S_MOV_B32 1234567
 %16:vgpr_32 = V_MOV_B32_e32 63, implicit $exec
 %9:vgpr_32 = V_OR_B32_e64 %8, %6, implicit $exec
-FLAT_STORE_DWORD %19, %9, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %19, %9, 0, 0, implicit $exec, implicit $flat_scr
 %10:vgpr_32 = V_OR_B32_e64 %6, %8, implicit $exec
-FLAT_STORE_DWORD %19, %10, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %19, %10, 0, 0, implicit $exec, implicit $flat_scr
 %11:vgpr_32 = V_OR_B32_e32 %8, %6, implicit $exec
-FLAT_STORE_DWORD %19, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %19, %11, 0, 0, implicit $exec, implicit $flat_scr
 %12:vgpr_32 = V_OR_B32_e64 %8, %8, implicit $exec
-FLAT_STORE_DWORD %19, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %19, %12, 0, 0, implicit $exec, implicit $flat_scr
 %13:vgpr_32 = V_OR_B32_e64 %16, %16, implicit $exec
-FLAT_STORE_DWORD %19, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %19, %13, 0, 0, implicit $exec, implicit $flat_scr
 S_ENDPGM 0

 ...

@@ -560,7 +560,7 @@ body: |
 liveins: $sgpr0_sgpr1

 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %2:sreg_32_xm0 = COPY %1.sub1
 %3:sreg_32_xm0 = COPY %1.sub0
 %4:sreg_32_xm0 = S_MOV_B32 61440

@@ -570,7 +570,7 @@ body: |
 %8:sreg_32_xm0 = S_MOV_B32 9999
 %9:sreg_32_xm0 = S_ORN2_B32 killed %7, killed %8, implicit-def dead $scc
 %10:vgpr_32 = COPY %9
-BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
 S_ENDPGM 0

 ...

@@ -593,7 +593,7 @@ body: |
 liveins: $sgpr0_sgpr1

 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %2:sreg_32_xm0 = COPY %1.sub1
 %3:sreg_32_xm0 = COPY %1.sub0
 %4:sreg_32_xm0 = S_MOV_B32 61440

@@ -603,7 +603,7 @@ body: |
 %8:sreg_32_xm0 = S_MOV_B32 9999
 %9:sreg_32_xm0 = S_NAND_B32 killed %7, killed %8, implicit-def dead $scc
 %10:vgpr_32 = COPY %9
-BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
 S_ENDPGM 0

 ...

@@ -626,7 +626,7 @@ body: |
 liveins: $sgpr0_sgpr1

 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %2:sreg_32_xm0 = COPY %1.sub1
 %3:sreg_32_xm0 = COPY %1.sub0
 %4:sreg_32_xm0 = S_MOV_B32 61440

@@ -636,7 +636,7 @@ body: |
 %8:sreg_32_xm0 = S_MOV_B32 9999
 %9:sreg_32_xm0 = S_NOR_B32 killed %7, killed %8, implicit-def dead $scc
 %10:vgpr_32 = COPY %9
-BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
 S_ENDPGM 0

 ...

@@ -659,7 +659,7 @@ body: |
 liveins: $sgpr0_sgpr1

 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+%1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
 %2:sreg_32_xm0 = COPY %1.sub1
 %3:sreg_32_xm0 = COPY %1.sub0
 %4:sreg_32_xm0 = S_MOV_B32 61440

@@ -669,7 +669,7 @@ body: |
 %8:sreg_32_xm0 = S_MOV_B32 9999
 %9:sreg_32_xm0 = S_XNOR_B32 killed %7, killed %8, implicit-def dead $scc
 %10:vgpr_32 = COPY %9
-BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
+BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, implicit $exec
 S_ENDPGM 0

 ...

@@ -723,7 +723,7 @@ body: |

 %2:vgpr_32 = COPY $vgpr0
 %0:sgpr_64 = COPY $sgpr0_sgpr1
-%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
+%3:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`, addrspace 4)
 %15:vgpr_32 = V_ASHRREV_I32_e64 31, %2, implicit $exec
 %16:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %15, %subreg.sub1
 %17:vreg_64 = V_LSHLREV_B64_e64 2, killed %16, implicit $exec

@@ -738,25 +738,25 @@ body: |
 %7:sreg_32_xm0 = S_MOV_B32 1
 %27:sreg_32_xm0 = S_MOV_B32 -4
 %11:vgpr_32 = V_LSHLREV_B32_e64 12, %10, implicit $exec
-FLAT_STORE_DWORD %20, %11, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %11, 0, 0, implicit $exec, implicit $flat_scr
 %12:vgpr_32 = V_LSHLREV_B32_e64 %7, 12, implicit $exec
-FLAT_STORE_DWORD %20, %12, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %12, 0, 0, implicit $exec, implicit $flat_scr
 %13:vgpr_32 = V_LSHL_B32_e64 %7, 12, implicit $exec
-FLAT_STORE_DWORD %20, %13, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %13, 0, 0, implicit $exec, implicit $flat_scr
 %14:vgpr_32 = V_LSHL_B32_e64 12, %7, implicit $exec
-FLAT_STORE_DWORD %20, %14, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %14, 0, 0, implicit $exec, implicit $flat_scr
 %15:vgpr_32 = V_LSHL_B32_e64 12, %24, implicit $exec
-FLAT_STORE_DWORD %20, %15, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %15, 0, 0, implicit $exec, implicit $flat_scr
 %22:vgpr_32 = V_LSHL_B32_e64 %6, 12, implicit $exec
-FLAT_STORE_DWORD %20, %22, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %22, 0, 0, implicit $exec, implicit $flat_scr
 %23:vgpr_32 = V_LSHL_B32_e64 %6, 32, implicit $exec
-FLAT_STORE_DWORD %20, %23, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %23, 0, 0, implicit $exec, implicit $flat_scr
 %25:vgpr_32 = V_LSHL_B32_e32 %6, %6, implicit $exec
-FLAT_STORE_DWORD %20, %25, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %25, 0, 0, implicit $exec, implicit $flat_scr
 %26:vgpr_32 = V_LSHLREV_B32_e32 11, %24, implicit $exec
-FLAT_STORE_DWORD %20, %26, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %26, 0, 0, implicit $exec, implicit $flat_scr
 %28:vgpr_32 = V_LSHL_B32_e32 %27, %6, implicit $exec
-FLAT_STORE_DWORD %20, %28, 0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+FLAT_STORE_DWORD %20, %28, 0, 0, implicit $exec, implicit $flat_scr
 S_ENDPGM 0

 ...

@@ -37,7 +37,7 @@ body: |
 ; GCN: S_BRANCH %bb.3
 ; GCN: bb.3:
 ; GCN: successors: %bb.4(0x80000000)
-; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+; GCN: dead %16:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]].sub3, undef %17:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
 ; GCN: dead %18:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
 ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
 ; GCN: dead %20:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

@@ -89,7 +89,7 @@ body: |
 S_BRANCH %bb.3

 bb.3:
-dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
+dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
 dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
 %36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
 dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec

@@ -66,9 +66,9 @@ body: |
 ; CHECK: dead %16:vgpr_32 = COPY %11.sub0
 ; CHECK: undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF4]].sub0, [[DEF6]].sub0, 0, implicit $exec
 ; CHECK: dead undef %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 [[DEF4]].sub1, [[DEF6]].sub1, %18, 0, implicit $exec
-; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+; CHECK: [[GLOBAL_LOAD_DWORDX2_:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[DEF1]], 0, 0, implicit $exec :: (load 8, addrspace 1)
 ; CHECK: dead %20:sreg_64 = V_CMP_GT_I32_e64 4, [[DEF7]], implicit $exec
-; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+; CHECK: GLOBAL_STORE_DWORDX2 [[COPY]], [[DEF8]], 288, 0, implicit $exec :: (store 8, addrspace 1)
 ; CHECK: bb.2:
 ; CHECK: successors: %bb.3(0x80000000)
 ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]]

@@ -79,7 +79,7 @@ body: |
 ; CHECK: bb.4:
 ; CHECK: successors: %bb.5(0x80000000)
 ; CHECK: dead %21:sreg_64 = COPY $exec
-; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+; CHECK: dead %22:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY1]], 0, 0, implicit $exec :: (load 16, addrspace 1)
 ; CHECK: DBG_VALUE %22, $noreg, <0x{{[0-9a-f]+}}>, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !DILocation(line: 0, scope: <0x{{[0-9a-f]+}}>)
 ; CHECK: bb.5:
 ; CHECK: successors: %bb.3(0x40000000), %bb.1(0x40000000)

@@ -109,9 +109,9 @@ body: |
 dead %16:vgpr_32 = COPY %11.sub0
 undef %17.sub0:vreg_64, %18:sreg_64_xexec = V_ADD_CO_U32_e64 %6.sub0, %8.sub0, 0, implicit $exec
 dead %17.sub1:vreg_64, dead %19:sreg_64_xexec = V_ADDC_U32_e64 %6.sub1, %8.sub1, %18, 0, implicit $exec
-%6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, 0, 0, 0, implicit $exec :: (load 8, addrspace 1)
+%6:vreg_64 = GLOBAL_LOAD_DWORDX2 %3, 0, 0, implicit $exec :: (load 8, addrspace 1)
 dead %20:sreg_64 = V_CMP_GT_I32_e64 4, %9, implicit $exec
-GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, 0, 0, 0, implicit $exec :: (store 8, addrspace 1)
+GLOBAL_STORE_DWORDX2 %0, %10, 288, 0, implicit $exec :: (store 8, addrspace 1)

 bb.2:
 %5:vgpr_32 = COPY %13

@@ -122,7 +122,7 @@ body: |

 bb.4:
 dead %21:sreg_64 = COPY $exec
-%22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, 0, 0, 0, implicit $exec :: (load 16, addrspace 1)
+%22:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec :: (load 16, addrspace 1)
 DBG_VALUE %22, $noreg, !16, !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), debug-location !21

 bb.5:

@@ -11,7 +11,7 @@ body: |
 bb.0:
 liveins: $sgpr0_sgpr1

-%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0, 0
+%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 9, 0
 S_NOP 0, implicit-def %4:sgpr_128, implicit %10.sub1:sgpr_128
 S_CBRANCH_SCC0 %bb.3, implicit undef $scc
 S_BRANCH %bb.1

@@ -26,7 +26,7 @@ body: |
 S_BRANCH %bb.4

 bb.3:
-%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0, 0
+%10:sgpr_128 = S_LOAD_DWORDX4_IMM killed $noreg, 10, 0
 %7:sreg_32_xm0 = COPY %10.sub1:sgpr_128
 %8:sreg_32_xm0 = COPY %10.sub2:sgpr_128

Some files were not shown because too many files have changed in this diff.