[AMDGPU] Add support for GFX11 LDSDIR hazards
Detect LDS direct WAR/WAW hazards and compute values for wait_vdst (va_vdst) parameter. Where appropriate this raises wait_vdst from the default 0 to allow concurrent issue of LDS direct with VALU execution. Also detect LDS direct versus VMEM source VGPR hazards and insert vm_vsrc=0 waits using s_waitcnt_depctr. Differential Revision: https://reviews.llvm.org/D127963
This commit is contained in:
parent
bbf3fd4af1
commit
13107c2770
|
@ -427,6 +427,7 @@ void GCNHazardRecognizer::RecedeCycle() {
|
|||
typedef enum { HazardFound, HazardExpired, NoHazardFound } HazardFnResult;
|
||||
|
||||
// Predicate taking an instruction and the wait states accumulated so far;
// getWaitStatesSince stops scanning once it returns true.
typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
|
||||
// Callback giving the number of wait states an instruction adds to the
// running total in getWaitStatesSince.
typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
|
||||
|
||||
// Search for a hazard in a block and its predecessors.
|
||||
template <typename StateT>
|
||||
|
@ -473,11 +474,11 @@ hasHazard(StateT State,
|
|||
// Returns the minimum number of wait states since \p I, walking all predecessors.
|
||||
// Scanning of a path stops as soon as \p IsExpired returns true.
|
||||
// Can only be run in a hazard recognizer mode.
|
||||
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
|
||||
const MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::const_reverse_instr_iterator I,
|
||||
int WaitStates, IsExpiredFn IsExpired,
|
||||
DenseSet<const MachineBasicBlock *> &Visited) {
|
||||
static int getWaitStatesSince(
|
||||
GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
|
||||
MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
|
||||
IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
|
||||
GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
|
||||
for (auto E = MBB->instr_rend(); I != E; ++I) {
|
||||
// Don't add WaitStates for parent BUNDLE instructions.
|
||||
if (I->isBundle())
|
||||
|
@ -489,7 +490,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
|
|||
if (I->isInlineAsm())
|
||||
continue;
|
||||
|
||||
WaitStates += SIInstrInfo::getNumWaitStates(*I);
|
||||
WaitStates += GetNumWaitStates(*I);
|
||||
|
||||
if (IsExpired(*I, WaitStates))
|
||||
return std::numeric_limits<int>::max();
|
||||
|
@ -500,8 +501,8 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
|
|||
if (!Visited.insert(Pred).second)
|
||||
continue;
|
||||
|
||||
int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(),
|
||||
WaitStates, IsExpired, Visited);
|
||||
int W = getWaitStatesSince(IsHazard, Pred, Pred->instr_rbegin(), WaitStates,
|
||||
IsExpired, Visited, GetNumWaitStates);
|
||||
|
||||
MinWaitStates = std::min(MinWaitStates, W);
|
||||
}
|
||||
|
@ -1075,6 +1076,10 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
|
|||
fixSMEMtoVectorWriteHazards(MI);
|
||||
fixVcmpxExecWARHazard(MI);
|
||||
fixLdsBranchVmemWARHazard(MI);
|
||||
if (ST.hasLdsDirect()) {
|
||||
fixLdsDirectVALUHazard(MI);
|
||||
fixLdsDirectVMEMHazard(MI);
|
||||
}
|
||||
fixVALUPartialForwardingHazard(MI);
|
||||
fixVALUTransUseHazard(MI);
|
||||
}
|
||||
|
@ -1366,6 +1371,81 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Compute the waitvdst operand of an LDS direct instruction from the VALU
/// instructions that precede it.
///
/// Walks backwards from \p MI looking for a VALU instruction that reads or
/// writes the LDSDIR destination VGPR (WAR or WAW). Only VALU instructions
/// contribute to the wait-state count, and the search gives up after 15 of
/// them or at the first VMEM/FLAT/DS/EXP instruction. The resulting count,
/// clamped to 15, is stored in the waitvdst operand; it is forced to 0 if a
/// TRANS instruction was seen during the walk.
///
/// \returns false if \p MI is not an LDSDIR instruction, true otherwise (the
/// operand is always rewritten, no instruction is inserted).
bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
  if (!SIInstrInfo::isLDSDIR(*MI))
    return false;

  const int NoHazardWaitStates = 15;
  const Register VDSTReg =
      TII.getNamedOperand(*MI, AMDGPU::OpName::vdst)->getReg();

  // Set as a side effect of the hazard predicate below.
  bool SawTrans = false;

  // A VALU that reads (WAR) or writes (WAW) the destination is a hazard.
  auto AccessesVDST = [this, VDSTReg, &SawTrans](const MachineInstr &Prev) {
    if (!SIInstrInfo::isVALU(Prev))
      return false;
    if (!SawTrans && SIInstrInfo::isTRANS(Prev))
      SawTrans = true;
    return Prev.readsRegister(VDSTReg, &TRI) ||
           Prev.modifiesRegister(VDSTReg, &TRI);
  };

  // The hazard is gone once enough VALUs have been counted, or at any
  // instruction which causes va_vdst==0.
  auto HazardExpired = [&](const MachineInstr &Prev, int WaitStates) {
    return WaitStates >= NoHazardWaitStates || SIInstrInfo::isVMEM(Prev) ||
           SIInstrInfo::isFLAT(Prev) || SIInstrInfo::isDS(Prev) ||
           SIInstrInfo::isEXP(Prev);
  };

  // Only VALU instructions advance the count.
  auto CountVALU = [](const MachineInstr &Prev) {
    if (SIInstrInfo::isVALU(Prev))
      return 1;
    return 0;
  };

  DenseSet<const MachineBasicBlock *> Visited;
  const int Count = ::getWaitStatesSince(
      AccessesVDST, MI->getParent(), std::next(MI->getReverseIterator()), 0,
      HazardExpired, Visited, CountVALU);

  // Transcendentals can execute in parallel to other VALUs.
  // This makes va_vdst count unusable with a mixture of VALU and TRANS.
  const int WaitVdst = SawTrans ? 0 : std::min(Count, NoHazardWaitStates);

  TII.getNamedOperand(*MI, AMDGPU::OpName::waitvdst)->setImm(WaitVdst);

  return true;
}
|
||||
|
||||
/// Resolve the hazard between an LDS direct instruction and an earlier
/// VMEM/FLAT/DS instruction that reads or writes the LDSDIR destination VGPR.
///
/// If such an instruction is reachable before anything that expires the
/// hazard (a VALU, an EXP, an S_WAITCNT 0, or an S_WAITCNT_DEPCTR whose
/// vm_vsrc field is zero), an S_WAITCNT_DEPCTR with vm_vsrc=0 is inserted
/// immediately before \p MI.
///
/// \returns true if a wait was inserted, false otherwise.
bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
  if (!SIInstrInfo::isLDSDIR(*MI))
    return false;

  // S_WAITCNT_DEPCTR immediate that waits for vm_vsrc=0 only, and the bits
  // that immediate clears, i.e. the vm_vsrc field itself.
  const int VmVsrcZeroImm = 0xffe3;
  const int VmVsrcFieldMask = 0xffff & ~VmVsrcZeroImm;

  const MachineOperand *VDST = TII.getNamedOperand(*MI, AMDGPU::OpName::vdst);
  const Register VDSTReg = VDST->getReg();

  auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
    if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
        !SIInstrInfo::isDS(I))
      return false;
    return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
  };
  auto IsExpiredFn = [&](const MachineInstr &I, int) {
    if (SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I))
      return true;
    if (I.getOpcode() == AMDGPU::S_WAITCNT)
      return I.getOperand(0).getImm() == 0;
    // Any depctr wait with a zero vm_vsrc field resolves the hazard, not only
    // the exact encoding this function emits; the other fields do not matter.
    if (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR)
      return (I.getOperand(0).getImm() & VmVsrcFieldMask) == 0;
    return false;
  };

  // getWaitStatesSince reports INT_MAX when every path expired without
  // reaching a hazardous instruction.
  if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
      std::numeric_limits<int>::max())
    return false;

  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII.get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(VmVsrcZeroImm);

  return true;
}
|
||||
|
||||
bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
|
||||
if (!ST.isWave64())
|
||||
return false;
|
||||
|
|
|
@ -96,6 +96,8 @@ private:
|
|||
bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
|
||||
bool fixVcmpxExecWARHazard(MachineInstr *MI);
|
||||
bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
|
||||
// Sets the waitvdst operand of an LDS direct instruction based on earlier
// VALU accesses to its destination VGPR.
bool fixLdsDirectVALUHazard(MachineInstr *MI);
|
||||
// Inserts an S_WAITCNT_DEPCTR before an LDS direct instruction when an
// earlier VMEM/FLAT/DS instruction accesses its destination VGPR.
bool fixLdsDirectVMEMHazard(MachineInstr *MI);
|
||||
bool fixVALUPartialForwardingHazard(MachineInstr *MI);
|
||||
bool fixVALUTransUseHazard(MachineInstr *MI);
|
||||
|
||||
|
|
|
@ -0,0 +1,409 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: lds_param_load_no_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_no_war
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_va_vdst0_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_va_vdst0_war
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_va_vdst0_war_salu
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_va_vdst0_war_salu
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $m0 = S_MOV_B32 killed $sgpr0
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$m0 = S_MOV_B32 killed $sgpr0
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_va_vdst1_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_va_vdst1_war
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 1, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_va_vdst10_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_va_vdst10_war
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 10, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_va_vdst10_waw
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_va_vdst10_waw
|
||||
; GCN: $vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 10, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr1 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_va_vdst20_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_va_vdst20_war
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr7 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr8 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr9 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr10 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr11 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr12 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr13 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr14 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr15 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr16 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr17 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr18 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr19 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr20 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr21 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_valu_war_trans
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_valu_war_trans
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: S_WAITCNT_DEPCTR 4095
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_SQRT_F32_e32 $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_trans_war_valu
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_trans_war_valu
|
||||
; GCN: $vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_SQRT_F32_e32 $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr4 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_valu_war_vmem
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_valu_war_vmem
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr4 = IMAGE_LOAD_V1_V4 $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_valu_war_lds
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_valu_war_lds
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr10 = DS_READ_B32 $vgpr2, 0, 0, implicit $m0, implicit $exec
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_valu_war_ldsdir
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_valu_war_ldsdir
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr10 = LDS_PARAM_LOAD 0, 1, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 4, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr2 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr3 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr10 = LDS_PARAM_LOAD 0, 1, 15, implicit $m0, implicit $exec
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr6 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 4, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_vmem_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_vmem_war
|
||||
; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
|
||||
; GCN-NEXT: S_WAITCNT_DEPCTR 65507
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_vmem_war_valu
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_vmem_war_valu
|
||||
; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
|
||||
; GCN-NEXT: $vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
$vgpr5 = V_MUL_F32_e32 $vgpr2, $vgpr2, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_vmem_war_exp
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_vmem_war_exp
|
||||
; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
|
||||
; GCN-NEXT: EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_vmem_war_waitcnt
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt
|
||||
; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
|
||||
; GCN-NEXT: S_WAITCNT 0
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
S_WAITCNT 0
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_vmem_war_waitcnt_depctr
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr
|
||||
; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
|
||||
; GCN-NEXT: S_WAITCNT_DEPCTR 65507
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
S_WAITCNT_DEPCTR 65507
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_param_load_vmem_war_waitcnt_depctr2
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_param_load_vmem_war_waitcnt_depctr2
|
||||
; GCN: $vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s32))
|
||||
; GCN-NEXT: S_WAITCNT_DEPCTR 65535
|
||||
; GCN-NEXT: S_WAITCNT_DEPCTR 65507
|
||||
; GCN-NEXT: $vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = IMAGE_LOAD_V1_V4 $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 2, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4)
|
||||
S_WAITCNT_DEPCTR 65535
|
||||
$vgpr1 = LDS_PARAM_LOAD 0, 0, 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_direct_load_no_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_direct_load_no_war
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_DIRECT_LOAD 15, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_DIRECT_LOAD 0, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: lds_direct_load_va_vdst0_war
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: lds_direct_load_va_vdst0_war
|
||||
; GCN: $vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = LDS_DIRECT_LOAD 0, implicit $m0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = V_MUL_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr1 = LDS_DIRECT_LOAD 15, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
Loading…
Reference in New Issue