[AMDGPU] Increase hazard for store dwordx3/4 to 2 waitstates on gfx940

Fixes: SWDEV-327053

Differential Revision: https://reviews.llvm.org/D123687
This commit is contained in:
Stanislav Mekhanoshin 2022-04-13 09:27:09 -07:00
parent 0ef46dc0f9
commit d951d937a0
3 changed files with 28 additions and 2 deletions

View File

@ -794,7 +794,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
// 8 bytes can have their store data overwritten by the next instruction.
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const int VALUWaitStates = 1;
const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1;
int WaitStatesNeeded = 0;
if (!TRI->isVectorRegister(MRI, Def.getReg()))

View File

@ -215,3 +215,29 @@ body: |
$vgpr1 = V_ADD_CO_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $exec
$sgpr1 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
...
# Store-data hazard test for a 4-dword global store: the V_MOV_B32 that follows
# the store writes $vgpr2, which is part of the $vgpr2_vgpr3_vgpr4_vgpr5 tuple
# read by GLOBAL_STORE_DWORDX4. The checks below expect an S_NOP 1 to be placed
# between the store and the V_MOV_B32 (presumably covering the 2 wait states
# this hazard needs on gfx940 -- confirm against the RUN line of this file).
# GCN-LABEL: name: global_store_dwordx4_data_hazard
# GCN: GLOBAL_STORE_DWORDX4
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32_e32
name: global_store_dwordx4_data_hazard
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
GLOBAL_STORE_DWORDX4 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit $exec
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
...
# Store-data hazard test for a 3-dword global store: the V_MOV_B32 that follows
# the store writes $vgpr2, which is part of the $vgpr2_vgpr3_vgpr4 tuple read by
# GLOBAL_STORE_DWORDX3. The checks below expect an S_NOP 1 to be placed between
# the store and the V_MOV_B32 (presumably covering the 2 wait states this hazard
# needs on gfx940 -- confirm against the RUN line of this file).
# GCN-LABEL: name: global_store_dwordx3_data_hazard
# GCN: GLOBAL_STORE_DWORDX3
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32_e32
name: global_store_dwordx3_data_hazard
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
GLOBAL_STORE_DWORDX3 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4, 0, 0, implicit $exec
$vgpr2 = V_MOV_B32_e32 0, implicit $exec
...

View File

@ -1155,7 +1155,7 @@ body: |
...
# GCN-LABEL: name: flat_store_data_agpr_overwritten
# GCN: FLAT_STORE_DWORDX4
# GCN-NEXT: S_NOP 0
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: flat_store_data_agpr_overwritten
body: |