forked from OSchip/llvm-project
[AMDGPU] Add GFX11 test coverage for the memory legalizer
This commit is contained in:
parent
a33983729d
commit
a3fc8adb7e
|
@ -8,6 +8,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @singlethread_acquire_fence() {
|
||||
; GFX6-LABEL: singlethread_acquire_fence:
|
||||
|
@ -45,6 +47,14 @@ define amdgpu_kernel void @singlethread_acquire_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_acquire_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") acquire
|
||||
ret void
|
||||
|
@ -86,6 +96,14 @@ define amdgpu_kernel void @singlethread_release_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_release_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") release
|
||||
ret void
|
||||
|
@ -127,6 +145,14 @@ define amdgpu_kernel void @singlethread_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_acq_rel_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") acq_rel
|
||||
ret void
|
||||
|
@ -168,6 +194,14 @@ define amdgpu_kernel void @singlethread_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_seq_cst_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread") seq_cst
|
||||
ret void
|
||||
|
@ -209,6 +243,14 @@ define amdgpu_kernel void @singlethread_one_as_acquire_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_one_as_acquire_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_one_as_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_one_as_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acquire
|
||||
ret void
|
||||
|
@ -250,6 +292,14 @@ define amdgpu_kernel void @singlethread_one_as_release_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_one_as_release_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_one_as_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_one_as_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") release
|
||||
ret void
|
||||
|
@ -291,6 +341,14 @@ define amdgpu_kernel void @singlethread_one_as_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_one_as_acq_rel_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_one_as_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_one_as_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") acq_rel
|
||||
ret void
|
||||
|
@ -332,6 +390,14 @@ define amdgpu_kernel void @singlethread_one_as_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: singlethread_one_as_seq_cst_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: singlethread_one_as_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: singlethread_one_as_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("singlethread-one-as") seq_cst
|
||||
ret void
|
||||
|
@ -373,6 +439,14 @@ define amdgpu_kernel void @wavefront_acquire_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_acquire_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") acquire
|
||||
ret void
|
||||
|
@ -414,6 +488,14 @@ define amdgpu_kernel void @wavefront_release_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_release_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") release
|
||||
ret void
|
||||
|
@ -455,6 +537,14 @@ define amdgpu_kernel void @wavefront_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_acq_rel_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") acq_rel
|
||||
ret void
|
||||
|
@ -496,6 +586,14 @@ define amdgpu_kernel void @wavefront_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_seq_cst_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront") seq_cst
|
||||
ret void
|
||||
|
@ -537,6 +635,14 @@ define amdgpu_kernel void @wavefront_one_as_acquire_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_one_as_acquire_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_one_as_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_one_as_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acquire
|
||||
ret void
|
||||
|
@ -578,6 +684,14 @@ define amdgpu_kernel void @wavefront_one_as_release_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_one_as_release_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_one_as_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_one_as_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") release
|
||||
ret void
|
||||
|
@ -619,6 +733,14 @@ define amdgpu_kernel void @wavefront_one_as_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_one_as_acq_rel_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_one_as_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_one_as_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") acq_rel
|
||||
ret void
|
||||
|
@ -660,6 +782,14 @@ define amdgpu_kernel void @wavefront_one_as_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-LABEL: wavefront_one_as_seq_cst_fence:
|
||||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: wavefront_one_as_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: wavefront_one_as_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("wavefront-one-as") seq_cst
|
||||
ret void
|
||||
|
@ -714,6 +844,18 @@ define amdgpu_kernel void @workgroup_acquire_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acquire
|
||||
ret void
|
||||
|
@ -765,6 +907,17 @@ define amdgpu_kernel void @workgroup_release_fence() {
|
|||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") release
|
||||
ret void
|
||||
|
@ -819,6 +972,18 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") acq_rel
|
||||
ret void
|
||||
|
@ -873,6 +1038,18 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup") seq_cst
|
||||
ret void
|
||||
|
@ -921,6 +1098,17 @@ define amdgpu_kernel void @workgroup_one_as_acquire_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_one_as_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acquire
|
||||
ret void
|
||||
|
@ -966,6 +1154,16 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
|
|||
; GFX940-TGSPLIT: ; %bb.0: ; %entry
|
||||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_one_as_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") release
|
||||
ret void
|
||||
|
@ -1014,6 +1212,17 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_one_as_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") acq_rel
|
||||
ret void
|
||||
|
@ -1062,6 +1271,17 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: workgroup_one_as_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("workgroup-one-as") seq_cst
|
||||
ret void
|
||||
|
@ -1126,6 +1346,22 @@ define amdgpu_kernel void @agent_acquire_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acquire
|
||||
ret void
|
||||
|
@ -1180,6 +1416,18 @@ define amdgpu_kernel void @agent_release_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") release
|
||||
ret void
|
||||
|
@ -1244,6 +1492,22 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") acq_rel
|
||||
ret void
|
||||
|
@ -1308,6 +1572,22 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent") seq_cst
|
||||
ret void
|
||||
|
@ -1372,6 +1652,22 @@ define amdgpu_kernel void @agent_one_as_acquire_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_one_as_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_one_as_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acquire
|
||||
ret void
|
||||
|
@ -1426,6 +1722,18 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_one_as_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_one_as_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") release
|
||||
ret void
|
||||
|
@ -1490,6 +1798,22 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_one_as_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_one_as_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") acq_rel
|
||||
ret void
|
||||
|
@ -1554,6 +1878,22 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: agent_one_as_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: agent_one_as_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("agent-one-as") seq_cst
|
||||
ret void
|
||||
|
@ -1622,6 +1962,22 @@ define amdgpu_kernel void @system_acquire_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acquire
|
||||
ret void
|
||||
|
@ -1678,6 +2034,18 @@ define amdgpu_kernel void @system_release_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence release
|
||||
ret void
|
||||
|
@ -1746,6 +2114,22 @@ define amdgpu_kernel void @system_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence acq_rel
|
||||
ret void
|
||||
|
@ -1814,6 +2198,22 @@ define amdgpu_kernel void @system_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence seq_cst
|
||||
ret void
|
||||
|
@ -1882,6 +2282,22 @@ define amdgpu_kernel void @system_one_as_acquire_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_one_as_acquire_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_one_as_acquire_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acquire
|
||||
ret void
|
||||
|
@ -1938,6 +2354,18 @@ define amdgpu_kernel void @system_one_as_release_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_one_as_release_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_one_as_release_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") release
|
||||
ret void
|
||||
|
@ -2006,6 +2434,22 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_one_as_acq_rel_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_one_as_acq_rel_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") acq_rel
|
||||
ret void
|
||||
|
@ -2074,6 +2518,22 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: buffer_inv sc0 sc1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: system_one_as_seq_cst_fence:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: buffer_gl1_inv
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: system_one_as_seq_cst_fence:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: buffer_gl0_inv
|
||||
; GFX11-CU-NEXT: buffer_gl1_inv
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
entry:
|
||||
fence syncscope("one-as") seq_cst
|
||||
ret void
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -7,6 +7,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @flat_nontemporal_load_0(
|
||||
; GFX7-LABEL: flat_nontemporal_load_0:
|
||||
|
@ -112,6 +114,32 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] slc
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] slc
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load i32, i32* %in, align 4, !nontemporal !0
|
||||
|
@ -235,6 +263,34 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0
|
||||
; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] slc
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s0, v0
|
||||
; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] slc
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -348,6 +404,32 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1]
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 glc slc
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1]
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load i32, i32* %in, align 4
|
||||
|
@ -471,6 +553,34 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[1:2]
|
||||
; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2 glc slc
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s2, v0
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[1:2]
|
||||
; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2 glc slc
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -3,6 +3,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @flat_nontemporal_load_0(
|
||||
; GFX7-LABEL: flat_nontemporal_load_0:
|
||||
|
@ -60,6 +62,34 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc dlc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc dlc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load volatile i32, i32* %in, align 4
|
||||
|
@ -129,6 +159,36 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s0, v0
|
||||
; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc dlc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s0, v0
|
||||
; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1] glc dlc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -194,6 +254,34 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
|
|||
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1]
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1]
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load i32, i32* %in, align 4
|
||||
|
@ -263,6 +351,36 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
|
|||
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_nontemporal_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX11-WGP-NEXT: v_add_co_u32 v0, s0, s2, v0
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[1:2]
|
||||
; GFX11-WGP-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_nontemporal_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s1
|
||||
; GFX11-CU-NEXT: v_add_co_u32 v0, s0, s2, v0
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[1:2]
|
||||
; GFX11-CU-NEXT: v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -328,6 +446,34 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_volatile_workgroup_acquire_load:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: flat_load_b32 v2, v[0:1] glc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_volatile_workgroup_acquire_load:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: flat_load_b32 v2, v[0:1]
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32* %in, i32* %out) {
|
||||
entry:
|
||||
%val = load atomic volatile i32, i32* %in syncscope("workgroup") acquire, align 4
|
||||
|
@ -386,6 +532,33 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: flat_store_dword v[0:1], v2
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: flat_volatile_workgroup_release_store:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: flat_volatile_workgroup_release_store:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: flat_store_b32 v[0:1], v2
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 %in, i32* %out) {
|
||||
entry:
|
||||
store atomic volatile i32 %in, i32* %out syncscope("workgroup") release, align 4
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -8,6 +8,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @global_nontemporal_load_0(
|
||||
; GFX6-LABEL: global_nontemporal_load_0:
|
||||
|
@ -115,6 +117,28 @@ define amdgpu_kernel void @global_nontemporal_load_0(
|
|||
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3]
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_nontemporal_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_nontemporal_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0
|
||||
|
@ -237,6 +261,28 @@ define amdgpu_kernel void @global_nontemporal_load_1(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3]
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_nontemporal_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_load_b32 v0, v0, s[0:1] slc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[2:3]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_nontemporal_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_load_b32 v0, v0, s[0:1] slc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[2:3]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -352,6 +398,28 @@ define amdgpu_kernel void @global_nontemporal_store_0(
|
|||
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] nt
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_nontemporal_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] glc slc
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_nontemporal_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] glc slc
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -469,6 +537,28 @@ define amdgpu_kernel void @global_nontemporal_store_1(
|
|||
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] nt
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_nontemporal_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3] glc slc
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_nontemporal_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3] glc slc
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -4,6 +4,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @global_volatile_load_0(
|
||||
; GFX6-LABEL: global_volatile_load_0:
|
||||
|
@ -68,6 +70,26 @@ define amdgpu_kernel void @global_volatile_load_0(
|
|||
; SKIP-CACHE-INV-NEXT: s_mov_b32 s5, s3
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_volatile_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc dlc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_volatile_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1] glc dlc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load volatile i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -146,6 +168,28 @@ define amdgpu_kernel void @global_volatile_load_1(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_volatile_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[2:3]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_volatile_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[2:3]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -222,6 +266,30 @@ define amdgpu_kernel void @global_volatile_store_0(
|
|||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_volatile_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_volatile_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -300,6 +368,30 @@ define amdgpu_kernel void @global_volatile_store_1(
|
|||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_volatile_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_volatile_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -373,6 +465,27 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[4:7], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_volatile_workgroup_acquire_load:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_load_b32 v1, v0, s[0:1] glc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_volatile_workgroup_acquire_load:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_load_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[2:3]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load atomic volatile i32, i32 addrspace(1)* %in syncscope("workgroup") acquire, align 4
|
||||
|
@ -441,6 +554,31 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: global_volatile_workgroup_release_store:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: global_volatile_workgroup_release_store:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
store atomic volatile i32 %in, i32 addrspace(1)* %out syncscope("workgroup") release, align 4
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -8,6 +8,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @local_nontemporal_load_0(
|
||||
; GFX6-LABEL: local_nontemporal_load_0:
|
||||
|
@ -125,6 +127,32 @@ define amdgpu_kernel void @local_nontemporal_load_0(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3]
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_nontemporal_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_nontemporal_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(3)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0
|
||||
|
@ -251,6 +279,32 @@ define amdgpu_kernel void @local_nontemporal_load_1(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3]
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_nontemporal_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s2
|
||||
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_nontemporal_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s2
|
||||
; GFX11-CU-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(3)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -373,6 +427,32 @@ define amdgpu_kernel void @local_nontemporal_store_0(
|
|||
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_nontemporal_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_nontemporal_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -496,6 +576,32 @@ define amdgpu_kernel void @local_nontemporal_store_1(
|
|||
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX940-TGSPLIT-NEXT: ds_write_b32 v0, v1
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_nontemporal_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_nontemporal_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -4,6 +4,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @local_volatile_load_0(
|
||||
; GFX6-LABEL: local_volatile_load_0:
|
||||
|
@ -73,6 +75,32 @@ define amdgpu_kernel void @local_volatile_load_0(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_volatile_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_volatile_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX11-CU-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(3)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load volatile i32, i32 addrspace(3)* %in, align 4
|
||||
|
@ -151,6 +179,32 @@ define amdgpu_kernel void @local_volatile_load_1(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_volatile_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s2
|
||||
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_volatile_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s2
|
||||
; GFX11-CU-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(3)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -225,6 +279,32 @@ define amdgpu_kernel void @local_volatile_store_0(
|
|||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_volatile_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_volatile_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -300,6 +380,32 @@ define amdgpu_kernel void @local_volatile_store_1(
|
|||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_volatile_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_volatile_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -369,6 +475,29 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load(
|
|||
; SKIP-CACHE-INV-NEXT: v_mov_b32_e32 v1, s1
|
||||
; SKIP-CACHE-INV-NEXT: ds_write_b32 v1, v0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_volatile_workgroup_acquire_load:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: buffer_gl0_inv
|
||||
; GFX11-WGP-NEXT: ds_store_b32 v1, v0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_volatile_workgroup_acquire_load:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: ds_load_b32 v0, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: ds_store_b32 v1, v0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(3)* %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
%val = load atomic volatile i32, i32 addrspace(3)* %in syncscope("workgroup") acquire, align 4
|
||||
|
@ -431,6 +560,27 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
|
|||
; SKIP-CACHE-INV-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: ds_write_b32 v0, v1
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: local_volatile_workgroup_release_store:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: local_volatile_workgroup_release_store:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: ds_store_b32 v0, v1
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 %in, i32 addrspace(3)* %out) {
|
||||
entry:
|
||||
store atomic volatile i32 %in, i32 addrspace(3)* %out syncscope("workgroup") release, align 4
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -8,6 +8,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+tgsplit -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @private_nontemporal_load_0(
|
||||
; GFX6-LABEL: private_nontemporal_load_0:
|
||||
|
@ -149,6 +151,30 @@ define amdgpu_kernel void @private_nontemporal_load_0(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3]
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_nontemporal_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: scratch_load_b32 v0, off, s2 slc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_nontemporal_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: scratch_load_b32 v0, off, s2 slc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(5)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(5)* %in, align 4, !nontemporal !0
|
||||
|
@ -301,6 +327,30 @@ define amdgpu_kernel void @private_nontemporal_load_1(
|
|||
; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3]
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_nontemporal_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: scratch_load_b32 v0, v0, s2 slc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_nontemporal_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: scratch_load_b32 v0, v0, s2 slc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(5)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -448,6 +498,30 @@ define amdgpu_kernel void @private_nontemporal_store_0(
|
|||
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0
|
||||
; GFX940-TGSPLIT-NEXT: scratch_store_dword off, v0, s4 nt
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_nontemporal_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX11-WGP-NEXT: scratch_store_b32 off, v0, s0 glc slc
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_nontemporal_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX11-CU-NEXT: scratch_store_b32 off, v0, s0 glc slc
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(5)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -598,6 +672,32 @@ define amdgpu_kernel void @private_nontemporal_store_1(
|
|||
; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0
|
||||
; GFX940-TGSPLIT-NEXT: scratch_store_dword v0, v1, s4 nt
|
||||
; GFX940-TGSPLIT-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_nontemporal_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: scratch_store_b32 v0, v1, s0 glc slc
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_nontemporal_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: scratch_store_b32 v0, v1, s0 glc slc
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(5)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-WGP %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-CU %s
|
||||
|
||||
define amdgpu_kernel void @private_volatile_load_0(
|
||||
; GFX6-LABEL: private_volatile_load_0:
|
||||
|
@ -93,6 +95,30 @@ define amdgpu_kernel void @private_volatile_load_0(
|
|||
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_volatile_load_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: scratch_load_b32 v0, off, s2 glc dlc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_volatile_load_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: scratch_load_b32 v0, off, s2 glc dlc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(5)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%val = load volatile i32, i32 addrspace(5)* %in, align 4
|
||||
|
@ -191,6 +217,30 @@ define amdgpu_kernel void @private_volatile_load_1(
|
|||
; SKIP-CACHE-INV-NEXT: s_mov_b32 s2, -1
|
||||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, off, s[0:3], 0
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_volatile_load_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: scratch_load_b32 v0, v0, s2 glc dlc
|
||||
; GFX11-WGP-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-WGP-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_volatile_load_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_load_b32 s2, s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: scratch_load_b32 v0, v0, s2 glc dlc
|
||||
; GFX11-CU-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX11-CU-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(5)* %in, i32 addrspace(1)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
@ -291,6 +341,32 @@ define amdgpu_kernel void @private_volatile_store_0(
|
|||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_volatile_store_0:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX11-WGP-NEXT: scratch_store_b32 off, v0, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_volatile_store_0:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
|
||||
; GFX11-CU-NEXT: scratch_store_b32 off, v0, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(5)* %out) {
|
||||
entry:
|
||||
%val = load i32, i32 addrspace(1)* %in, align 4
|
||||
|
@ -392,6 +468,34 @@ define amdgpu_kernel void @private_volatile_store_1(
|
|||
; SKIP-CACHE-INV-NEXT: buffer_store_dword v1, v0, s[4:7], 0 offen
|
||||
; SKIP-CACHE-INV-NEXT: s_waitcnt vmcnt(0)
|
||||
; SKIP-CACHE-INV-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-WGP-LABEL: private_volatile_store_1:
|
||||
; GFX11-WGP: ; %bb.0: ; %entry
|
||||
; GFX11-WGP-NEXT: s_clause 0x1
|
||||
; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-WGP-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-WGP-NEXT: scratch_store_b32 v0, v1, s0
|
||||
; GFX11-WGP-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-WGP-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-CU-LABEL: private_volatile_store_1:
|
||||
; GFX11-CU: ; %bb.0: ; %entry
|
||||
; GFX11-CU-NEXT: s_clause 0x1
|
||||
; GFX11-CU-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
|
||||
; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x8
|
||||
; GFX11-CU-NEXT: v_lshlrev_b32_e32 v0, 2, v0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: s_load_b32 s1, s[2:3], 0x0
|
||||
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s1
|
||||
; GFX11-CU-NEXT: scratch_store_b32 v0, v1, s0
|
||||
; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX11-CU-NEXT: s_endpgm
|
||||
i32 addrspace(1)* %in, i32 addrspace(5)* %out) {
|
||||
entry:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
|
|
Loading…
Reference in New Issue