; Provenance (web-viewer header): forked from OSchip/llvm-project; LLVM, 140 lines, 5.2 KiB.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN

; Test @if_then: a one-armed branch whose taken side contains a strict-WWM
; DPP sequence.
;
; Control flow, as written in the IR below:
;   .entry/.bb0/.merge  %src is 0 when LocalInvocationId.x == 0, otherwise 1.
;   .then               reached when LocalInvocationId.x >= 4; feeds %src
;                       through set.inactive -> update.dpp -> strict.wwm and
;                       uses the result as the offset operand of a buffer
;                       store of -1 into %output.
;   .end                stores -1 into %output at offset 0 when coming from
;                       .then, or at offset %src when .then was skipped.
;
; %input is not referenced by the body.
;
; In the expected output %src lives in v3: it is read before the whole-wave
; region (the instructions between "s_or_saveexec_b32 s1, -1" and the restore
; of exec_lo from s1) and is only overwritten with 0 after that region ends.
; NOTE(review): the function names in this file suggest it guards VGPR
; live-range handling around WWM code; confirm against the change that added
; the test.
define amdgpu_cs void @if_then(<4 x i32> inreg %input, <4 x i32> inreg %output, <3 x i32> %LocalInvocationId) {
; GCN-LABEL: if_then:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT: ; %bb.1: ; %.bb0
; GCN-NEXT: v_mov_b32_e32 v3, 1
; GCN-NEXT: ; %bb.2: ; %.merge
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_cmp_lt_u32_e32 vcc_lo, 3, v0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT: s_cbranch_execz .LBB0_4
; GCN-NEXT: ; %bb.3: ; %.then
; GCN-NEXT: v_mov_b32_e32 v1, v3
; GCN-NEXT: s_not_b32 exec_lo, exec_lo
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_not_b32 exec_lo, exec_lo
; GCN-NEXT: s_or_saveexec_b32 s1, -1
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_dpp v2, v1 row_shr:1 row_mask:0xf bank_mask:0xf
; GCN-NEXT: s_mov_b32 exec_lo, s1
; GCN-NEXT: v_mov_b32_e32 v0, v2
; GCN-NEXT: v_mov_b32_e32 v4, -1
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: buffer_store_dword v4, v0, s[4:7], 0 offen
; GCN-NEXT: .LBB0_4: ; %.end
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_mov_b32_e32 v0, -1
; GCN-NEXT: buffer_store_dword v0, v3, s[4:7], 0 offen
; GCN-NEXT: s_endpgm
.entry:
  %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i32 0
  %.not10002 = icmp eq i32 %LocalInvocationId.i0, 0
  br i1 %.not10002, label %.merge, label %.bb0

; Empty block: exists only to give the %src phi a second, divergent incoming edge.
.bb0:
  br label %.merge

.merge:
  %src = phi i32 [ 0, %.entry ], [ 1, %.bb0 ]
  %i530 = icmp ult i32 %LocalInvocationId.i0, 4
  ; Note the operand order: ids below 4 skip straight to .end.
  br i1 %i530, label %.end, label %.then

.then:
  ; Inactive lanes get 0, active lanes keep %src (matches the exec-inverted
  ; v_mov pair in the expected output).
  %i562 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src, i32 0)
  ; dpp_ctrl 273 with row/bank masks 15 is emitted as
  ; "row_shr:1 row_mask:0xf bank_mask:0xf" in the expected output.
  %i563 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %i562, i32 273, i32 15, i32 15, i1 false)
  %i564 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %i563)
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %i564, i32 0, i32 0)
  br label %.end

.end:
  ; %src is still needed here on the path that bypassed .then.
  %idx = phi i32 [ 0, %.then ], [ %src, %.merge ]
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %idx, i32 0, i32 0)
  ret void
}


; Test @if_else_vgpr_opt: a two-armed branch where only the else side contains
; a strict-WWM DPP sequence and only the then side reads %src afterwards.
;
; Control flow, as written in the IR below:
;   .entry/.bb0/.merge  %src is 0 when LocalInvocationId.x == 0, otherwise 1.
;   .then               reached when LocalInvocationId.x < 4; stores -1 into
;                       %output at offset %src.
;   .else               reached when LocalInvocationId.x >= 4; feeds %src
;                       through set.inactive -> update.dpp -> strict.wwm and
;                       uses the result as the offset operand of a buffer
;                       store of -1 into %output.
;   .end                returns; no value is merged here.
;
; %input is not referenced by the body.
;
; In the expected output %src lives in v3. The else block is laid out first
; (.LBB1_5), reads v3 before its whole-wave region, then reuses v3 for the
; store data and marks it implicit-def; the then block (.LBB1_6) uses v3 as
; the store offset.
; NOTE(review): the "vgpr_opt" suffix suggests this guards a VGPR live-range
; optimization across if/else with WWM code; confirm against the change that
; added the test.
define amdgpu_cs void @if_else_vgpr_opt(<4 x i32> inreg %input, <4 x i32> inreg %output, <3 x i32> %LocalInvocationId) {
; GCN-LABEL: if_else_vgpr_opt:
; GCN: ; %bb.0: ; %.entry
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT: ; %bb.1: ; %.bb0
; GCN-NEXT: v_mov_b32_e32 v3, 1
; GCN-NEXT: ; %bb.2: ; %.merge
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GCN-NEXT: v_cmp_lt_u32_e32 vcc_lo, 3, v0
; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo
; GCN-NEXT: s_xor_b32 s0, exec_lo, s0
; GCN-NEXT: s_cbranch_execnz .LBB1_5
; GCN-NEXT: ; %bb.3: ; %Flow
; GCN-NEXT: s_andn2_saveexec_b32 s0, s0
; GCN-NEXT: s_cbranch_execnz .LBB1_6
; GCN-NEXT: .LBB1_4: ; %.end
; GCN-NEXT: s_endpgm
; GCN-NEXT: .LBB1_5: ; %.else
; GCN-NEXT: s_or_saveexec_b32 s1, -1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_mov_b32 exec_lo, s1
; GCN-NEXT: v_mov_b32_e32 v2, v3
; GCN-NEXT: s_not_b32 exec_lo, exec_lo
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_not_b32 exec_lo, exec_lo
; GCN-NEXT: s_or_saveexec_b32 s1, -1
; GCN-NEXT: v_mov_b32_dpp v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf
; GCN-NEXT: s_mov_b32 exec_lo, s1
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: v_mov_b32_e32 v3, -1
; GCN-NEXT: buffer_store_dword v3, v0, s[4:7], 0 offen
; GCN-NEXT: ; implicit-def: $vgpr3
; GCN-NEXT: s_andn2_saveexec_b32 s0, s0
; GCN-NEXT: s_cbranch_execz .LBB1_4
; GCN-NEXT: .LBB1_6: ; %.then
; GCN-NEXT: v_mov_b32_e32 v0, -1
; GCN-NEXT: buffer_store_dword v0, v3, s[4:7], 0 offen
; GCN-NEXT: s_endpgm
.entry:
  %LocalInvocationId.i0 = extractelement <3 x i32> %LocalInvocationId, i32 0
  %.not10002 = icmp eq i32 %LocalInvocationId.i0, 0
  br i1 %.not10002, label %.merge, label %.bb0

; Empty block: exists only to give the %src phi a second, divergent incoming edge.
.bb0:
  br label %.merge

.merge:
  %src = phi i32 [ 0, %.entry ], [ 1, %.bb0 ]
  %i530 = icmp ult i32 %LocalInvocationId.i0, 4
  br i1 %i530, label %.then, label %.else

.then:
  ; Only use of %src outside the WWM sequence.
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %src, i32 0, i32 0)
  br label %.end

.else:
  ; Inactive lanes get 0, active lanes keep %src (matches the exec-inverted
  ; v_mov pair in the expected output).
  %i562 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src, i32 0)
  ; dpp_ctrl 273 with row/bank masks 15 is emitted as
  ; "row_shr:1 row_mask:0xf bank_mask:0xf" in the expected output.
  %i563 = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %i562, i32 273, i32 15, i32 15, i1 false)
  %i564 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %i563)
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 -1, <4 x i32> %output, i32 %i564, i32 0, i32 0)
  br label %.end

.end:
  ret void
}

; AMDGPU intrinsics called by the test functions in this file.
; set.inactive and update.dpp share attribute group #0; strict.wwm uses #1,
; which differs from #0 only by the extra "speculatable"; the buffer store
; uses #2.
declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32 immarg, i32 immarg) #2

; Attribute groups referenced by the declarations above.
attributes #0 = { convergent nounwind readnone willreturn }
attributes #1 = { convergent nounwind readnone speculatable willreturn }
attributes #2 = { nounwind willreturn writeonly }