86 lines
3.4 KiB
LLVM
86 lines
3.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
|
; RUN: | FileCheck -check-prefix=VENTUS %s
|
|
|
|
; foo_ker: function using the ventus_kernel calling convention.
; IR body: i = _Z13get_global_idj(0); A[i] = A[i] + B[i], where %A and %B are
; both pointers in addrspace(1) and the elements are i32.
;
; The FileCheck lines inside the body are the autogenerated expected llc output
; (see the note at the top of the file); regenerate them with the script rather
; than editing by hand. Things visible in the expected assembly:
;   - sp is raised by 12 on entry and lowered by 12 right before `ret`.
;   - Two words are loaded from 0(a0) and 4(a0) and spilled to -8(sp)/-4(sp)
;     before the call, then reloaded into t0/t1 after it. The 0(a0) word forms
;     the address that is both loaded and stored (matching %A in the IR); the
;     4(a0) word forms an address that is only loaded (matching %B).
;     NOTE(review): presumably a0 points at a kernel-argument buffer - confirm
;     against the Ventus ABI.
;   - v0 is set to zero before the call (the IR passes constant 0) and is
;     shifted left by 2 after it, i.e. the index is scaled by the i32 size.
define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
|
|
; VENTUS-LABEL: foo_ker:
|
|
; VENTUS: # %bb.0: # %entry
|
|
; VENTUS-NEXT: addi sp, sp, 12
|
|
; VENTUS-NEXT: .cfi_def_cfa_offset 12
|
|
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
|
; VENTUS-NEXT: .cfi_offset ra, 0
|
|
; VENTUS-NEXT: lw t0, 0(a0)
|
|
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
|
|
; VENTUS-NEXT: lw t0, 4(a0)
|
|
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
|
|
; VENTUS-NEXT: vmv.v.x v0, zero
|
|
; VENTUS-NEXT: call _Z13get_global_idj
|
|
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
|
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
|
|
; VENTUS-NEXT: vadd.vx v1, v0, t1
|
|
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
|
; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
|
|
; VENTUS-NEXT: vadd.vx v0, v0, t0
|
|
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
|
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
|
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
|
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
|
; VENTUS-NEXT: addi sp, sp, -12
|
|
; VENTUS-NEXT: ret
|
|
; The block label below appears in the expected "# %entry" comment above, so
; renaming it would require regenerating the assertions.
entry:
|
|
; Index for both arrays comes from the external call with argument 0.
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
|
; Load B[call].
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
|
|
%0 = load i32, ptr addrspace(1) %arrayidx, align 4
|
|
; Load A[call]; the same address is reused for the store below.
%arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
|
|
%1 = load i32, ptr addrspace(1) %arrayidx1, align 4
|
|
; A[call] = A[call] + B[call]
%add = add nsw i32 %1, %0
|
|
store i32 %add, ptr addrspace(1) %arrayidx1, align 4
|
|
ret void
|
|
}
|
|
|
|
; foo_fun: same IR body as foo_ker (i = _Z13get_global_idj(0);
; A[i] = A[i] + B[i] on i32 elements in addrspace(1)), but declared without the
; ventus_kernel calling convention and without the align 4 parameter attributes.
;
; The FileCheck lines inside the body are the autogenerated expected llc output
; (see the note at the top of the file). Things visible in the expected assembly:
;   - Both sp (+4) and tp (+8) are adjusted on entry and restored before `ret`;
;     ra is the only value stored through sp.
;   - The incoming v0 and v1 are copied to v34 and v33 before the call and read
;     back from there afterwards. The v34-based address is the one that is both
;     loaded and stored (matching %A); the v33-based address is only loaded
;     (matching %B).
;   - Every instruction that names a register numbered v32 or higher is
;     preceded by a `regext` line.
;     NOTE(review): presumably a register-index extension prefix - confirm
;     against the Ventus ISA documentation.
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
|
|
; VENTUS-LABEL: foo_fun:
|
|
; VENTUS: # %bb.0: # %entry
|
|
; VENTUS-NEXT: addi sp, sp, 4
|
|
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
|
; VENTUS-NEXT: addi tp, tp, 8
|
|
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
|
; VENTUS-NEXT: regext zero, zero, 1
|
|
; VENTUS-NEXT: vmv.v.x v32, tp
|
|
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
|
|
; VENTUS-NEXT: .cfi_offset ra, 8
|
|
; VENTUS-NEXT: .cfi_offset v33.l, 4
|
|
; VENTUS-NEXT: .cfi_offset v34.l, 0
|
|
; VENTUS-NEXT: regext zero, zero, 1
|
|
; VENTUS-NEXT: vadd.vx v33, v1, zero
|
|
; VENTUS-NEXT: regext zero, zero, 1
|
|
; VENTUS-NEXT: vadd.vx v34, v0, zero
|
|
; VENTUS-NEXT: vmv.v.x v0, zero
|
|
; VENTUS-NEXT: call _Z13get_global_idj
|
|
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
|
; VENTUS-NEXT: regext zero, zero, 64
|
|
; VENTUS-NEXT: vadd.vv v1, v33, v0
|
|
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
|
; VENTUS-NEXT: regext zero, zero, 64
|
|
; VENTUS-NEXT: vadd.vv v0, v34, v0
|
|
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
|
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
|
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
|
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
|
|
; VENTUS-NEXT: addi sp, sp, -4
|
|
; VENTUS-NEXT: addi tp, tp, -8
|
|
; VENTUS-NEXT: ret
|
|
; The block label below appears in the expected "# %entry" comment above, so
; renaming it would require regenerating the assertions.
entry:
|
|
; Index for both arrays comes from the external call with argument 0.
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
|
|
; Load B[call].
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
|
|
%0 = load i32, ptr addrspace(1) %arrayidx, align 4
|
|
; Load A[call]; the same address is reused for the store below.
%arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
|
|
%1 = load i32, ptr addrspace(1) %arrayidx1, align 4
|
|
; A[call] = A[call] + B[call]
%add = add nsw i32 %1, %0
|
|
store i32 %add, ptr addrspace(1) %arrayidx1, align 4
|
|
ret void
|
|
}
|
|
|
|
|
|
; External function called by foo_ker and foo_fun; takes one i32 and returns an
; i32 that both callers use as an array index.
; NOTE(review): the symbol looks like the Itanium-mangled form of
; get_global_id(unsigned int) - confirm against the runtime library.
declare dso_local i32 @_Z13get_global_idj(i32 noundef)
|