; llvm-project/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll

; 57 lines
; 2.2 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
; bar: callee half of the function-call test. Returns %a + %b.
; The body has an unoptimized shape: each argument is stored to an
; addrspace(5) stack slot and immediately reloaded before the add.
; The expected output below contains only one vector add (v0 = v0 + v1)
; followed by ret, i.e. none of the alloca/store/load traffic is expected
; to survive instruction selection.
define dso_local i32 @bar(i32 noundef %a, i32 noundef %b) {
; VENTUS-LABEL: bar:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vadd.vv v0, v0, v1
; VENTUS-NEXT: ret
entry:
; One addrspace(5) slot per incoming argument.
%a.addr = alloca i32, align 4, addrspace(5)
%b.addr = alloca i32, align 4, addrspace(5)
; Spill both arguments to their slots ...
store i32 %a, ptr addrspace(5) %a.addr, align 4
store i32 %b, ptr addrspace(5) %b.addr, align 4
; ... and read them straight back.
%0 = load i32, ptr addrspace(5) %a.addr, align 4
%1 = load i32, ptr addrspace(5) %b.addr, align 4
; Signed add with the no-signed-wrap flag; the sum is the return value.
%add = add nsw i32 %0, %1
ret i32 %add
}
; Function Attrs: convergent noinline norecurse nounwind optnone vscale_range(1,2048)
; foo: caller half of the function-call test, declared with the
; ventus_kernel calling convention. It calls bar(%a, %b) and stores the
; result through the addrspace(1) pointer %c.
;
; What the expected output below pins down:
;   * Prologue adds 8 to sp and the epilogue subtracts 8; ra and one
;     temporary are saved at negative offsets (-8 and -4) from the
;     adjusted sp.
;   * Three 32-bit words are loaded from offsets 0, 4 and 8 off a0.
;     The word at offset 8 is spilled across the call and afterwards used
;     as the store address, so it corresponds to %c. The words at offsets
;     0 and 4 are moved into v0 and v1 before the call (presumably %a and
;     %b in that order -- the add in bar is commutative, so the checks
;     alone do not distinguish them).
;   * After the call, v0 is stored through the reloaded pointer in v1.
define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: lw t0, 8(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: vmv.v.x v0, t1
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: call bar
; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
; One addrspace(5) slot per incoming argument (two i32 values and one
; addrspace(1) pointer).
%a.addr = alloca i32, align 4, addrspace(5)
%b.addr = alloca i32, align 4, addrspace(5)
%c.addr = alloca ptr addrspace(1), align 4, addrspace(5)
; Spill all three arguments to their slots.
store i32 %a, ptr addrspace(5) %a.addr, align 4
store i32 %b, ptr addrspace(5) %b.addr, align 4
store ptr addrspace(1) %c, ptr addrspace(5) %c.addr, align 4
; Reload the two integer operands and pass them to bar.
%0 = load i32, ptr addrspace(5) %a.addr, align 4
%1 = load i32, ptr addrspace(5) %b.addr, align 4
%call = call i32 @bar(i32 noundef %0, i32 noundef %1)
; The destination pointer is reloaded only after the call, which is why
; the expected output keeps it alive across the call via a stack spill.
%2 = load ptr addrspace(1), ptr addrspace(5) %c.addr, align 4
; Write bar's result to the addrspace(1) location %c points at.
store i32 %call, ptr addrspace(1) %2, align 4
ret void
}