[NFC][fix] Fix test case failures

This commit is contained in:
zhoujing 2024-03-07 10:57:30 +08:00
parent efef613b61
commit 9b11eb8feb
19 changed files with 1864 additions and 352 deletions

View File

@ -1,4 +1,4 @@
// RUN: clang -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
void test() {
// CHECK: call void @llvm.riscv.ventus.barrier(i32 1)

View File

@ -11,13 +11,15 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: regext zero, zero, 1
@ -25,19 +27,23 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z12get_local_idj
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t1, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v0, v0, t1
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: vsll.vi v1, v33, 2
; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v1, v1, t0
; VENTUS-NEXT: vlw12.v v2, 0(v1)
; VENTUS-NEXT: vadd.vv v0, v2, v0
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -12
; VENTUS-NEXT: addi tp, tp, -4
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)

View File

@ -7,23 +7,25 @@
define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: addi s0, s0, 20
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: addi tp, tp, 24
; VENTUS-NEXT: .cfi_def_cfa_offset 24
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -24(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v33, t0
; VENTUS-NEXT: lui t1, %hi(foo.b)
; VENTUS-NEXT: addi t2, t1, %lo(foo.b)
; VENTUS-NEXT: addi t1, tp, -24
; VENTUS-NEXT: addi t1, tp, -20
; VENTUS-NEXT: addi t2, s0, -20
; VENTUS-NEXT: vmv.v.x v0, t1
; VENTUS-NEXT: sw t2, 16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v1, t2
; VENTUS-NEXT: vmv.v.x v2, t0
; VENTUS-NEXT: call bar
@ -37,10 +39,10 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: vbltu v1, v0, .LBB0_2
; VENTUS-NEXT: # %bb.1: # %if.then
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: addi t0, tp, -24
; VENTUS-NEXT: addi t0, tp, -20
; VENTUS-NEXT: vadd.vx v1, v0, t0
; VENTUS-NEXT: vlw.v v1, 0(v1)
; VENTUS-NEXT: lw t1, 16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi t1, s0, -20
; VENTUS-NEXT: vadd.vx v2, v0, t1
; VENTUS-NEXT: vlw12.v v2, 0(v2)
; VENTUS-NEXT: regext zero, zero, 64
@ -60,9 +62,14 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: .LBB0_3: # %if.end
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -24(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi s0, s0, -20
; VENTUS-NEXT: addi tp, tp, -24
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%a = alloca [5 x i32], align 4, addrspace(5)
@ -314,6 +321,8 @@ define dso_local i32 @stack_space(ptr addrspace(3) nocapture noundef readnone %a
; VENTUS-NEXT: vadd.vx v0, v0, t0
; VENTUS-NEXT: vlw.v v0, 0(v0)
; VENTUS-NEXT: addi tp, tp, -48
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%test = alloca [12 x i32], align 4, addrspace(5)

View File

@ -6,17 +6,19 @@
define i32 @foo(i32 noundef %cond, i32 noundef %a, i32 noundef %b, i32 noundef %c) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmv.v.x v4, zero
; VENTUS-NEXT: vmv.v.x v5, zero
; VENTUS-NEXT: regexti zero, zero, 0
; VENTUS-NEXT: vrsub.vi v4, v3, 0
; VENTUS-NEXT: .Lpcrel_hi0:
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB0_2)
; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi0)
; VENTUS-NEXT: vbne v0, v4, .LBB0_2
; VENTUS-NEXT: # %bb.1:
; VENTUS-NEXT: vrsub.vi v3, v3, 0
; VENTUS-NEXT: vbeq v0, v5, .LBB0_2
; VENTUS-NEXT: # %bb.1: # %entry
; VENTUS-NEXT: vadd.vx v4, v3, zero
; VENTUS-NEXT: .LBB0_2: # %entry
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: vmadd.vv v2, v1, v3
; VENTUS-NEXT: vmadd.vv v2, v1, v4
; VENTUS-NEXT: vadd.vx v0, v2, zero
; VENTUS-NEXT: ret
entry:

View File

@ -11,9 +11,13 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v33, v1, zero
@ -30,9 +34,15 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)

View File

@ -7,24 +7,24 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 12
; VENTUS-NEXT: .cfi_def_cfa_offset 12
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t1, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v1, v0, t1
; VENTUS-NEXT: vlw12.v v1, 0(v1)
; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v0, v0, t0
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -12
; VENTUS-NEXT: ret
entry:
@ -47,9 +47,13 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v33, v1, zero
@ -66,9 +70,15 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)

View File

@ -12,30 +12,30 @@ define dso_local ventus_kernel void @kernel_calling_convention(ptr addrspace(1)
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 8(a0)
; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: lw s0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, 0(s0)
; VENTUS-NEXT: lw s1, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, 0(s1)
; VENTUS-NEXT: lw t2, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t1, 0(t2)
; VENTUS-NEXT: vadd.vx v0, v0, t0
; VENTUS-NEXT: vadd.vx v0, v0, t1
; VENTUS-NEXT: vmv.v.x v1, s0
; VENTUS-NEXT: vmv.v.x v1, s1
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, 0(s0)
; VENTUS-NEXT: lw s1, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw t0, 0(s1)
; VENTUS-NEXT: lw t2, 0(t2)
; VENTUS-NEXT: add t0, t2, t0
; VENTUS-NEXT: sw t0, 0(s0)
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: sw t0, 0(s1)
; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: ret
entry:
@ -81,12 +81,10 @@ entry:
define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readonly %a1, ptr nocapture noundef readonly %a2, ptr nocapture noundef readonly %a3, ptr nocapture noundef readonly %a4, ptr nocapture noundef readonly %a5, ptr nocapture noundef readonly %a6, ptr nocapture noundef readonly %a7, ptr nocapture noundef readonly %a8, ptr nocapture noundef readonly %a9, ptr nocapture noundef readonly %a10, ptr nocapture noundef readonly %a11, ptr nocapture noundef readonly %a12, ptr nocapture noundef readonly %a13, ptr nocapture noundef readonly %a14, ptr nocapture noundef readonly %a15, ptr nocapture noundef readonly %a16, ptr nocapture noundef readonly %a17, ptr nocapture noundef readonly %a18, ptr nocapture noundef readonly %a19, ptr nocapture noundef readonly %a20, ptr nocapture noundef readonly %a21, ptr nocapture noundef readonly %a22, ptr nocapture noundef readonly %a23, ptr nocapture noundef readonly %a24, ptr nocapture noundef readonly %a25, ptr nocapture noundef readonly %a26, ptr nocapture noundef readonly %a27, ptr nocapture noundef readonly %a28, ptr nocapture noundef readonly %a29, ptr nocapture noundef readonly %a30, ptr nocapture noundef readonly %a31, ptr nocapture noundef readonly %a32, ptr addrspace(3) nocapture noundef readonly %a33, ptr addrspace(5) nocapture noundef readonly %a34) local_unnamed_addr #2 {
; VENTUS-LABEL: non_kernel_calling_convention:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -12(v32)
; VENTUS-NEXT: regext zero, zero, 9
@ -160,7 +158,13 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon
; VENTUS-NEXT: vadd.vv v0, v0, v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vadd.vv v0, v0, v3
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%0 = load i32, ptr %a1, align 4
@ -269,27 +273,29 @@ define dso_local i32 @test_add(ptr nocapture noundef readonly %a, ptr nocapture
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: vadd.vi v0, v0, 1
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -8(v32)
; VENTUS-NEXT: vsw.v v0, -4(v32)
; VENTUS-NEXT: vlw12.v v0, 0(v1)
; VENTUS-NEXT: vadd.vi v0, v0, 2
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -4(v32)
; VENTUS-NEXT: addi t0, tp, -8
; VENTUS-NEXT: addi t1, tp, -4
; VENTUS-NEXT: vsw.v v0, -8(v32)
; VENTUS-NEXT: addi t0, tp, -4
; VENTUS-NEXT: addi t1, tp, -8
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vmv.v.x v1, t1
; VENTUS-NEXT: call add
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vlw.v v1, -8(v32)
; VENTUS-NEXT: vlw.v v1, -4(v32)
; VENTUS-NEXT: vadd.vv v0, v1, v0
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%d = alloca i32, align 4, addrspace(5)

File diff suppressed because it is too large Load Diff

View File

@ -25,7 +25,7 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: lw t0, 8(a0)
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
@ -37,7 +37,7 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add
; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vmv.v.x v1, t0
; VENTUS-NEXT: vsw12.v v0, 0(v1)
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:

View File

@ -285,6 +285,7 @@ define i32 @vmulhsu_x(i32 %a) nounwind {
define i32 @vrsub_i(i32 %a) nounwind {
; VENTUS-LABEL: vrsub_i:
; VENTUS: # %bb.0:
; VENTUS-NEXT: regexti zero, zero, 0
; VENTUS-NEXT: vrsub.vi v0, v0, 12
; VENTUS-NEXT: ret
%1 = sub i32 12, %a

View File

@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck %s
@ -6,11 +7,20 @@
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
; Here we focus on kernel struct arguments
define dso_local ventus_kernel void @test_kernel1(i8 noundef %c, %struct.MyStruct %st.coerce, i8 noundef %uc, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
; CHECK-LABEL: test_kernel1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lb t0, 0(a0)
; CHECK-NEXT: lbu t1, 24(a0)
; CHECK-NEXT: lw t2, 28(a0)
; CHECK-NEXT: fcvt.s.w t0, t0
; CHECK-NEXT: lw s1, 8(a0)
; CHECK-NEXT: sw t0, 0(t2)
; CHECK-NEXT: fcvt.s.w t0, s1
; CHECK-NEXT: fcvt.s.wu t1, t1
; CHECK-NEXT: sw t0, 4(t2)
; CHECK-NEXT: sw t1, 8(t2)
; CHECK-NEXT: ret
entry:
; CHECK: lb t0, 0(a0)
; CHECK: lbu t1, 24(a0)
; CHECK: lw t2, 28(a0)
; CHECK: lw s0, 8(a0)
%st.coerce.fca.0.extract = extractvalue %struct.MyStruct %st.coerce, 0
%conv = sitofp i8 %c to float
store float %conv, ptr addrspace(1) %result, align 4
@ -26,15 +36,31 @@ entry:
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048)
; Here we focus on scalar arguments
define dso_local ventus_kernel void @test_kernel2(i8 noundef %c, i8 noundef %uc, i16 noundef %s, i16 noundef %us, i32 noundef %i, i32 noundef %ui, float noundef %f, ptr addrspace(1) nocapture noundef writeonly align 4 %result) {
; CHECK-LABEL: test_kernel2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lw t0, 24(a0)
; CHECK-NEXT: lw t1, 20(a0)
; CHECK-NEXT: lw t2, 16(a0)
; CHECK-NEXT: lhu s1, 12(a0)
; CHECK-NEXT: lh a1, 8(a0)
; CHECK-NEXT: lb a2, 0(a0)
; CHECK-NEXT: lbu a3, 4(a0)
; CHECK-NEXT: lw a0, 28(a0)
; CHECK-NEXT: fcvt.s.w a2, a2
; CHECK-NEXT: fcvt.s.wu a3, a3
; CHECK-NEXT: sw a2, 0(a0)
; CHECK-NEXT: sw a3, 4(a0)
; CHECK-NEXT: fcvt.s.w a1, a1
; CHECK-NEXT: fcvt.s.wu s1, s1
; CHECK-NEXT: sw a1, 8(a0)
; CHECK-NEXT: sw s1, 12(a0)
; CHECK-NEXT: fcvt.s.w t2, t2
; CHECK-NEXT: fcvt.s.wu t1, t1
; CHECK-NEXT: sw t2, 16(a0)
; CHECK-NEXT: sw t1, 20(a0)
; CHECK-NEXT: sw t0, 24(a0)
; CHECK-NEXT: ret
entry:
; CHECK: flw t0, 24(a0)
; CHECK: lw t1, 20(a0)
; CHECK: lw t2, 16(a0)
; CHECK: lhu s0, 12(a0)
; CHECK: lh s1, 8(a0)
; CHECK: lb a1, 0(a0)
; CHECK: lbu a2, 4(a0)
; CHECK: lw a0, 28(a0)
%conv = sitofp i8 %c to float
store float %conv, ptr addrspace(1) %result, align 4
%conv1 = uitofp i8 %uc to float
@ -60,109 +86,381 @@ entry:
; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: write) vscale_range(1,2048)
; Here we focus on vector arguments
define dso_local ventus_kernel void @test_kernel3(<2 x i8> noundef %c, <2 x i8> noundef %uc, <2 x i16> noundef %s, <2 x i16> noundef %us, <2 x i32> noundef %i, <2 x i32> noundef %ui, <2 x float> noundef %f, ptr addrspace(1) nocapture noundef writeonly align 8 %result) {
; CHECK-LABEL: test_kernel3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, 56
; CHECK-NEXT: .cfi_def_cfa_offset 56
; CHECK-NEXT: addi tp, tp, 4
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v32, tp
; CHECK-NEXT: sw ra, -56(sp) # 4-byte Folded Spill
; CHECK-NEXT: regext zero, zero, 72
; CHECK-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, 0
; CHECK-NEXT: .cfi_offset v33.l, 0
; CHECK-NEXT: lw t0, 36(a0)
; CHECK-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 32(a0)
; CHECK-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 28(a0)
; CHECK-NEXT: sw t0, -12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 24(a0)
; CHECK-NEXT: sw t0, -16(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 20(a0)
; CHECK-NEXT: sw t0, -20(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 16(a0)
; CHECK-NEXT: sw t0, -24(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 14(a0)
; CHECK-NEXT: sw t0, -28(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 12(a0)
; CHECK-NEXT: sw t0, -32(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 10(a0)
; CHECK-NEXT: sw t0, -36(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 8(a0)
; CHECK-NEXT: sw t0, -40(sp) # 4-byte Folded Spill
; CHECK-NEXT: lbu t0, 5(a0)
; CHECK-NEXT: sw t0, -44(sp) # 4-byte Folded Spill
; CHECK-NEXT: lbu t0, 4(a0)
; CHECK-NEXT: sw t0, -48(sp) # 4-byte Folded Spill
; CHECK-NEXT: lbu t0, 1(a0)
; CHECK-NEXT: lbu t1, 0(a0)
; CHECK-NEXT: lw t2, 40(a0)
; CHECK-NEXT: sw t2, -52(sp) # 4-byte Folded Spill
; CHECK-NEXT: vmv.v.x v0, t1
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: call _Z14convert_float2Dv2_c
; CHECK-NEXT: lw t0, -52(sp) # 4-byte Folded Reload
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v33, t0
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 4(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 0(v33)
; CHECK-NEXT: lw t0, -48(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -44(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: call _Z14convert_float2Dv2_h
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 12(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 8(v33)
; CHECK-NEXT: lw t0, -40(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -36(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: call _Z14convert_float2Dv2_s
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 20(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 16(v33)
; CHECK-NEXT: lw t0, -32(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -28(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: call _Z14convert_float2Dv2_t
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 28(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 24(v33)
; CHECK-NEXT: lw t0, -24(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -20(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: call _Z14convert_float2Dv2_i
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 36(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 32(v33)
; CHECK-NEXT: lw t0, -16(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -12(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: call _Z14convert_float2Dv2_j
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 44(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 40(v33)
; CHECK-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: call _Z14convert_float2Dv2_f
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 52(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 48(v33)
; CHECK-NEXT: lw ra, -56(sp) # 4-byte Folded Reload
; CHECK-NEXT: regext zero, zero, 9
; CHECK-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, -56
; CHECK-NEXT: addi tp, tp, -4
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v32, tp
; CHECK-NEXT: ret
entry:
;CHECK: flw t0, 36(a0)
;CHECK: flw t0, 32(a0)
;CHECK: lw t0, 28(a0)
;CHECK: lw t0, 24(a0)
;CHECK: lw t0, 20(a0)
;CHECK: lw t0, 16(a0)
;CHECK: lhu t0, 14(a0)
;CHECK: lhu t0, 12(a0)
;CHECK: lhu t0, 10(a0)
;CHECK: lhu t0, 8(a0)
;CHECK: lbu t0, 5(a0)
;CHECK: lbu t0, 4(a0)
;CHECK: lbu t0, 1(a0)
;CHECK: lbu t1, 0(a0)
;CHECK: lw t2, 40(a0)
%call = call <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef %c)
%call = call <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef %c)
store <2 x float> %call, ptr addrspace(1) %result, align 8
%call1 = call <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef %uc)
%call1 = call <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef %uc)
%arrayidx2 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 1
store <2 x float> %call1, ptr addrspace(1) %arrayidx2, align 8
%call3 = call <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef %s)
%call3 = call <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef %s)
%arrayidx4 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 2
store <2 x float> %call3, ptr addrspace(1) %arrayidx4, align 8
%call5 = call <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef %us)
%call5 = call <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef %us)
%arrayidx6 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 3
store <2 x float> %call5, ptr addrspace(1) %arrayidx6, align 8
%call7 = call <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef %i)
%call7 = call <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef %i)
%arrayidx8 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 4
store <2 x float> %call7, ptr addrspace(1) %arrayidx8, align 8
%call9 = call <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef %ui)
%call9 = call <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef %ui)
%arrayidx10 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 5
store <2 x float> %call9, ptr addrspace(1) %arrayidx10, align 8
%call11 = call <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef %f)
%call11 = call <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef %f)
%arrayidx12 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 6
store <2 x float> %call11, ptr addrspace(1) %arrayidx12, align 8
ret void
}
declare dso_local <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef)
declare dso_local <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef)
; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: write) vscale_range(1,2048)
; Here we focus on vector arguments
define dso_local ventus_kernel void @test_kernel4(<4 x i8> noundef %c, <4 x i8> noundef %uc, <4 x i16> noundef %s, <4 x i16> noundef %us, <4 x i32> noundef %i, <4 x i32> noundef %ui, <4 x float> noundef %f, ptr addrspace(1) nocapture noundef writeonly align 16 %result) {
; CHECK-LABEL: test_kernel4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi sp, sp, 104
; CHECK-NEXT: .cfi_def_cfa_offset 104
; CHECK-NEXT: addi tp, tp, 4
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v32, tp
; CHECK-NEXT: sw ra, -104(sp) # 4-byte Folded Spill
; CHECK-NEXT: regext zero, zero, 72
; CHECK-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, 0
; CHECK-NEXT: .cfi_offset v33.l, 0
; CHECK-NEXT: lw t0, 76(a0)
; CHECK-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 72(a0)
; CHECK-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 68(a0)
; CHECK-NEXT: sw t0, -12(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 64(a0)
; CHECK-NEXT: sw t0, -16(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 60(a0)
; CHECK-NEXT: sw t0, -20(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 56(a0)
; CHECK-NEXT: sw t0, -24(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 52(a0)
; CHECK-NEXT: sw t0, -28(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 48(a0)
; CHECK-NEXT: sw t0, -32(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 44(a0)
; CHECK-NEXT: sw t0, -36(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 40(a0)
; CHECK-NEXT: sw t0, -40(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 36(a0)
; CHECK-NEXT: sw t0, -44(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 32(a0)
; CHECK-NEXT: sw t0, -48(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 22(a0)
; CHECK-NEXT: sw t0, -52(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 20(a0)
; CHECK-NEXT: sw t0, -56(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 18(a0)
; CHECK-NEXT: sw t0, -60(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 16(a0)
; CHECK-NEXT: sw t0, -64(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 14(a0)
; CHECK-NEXT: sw t0, -68(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 12(a0)
; CHECK-NEXT: sw t0, -72(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 10(a0)
; CHECK-NEXT: sw t0, -76(sp) # 4-byte Folded Spill
; CHECK-NEXT: lhu t0, 8(a0)
; CHECK-NEXT: sw t0, -80(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 4(a0)
; CHECK-NEXT: lbu t1, 4(a0)
; CHECK-NEXT: sw t1, -88(sp) # 4-byte Folded Spill
; CHECK-NEXT: srli t1, t0, 24
; CHECK-NEXT: sw t1, -84(sp) # 4-byte Folded Spill
; CHECK-NEXT: srli t1, t0, 8
; CHECK-NEXT: andi t1, t1, 255
; CHECK-NEXT: sw t1, -92(sp) # 4-byte Folded Spill
; CHECK-NEXT: srli t0, t0, 16
; CHECK-NEXT: andi t0, t0, 255
; CHECK-NEXT: sw t0, -96(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 80(a0)
; CHECK-NEXT: sw t0, -100(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 0(a0)
; CHECK-NEXT: lbu t1, 0(a0)
; CHECK-NEXT: srli t2, t0, 24
; CHECK-NEXT: srli s1, t0, 8
; CHECK-NEXT: andi s1, s1, 255
; CHECK-NEXT: srli t0, t0, 16
; CHECK-NEXT: andi t0, t0, 255
; CHECK-NEXT: vmv.v.x v0, t1
; CHECK-NEXT: vmv.v.x v1, s1
; CHECK-NEXT: vmv.v.x v2, t0
; CHECK-NEXT: vmv.v.x v3, t2
; CHECK-NEXT: call _Z14convert_float4Dv4_c
; CHECK-NEXT: lw t0, -100(sp) # 4-byte Folded Reload
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v33, t0
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v3, 12(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v2, 8(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 4(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 0(v33)
; CHECK-NEXT: lw t0, -88(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -92(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: lw t0, -96(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v2, t0
; CHECK-NEXT: lw t0, -84(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v3, t0
; CHECK-NEXT: call _Z14convert_float4Dv4_h
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v3, 28(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v2, 24(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 20(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 16(v33)
; CHECK-NEXT: lw t0, -80(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -76(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: lw t0, -72(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v2, t0
; CHECK-NEXT: lw t0, -68(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v3, t0
; CHECK-NEXT: call _Z14convert_float4Dv4_s
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v3, 44(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v2, 40(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 36(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 32(v33)
; CHECK-NEXT: lw t0, -64(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -60(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: lw t0, -56(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v2, t0
; CHECK-NEXT: lw t0, -52(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v3, t0
; CHECK-NEXT: call _Z14convert_float4Dv4_t
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v3, 60(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v2, 56(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 52(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 48(v33)
; CHECK-NEXT: lw t0, -48(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -44(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: lw t0, -40(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v2, t0
; CHECK-NEXT: lw t0, -36(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v3, t0
; CHECK-NEXT: call _Z14convert_float4Dv4_i
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v3, 76(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v2, 72(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 68(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 64(v33)
; CHECK-NEXT: lw t0, -32(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -28(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: lw t0, -24(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v2, t0
; CHECK-NEXT: lw t0, -20(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v3, t0
; CHECK-NEXT: call _Z14convert_float4Dv4_j
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v3, 92(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v2, 88(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 84(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 80(v33)
; CHECK-NEXT: lw t0, -16(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v0, t0
; CHECK-NEXT: lw t0, -12(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v1, t0
; CHECK-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v2, t0
; CHECK-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; CHECK-NEXT: vmv.v.x v3, t0
; CHECK-NEXT: call _Z14convert_float4Dv4_f
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v3, 108(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v2, 104(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v1, 100(v33)
; CHECK-NEXT: regext zero, zero, 8
; CHECK-NEXT: vsw12.v v0, 96(v33)
; CHECK-NEXT: lw ra, -104(sp) # 4-byte Folded Reload
; CHECK-NEXT: regext zero, zero, 9
; CHECK-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, -104
; CHECK-NEXT: addi tp, tp, -4
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v32, tp
; CHECK-NEXT: ret
entry:
;CHECK: flw t0, 76(a0)
;CHECK: flw t0, 72(a0)
;CHECK: flw t0, 68(a0)
;CHECK: flw t0, 64(a0)
;CHECK: lw t0, 60(a0)
;CHECK: lw t0, 56(a0)
;CHECK: lw t0, 52(a0)
;CHECK: lw t0, 48(a0)
;CHECK: lw t0, 44(a0)
;CHECK: lw t0, 40(a0)
;CHECK: lw t0, 36(a0)
;CHECK: lw t0, 32(a0)
;CHECK: lhu t0, 22(a0)
;CHECK: lhu t0, 20(a0)
;CHECK: lhu t0, 18(a0)
;CHECK: lhu t0, 16(a0)
;CHECK: lhu t0, 14(a0)
;CHECK: lhu t0, 12(a0)
;CHECK: lhu t0, 10(a0)
;CHECK: lhu t0, 8(a0)
;CHECK: lw t0, 4(a0)
;CHECK: lbu t1, 4(a0)
;CHECK: lw t0, 80(a0)
;CHECK: lw t0, 0(a0)
;CHECK: lbu t1, 0(a0)
%call = call <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef %c)
store <4 x float> %call, ptr addrspace(1) %result, align 16
%call1 = call <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef %uc)
%call = call <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef %c)
store <4 x float> %call, ptr addrspace(1) %result, align 16
%call1 = call <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef %uc)
%arrayidx2 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 1
store <4 x float> %call1, ptr addrspace(1) %arrayidx2, align 16
%call3 = call <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef %s)
%call3 = call <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef %s)
%arrayidx4 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 2
store <4 x float> %call3, ptr addrspace(1) %arrayidx4, align 16
%call5 = call <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef %us)
store <4 x float> %call3, ptr addrspace(1) %arrayidx4, align 16
%call5 = call <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef %us)
%arrayidx6 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 3
store <4 x float> %call5, ptr addrspace(1) %arrayidx6, align 16
%call7 = call <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef %i)
store <4 x float> %call5, ptr addrspace(1) %arrayidx6, align 16
%call7 = call <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef %i)
%arrayidx8 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 4
store <4 x float> %call7, ptr addrspace(1) %arrayidx8, align 16
%call9 = call <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef %ui)
store <4 x float> %call7, ptr addrspace(1) %arrayidx8, align 16
%call9 = call <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef %ui)
%arrayidx10 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 5
store <4 x float> %call9, ptr addrspace(1) %arrayidx10, align 16
%call11 = call <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef %f)
store <4 x float> %call9, ptr addrspace(1) %arrayidx10, align 16
%call11 = call <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef %f)
%arrayidx12 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 6
store <4 x float> %call11, ptr addrspace(1) %arrayidx12, align 16
ret void
}
declare dso_local <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef)
declare dso_local <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef)

View File

@ -176,13 +176,51 @@ entry:
ret i1 %res
}
define dso_local ventus_kernel void @regexti13(ptr addrspace(1) nocapture
noundef align 4 %A, ptr addrspace(3) nocapture noundef align 4 %B) {
define dso_local ventus_kernel void @regexti13(ptr addrspace(1) nocapture
; CHECK-LABEL: regexti13:
; CHECK: # %bb.0: # %entry
; CHECK: regexti zero, zero, 769
; CHECK-NEXT: addi sp, sp, 12
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: addi tp, tp, 4
; CHECK-NEXT: .cfi_def_cfa_offset 4
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v32, tp
; CHECK-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; CHECK-NEXT: regext zero, zero, 72
; CHECK-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, 0
; CHECK-NEXT: .cfi_offset v33.l, 0
; CHECK-NEXT: lw t0, 0(a0)
; CHECK-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; CHECK-NEXT: lw t0, 4(a0)
; CHECK-NEXT: sw t0, -8(sp) # 4-byte Folded Spill
; CHECK-NEXT: vmv.v.x v0, zero
; CHECK-NEXT: call _Z13get_global_idj
; CHECK-NEXT: regexti zero, zero, 769
; CHECK-NEXT: vand.vi v33, v0, 15
; CHECK-NEXT: vmv.v.x v0, zero
; CHECK-NEXT: call _Z12get_local_idj
; CHECK-NEXT: vsll.vi v0, v0, 2
; CHECK-NEXT: lw t1, -8(sp) # 4-byte Folded Reload
; CHECK-NEXT: vadd.vx v0, v0, t1
; CHECK-NEXT: vlw12.v v0, 0(v0)
; CHECK-NEXT: regext zero, zero, 64
; CHECK-NEXT: vsll.vi v1, v33, 2
; CHECK-NEXT: lw t0, -4(sp) # 4-byte Folded Reload
; CHECK-NEXT: vadd.vx v1, v1, t0
; CHECK-NEXT: vlw12.v v2, 0(v1)
; CHECK-NEXT: vadd.vv v0, v2, v0
; CHECK-NEXT: vsw12.v v0, 0(v1)
; CHECK-NEXT: lw ra, -12(sp) # 4-byte Folded Reload
; CHECK-NEXT: regext zero, zero, 9
; CHECK-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, -12
; CHECK-NEXT: addi tp, tp, -4
; CHECK-NEXT: regext zero, zero, 1
; CHECK-NEXT: vmv.v.x v32, tp
; CHECK-NEXT: ret
noundef align 4 %A, ptr addrspace(3) nocapture noundef align 4 %B) {
entry:
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
%calland = and i32 %call, 399
@ -197,4 +235,4 @@ entry:
}
declare dso_local i32 @_Z13get_global_idj(i32 noundef)
declare dso_local i32 @_Z12get_local_idj(i32 noundef)
declare dso_local i32 @_Z12get_local_idj(i32 noundef)

View File

@ -1,24 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O0 \
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs \
; RUN: -asm-verbose < %s | FileCheck -check-prefix=VENTUS %s
; VENTUS: .section .rodata.ventus.resource,"w",@progbits
; VENTUS: .half 2
; VENTUS: .half 5
; VENTUS: .section .ventus.resource.usage,"w",@progbits
; VENTUS: .half 0
; VENTUS: .half 6
; VENTUS: .half 4
; VENTUS: .half 0
; VENTUS: .half 0
define dso_local ventus_kernel void @usage(ptr addrspace(1) nocapture noundef align 4 %b, ptr addrspace(3) nocapture noundef readonly align 4 %a) local_unnamed_addr #0 {
; VENTUS-LABEL: usage:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: lw t0, 4(a0)
; VENTUS-NEXT: # kill: def $v0 killed $x5
; VENTUS-NEXT: # kill: def $v0 killed $x6
; VENTUS-NEXT: lw t2, 0(t0)
; VENTUS-NEXT: lw t0, 0(t1)
; VENTUS-NEXT: add t0, t0, t2
; VENTUS-NEXT: lw t1, 0(a0)
; VENTUS-NEXT: lw t0, 0(t0)
; VENTUS-NEXT: lw t2, 0(t1)
; VENTUS-NEXT: add t0, t2, t0
; VENTUS-NEXT: sw t0, 0(t1)
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4

View File

@ -54,7 +54,8 @@ entry:
define dso_local i32 @sle(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: sle:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsle.vv v0, v1, v0
; VENTUS-NEXT: vmslt.vv v0, v1, v0
; VENTUS-NEXT: vxor.vi v0, v0, 1
; VENTUS-NEXT: ret
entry:
%cmp.not = icmp sle i32 %a, %b
@ -78,7 +79,8 @@ entry:
define dso_local i32 @sleu(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: sleu:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsleu.vv v0, v1, v0
; VENTUS-NEXT: vmsltu.vv v0, v1, v0
; VENTUS-NEXT: vxor.vi v0, v0, 1
; VENTUS-NEXT: ret
entry:
%cmp.not = icmp ule i32 %a, %b
@ -102,7 +104,8 @@ entry:
define dso_local i32 @slgt_imm(i32 noundef %a) local_unnamed_addr {
; VENTUS-LABEL: slgt_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsgt.vi v0, v0, 12
; VENTUS-NEXT: vmsle.vi v0, v0, 11
; VENTUS-NEXT: vxor.vi v0, v0, 1
; VENTUS-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 11
@ -114,7 +117,8 @@ entry:
define dso_local i32 @slgtu_imm(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: slgtu_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsgtu.vi v0, v0, 12
; VENTUS-NEXT: vmsleu.vi v0, v0, 11
; VENTUS-NEXT: vxor.vi v0, v0, 1
; VENTUS-NEXT: ret
entry:
%cmp = icmp ugt i32 %a, 11
@ -126,7 +130,8 @@ entry:
define dso_local i32 @slgtu_imm1(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: slgtu_imm1:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsgt.vi v0, v0, 12
; VENTUS-NEXT: vmsle.vi v0, v0, 11
; VENTUS-NEXT: vxor.vi v0, v0, 1
; VENTUS-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 11

View File

@ -13,23 +13,24 @@ target triple = "riscv32"
define dso_local i32 @printf(ptr addrspace(2) noundef %fmt, ...) {
; VENTUS-LABEL: printf:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 40
; VENTUS-NEXT: .cfi_def_cfa_offset 40
; VENTUS-NEXT: addi tp, tp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: vmv.v.x v8, tp
; VENTUS-NEXT: vsw.v v7, -36(v8)
; VENTUS-NEXT: vsw.v v6, -32(v8)
; VENTUS-NEXT: vsw.v v5, -28(v8)
; VENTUS-NEXT: vsw.v v4, -24(v8)
; VENTUS-NEXT: vsw.v v7, -4(v8)
; VENTUS-NEXT: vsw.v v6, -8(v8)
; VENTUS-NEXT: vsw.v v5, -12(v8)
; VENTUS-NEXT: vsw.v v4, -16(v8)
; VENTUS-NEXT: vsw.v v3, -20(v8)
; VENTUS-NEXT: vsw.v v2, -16(v8)
; VENTUS-NEXT: vsw.v v1, -12(v8)
; VENTUS-NEXT: addi t0, tp, -12
; VENTUS-NEXT: vsw.v v2, -24(v8)
; VENTUS-NEXT: vsw.v v1, -28(v8)
; VENTUS-NEXT: addi t0, tp, -36
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vsw.v v0, -12(v8)
; VENTUS-NEXT: addi t0, tp, -8
; VENTUS-NEXT: vsw.v v0, -4(v8)
; VENTUS-NEXT: addi t0, tp, -32
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vsw.v v0, -12(v8)
; VENTUS-NEXT: addi tp, tp, -40
; VENTUS-NEXT: vsw.v v0, -4(v8)
; VENTUS-NEXT: addi tp, tp, -4
; VENTUS-NEXT: vmv.v.x v8, tp
; VENTUS-NEXT: ret
entry:
%retval = alloca i32, align 4, addrspace(5)

View File

@ -8,7 +8,7 @@ define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
@ -37,7 +37,7 @@ define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: ret
entry:
@ -63,7 +63,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
@ -97,7 +97,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: .LBB1_3: # %for.cond.cleanup
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
@ -134,8 +134,10 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: regext zero, zero, 72
; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
@ -169,7 +171,6 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: vblt v0, v33, .LBB2_5
; VENTUS-NEXT: # %bb.3: # %if.then2
; VENTUS-NEXT: li t0, 23
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: j .LBB2_6
; VENTUS-NEXT: .LBB2_4: # %if.end7
; VENTUS-NEXT: li t0, 4
@ -178,16 +179,20 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: j .LBB2_7
; VENTUS-NEXT: .LBB2_5:
; VENTUS-NEXT: li t0, 12
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: .LBB2_6: # %cleanup9
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: .LBB2_7: # %cleanup9
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: addi tp, tp, -4
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: ret
entry:
%call = call i32 @_Z13get_global_idj(i32 noundef 0)
@ -219,7 +224,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
@ -269,7 +274,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: .LBB3_5: # %for.cond.cleanup
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
@ -313,7 +318,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
@ -336,7 +341,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: vadd.vi v3, v4, 8
; VENTUS-NEXT: vadd.vi v4, v4, 4
; VENTUS-NEXT: li t2, 1
; VENTUS-NEXT: li s0, 2
; VENTUS-NEXT: li s1, 2
; VENTUS-NEXT: j .LBB4_5
; VENTUS-NEXT: .LBB4_2: # %sw.default
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
@ -361,11 +366,11 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: # %bb.6: # %for.body
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: vadd.vx v5, v4, zero
; VENTUS-NEXT: vmv.v.x v6, s0
; VENTUS-NEXT: vmv.v.x v6, s1
; VENTUS-NEXT: beq t0, t2, .LBB4_3
; VENTUS-NEXT: # %bb.7: # %for.body
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: bne t0, s0, .LBB4_2
; VENTUS-NEXT: bne t0, s1, .LBB4_2
; VENTUS-NEXT: # %bb.8: # %sw.bb4
; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1
; VENTUS-NEXT: li t1, 23
@ -375,7 +380,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun
; VENTUS-NEXT: .LBB4_9: # %for.cond.cleanup
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -8
; VENTUS-NEXT: ret
entry:
@ -427,7 +432,7 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: li t0, 2
; VENTUS-NEXT: vmv.v.x v1, t0
@ -462,7 +467,7 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: .LBB5_7: # %return
; VENTUS-NEXT: # Label of block must be emitted
; VENTUS-NEXT: join zero, zero, 0
; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -4
; VENTUS-NEXT: ret
entry:

View File

@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
// RUN: clang -target riscv32 -mcpu=ventus-gpgpu < %s \
// RUN: | FileCheck -check-prefix=VENTUS %s

View File

@ -20,18 +20,3 @@ vfcvt.f.xu.v v4, v2
vfcvt.f.x.v v4, v2
# CHECK-INST: vfcvt.f.x.v v4, v2
# CHECK-ENCODING: [0x57,0x92,0x21,0x4a]
vfcvt.rtz.xu.f.v v4, v2
# CHECK-INST: vfcvt.rtz.xu.f.v v4, v2
# CHECK-ENCODING: [0x57,0x12,0x23,0x4a]
vfcvt.rtz.x.f.v v4, v2
# CHECK-INST: vfcvt.rtz.x.f.v v4, v2
# CHECK-ENCODING: [0x57,0x92,0x23,0x4a]

View File

@ -4,11 +4,6 @@
# RUN: | llvm-objdump -d --mattr=+v - \
# RUN: | FileCheck %s --check-prefix=CHECK-INST
vmv.x.s gp, v6
# CHECK-INST: vmv.x.s gp, v6
# CHECK-ENCODING: [0xd7,0x21,0x60,0x42]
# SKIP VMV_S_X
# SKIP VMERGE_VVM
@ -20,10 +15,3 @@ vmv.x.s gp, v6
vmv.v.x v6, s0
# CHECK-INST: vmv.v.x v6, s0
# CHECK-ENCODING: [0x57,0x43,0x04,0x5e]
# SKIP VFMERGE_VFM
vfmv.v.f v6, s0
# CHECK-INST: vfmv.v.f v6, s0
# CHECK-ENCODING: [0x57,0x53,0x04,0x5e]