diff --git a/clang/test/CodeGenOpenCL/barrier.cl b/clang/test/CodeGenOpenCL/barrier.cl index b7584551902b..285e9c813df2 100644 --- a/clang/test/CodeGenOpenCL/barrier.cl +++ b/clang/test/CodeGenOpenCL/barrier.cl @@ -1,4 +1,4 @@ -// RUN: clang -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -triple riscv32-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s void test() { // CHECK: call void @llvm.riscv.ventus.barrier(i32 1) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll index 97f21e5f6ff1..5ba1edc4ebf4 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll @@ -11,13 +11,15 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali ; VENTUS-NEXT: .cfi_def_cfa_offset 4 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vmv.v.x v32, tp -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, 4 +; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: .cfi_offset v33.l, 0 ; VENTUS-NEXT: lw t0, 0(a0) -; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill -; VENTUS-NEXT: lw t0, 4(a0) ; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: lw t0, 4(a0) +; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: vmv.v.x v0, zero ; VENTUS-NEXT: call _Z13get_global_idj ; VENTUS-NEXT: regext zero, zero, 1 @@ -25,19 +27,23 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali ; VENTUS-NEXT: vmv.v.x v0, zero ; VENTUS-NEXT: call _Z12get_local_idj ; VENTUS-NEXT: vsll.vi v0, v0, 2 -; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw t1, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: vadd.vx v0, v0, t1 ; VENTUS-NEXT: vlw12.v v0, 0(v0) ; VENTUS-NEXT: regext zero, zero, 64 ; VENTUS-NEXT: vsll.vi v1, v33, 2 -; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload ; VENTUS-NEXT: vadd.vx v1, v1, t0 ; VENTUS-NEXT: vlw12.v v2, 0(v1) ; VENTUS-NEXT: vadd.vv v0, v2, v0 ; VENTUS-NEXT: vsw12.v v0, 0(v1) -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -12 ; VENTUS-NEXT: addi tp, tp, -4 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll index a090733ba98a..a768792f47f1 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll @@ -7,23 +7,25 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { ; VENTUS-LABEL: foo: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi sp, sp, 8 -; VENTUS-NEXT: .cfi_def_cfa_offset 8 +; VENTUS-NEXT: addi sp, sp, 4 +; VENTUS-NEXT: .cfi_def_cfa_offset 4 +; VENTUS-NEXT: addi s0, s0, 20 +; VENTUS-NEXT: .cfi_def_cfa_offset 4 ; VENTUS-NEXT: addi tp, tp, 24 ; VENTUS-NEXT: .cfi_def_cfa_offset 24 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vmv.v.x v32, tp -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, 4 +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -24(v32) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: .cfi_offset v33.l, 0 ; VENTUS-NEXT: lw t0, 0(a0) ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vmv.v.x v33, t0 -; VENTUS-NEXT: lui t1, %hi(foo.b) -; VENTUS-NEXT: addi t2, t1, %lo(foo.b) -; VENTUS-NEXT: addi t1, tp, -24 +; VENTUS-NEXT: addi t1, tp, -20 +; VENTUS-NEXT: addi t2, s0, -20 ; VENTUS-NEXT: vmv.v.x v0, t1 -; VENTUS-NEXT: sw t2, 16(sp) # 4-byte Folded Spill ; VENTUS-NEXT: vmv.v.x v1, t2 ; VENTUS-NEXT: vmv.v.x v2, t0 ; VENTUS-NEXT: call bar @@ -37,10 +39,10 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { ; VENTUS-NEXT: vbltu v1, v0, .LBB0_2 ; VENTUS-NEXT: # %bb.1: # %if.then ; VENTUS-NEXT: vsll.vi v0, v0, 2 -; VENTUS-NEXT: addi t0, tp, -24 +; VENTUS-NEXT: addi t0, tp, -20 ; VENTUS-NEXT: vadd.vx v1, v0, t0 ; VENTUS-NEXT: vlw.v v1, 0(v1) -; VENTUS-NEXT: lw t1, 16(sp) # 4-byte Folded Reload +; VENTUS-NEXT: addi t1, s0, -20 ; VENTUS-NEXT: vadd.vx v2, v0, t1 ; VENTUS-NEXT: vlw12.v v2, 0(v2) ; VENTUS-NEXT: regext zero, zero, 64 @@ -60,9 +62,14 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { ; VENTUS-NEXT: .LBB0_3: # %if.end ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload -; VENTUS-NEXT: addi sp, sp, -8 +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -24(v32) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -4 +; VENTUS-NEXT: addi s0, s0, -20 ; VENTUS-NEXT: addi tp, tp, -24 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %a = alloca [5 x i32], align 4, addrspace(5) @@ -314,6 +321,8 @@ define dso_local i32 @stack_space(ptr addrspace(3) nocapture noundef readnone %a ; VENTUS-NEXT: vadd.vx v0, v0, t0 ; VENTUS-NEXT: vlw.v v0, 0(v0) ; VENTUS-NEXT: addi tp, tp, -48 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %test = alloca [12 x i32], align 4, addrspace(5) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/branch.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/branch.ll index 26ad7ded1865..f77a51171acc 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/branch.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/branch.ll @@ -6,17 +6,19 @@ define i32 @foo(i32 noundef %cond, i32 noundef %a, i32 noundef %b, i32 noundef %c) { ; VENTUS-LABEL: foo: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: vmv.v.x v4, zero +; VENTUS-NEXT: vmv.v.x v5, zero +; VENTUS-NEXT: regexti zero, zero, 0 +; VENTUS-NEXT: vrsub.vi v4, v3, 0 ; VENTUS-NEXT: .Lpcrel_hi0: ; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB0_2) ; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi0) -; VENTUS-NEXT: vbne v0, v4, .LBB0_2 -; VENTUS-NEXT: # %bb.1: -; VENTUS-NEXT: vrsub.vi v3, v3, 0 +; VENTUS-NEXT: vbeq v0, v5, .LBB0_2 +; VENTUS-NEXT: # %bb.1: # %entry +; VENTUS-NEXT: vadd.vx v4, v3, zero ; VENTUS-NEXT: .LBB0_2: # %entry ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 -; VENTUS-NEXT: vmadd.vv v2, v1, v3 +; VENTUS-NEXT: vmadd.vv v2, v1, v4 ; VENTUS-NEXT: vadd.vx v0, v2, zero ; VENTUS-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll index bb3343f7c922..6bd3baba2e91 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll @@ -11,9 +11,13 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp ; VENTUS-NEXT: .cfi_def_cfa_offset 8 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vmv.v.x v32, tp -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, 8 -; VENTUS-NEXT: .cfi_offset v33.l, 4 +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 0 +; VENTUS-NEXT: .cfi_offset v33.l, 0 ; VENTUS-NEXT: .cfi_offset v34.l, 0 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vadd.vx v33, v1, zero @@ -30,9 +34,15 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp ; VENTUS-NEXT: vlw12.v v2, 0(v0) ; VENTUS-NEXT: vadd.vv v1, v2, v1 ; VENTUS-NEXT: vsw12.v v1, 0(v0) -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -4 ; VENTUS-NEXT: addi tp, tp, -8 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll index 65e5569a56ea..f746ed689f20 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll @@ -7,24 +7,24 @@ define ventus_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 12 ; VENTUS-NEXT: .cfi_def_cfa_offset 12 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: lw t0, 0(a0) -; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill -; VENTUS-NEXT: lw t0, 4(a0) ; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: lw t0, 4(a0) +; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: vmv.v.x v0, zero ; VENTUS-NEXT: call _Z13get_global_idj ; VENTUS-NEXT: vsll.vi v0, v0, 2 -; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw t1, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: vadd.vx v1, v0, t1 ; VENTUS-NEXT: vlw12.v v1, 0(v1) -; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload ; VENTUS-NEXT: vadd.vx v0, v0, t0 ; VENTUS-NEXT: vlw12.v v2, 0(v0) ; VENTUS-NEXT: vadd.vv v1, v2, v1 ; VENTUS-NEXT: vsw12.v v1, 0(v0) -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -12 ; VENTUS-NEXT: ret entry: @@ -47,9 +47,13 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp ; VENTUS-NEXT: .cfi_def_cfa_offset 8 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vmv.v.x v32, tp -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, 8 -; VENTUS-NEXT: .cfi_offset v33.l, 4 +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 0 +; VENTUS-NEXT: .cfi_offset v33.l, 0 ; VENTUS-NEXT: .cfi_offset v34.l, 0 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vadd.vx v33, v1, zero @@ -66,9 +70,15 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp ; VENTUS-NEXT: vlw12.v v2, 0(v0) ; VENTUS-NEXT: vadd.vv v1, v2, v1 ; VENTUS-NEXT: vsw12.v v1, 0(v0) -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -4 ; VENTUS-NEXT: addi tp, tp, -8 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll index 453f7c4f2c1a..406bc4ae6632 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/calling-convention.ll @@ -12,30 +12,30 @@ define dso_local ventus_kernel void @kernel_calling_convention(ptr addrspace(1) ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 16 ; VENTUS-NEXT: .cfi_def_cfa_offset 16 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: lw t0, 4(a0) -; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill ; VENTUS-NEXT: lw t0, 8(a0) ; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: lw t0, 0(a0) -; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill ; VENTUS-NEXT: vmv.v.x v0, zero ; VENTUS-NEXT: call _Z13get_global_idj -; VENTUS-NEXT: lw s0, -4(sp) # 4-byte Folded Reload -; VENTUS-NEXT: lw t0, 0(s0) +; VENTUS-NEXT: lw s1, -12(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw t0, 0(s1) ; VENTUS-NEXT: lw t2, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: lw t1, 0(t2) ; VENTUS-NEXT: vadd.vx v0, v0, t0 ; VENTUS-NEXT: vadd.vx v0, v0, t1 -; VENTUS-NEXT: vmv.v.x v1, s0 +; VENTUS-NEXT: vmv.v.x v1, s1 ; VENTUS-NEXT: vsw12.v v0, 0(v1) -; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload -; VENTUS-NEXT: lw t0, 0(s0) +; VENTUS-NEXT: lw s1, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw t0, 0(s1) ; VENTUS-NEXT: lw t2, 0(t2) ; VENTUS-NEXT: add t0, t2, t0 -; VENTUS-NEXT: sw t0, 0(s0) -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: sw t0, 0(s1) +; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -16 ; VENTUS-NEXT: ret entry: @@ -81,12 +81,10 @@ entry: define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readonly %a1, ptr nocapture noundef readonly %a2, ptr nocapture noundef readonly %a3, ptr nocapture noundef readonly %a4, ptr nocapture noundef readonly %a5, ptr nocapture noundef readonly %a6, ptr nocapture noundef readonly %a7, ptr nocapture noundef readonly %a8, ptr nocapture noundef readonly %a9, ptr nocapture noundef readonly %a10, ptr nocapture noundef readonly %a11, ptr nocapture noundef readonly %a12, ptr nocapture noundef readonly %a13, ptr nocapture noundef readonly %a14, ptr nocapture noundef readonly %a15, ptr nocapture noundef readonly %a16, ptr nocapture noundef readonly %a17, ptr nocapture noundef readonly %a18, ptr nocapture noundef readonly %a19, ptr nocapture noundef readonly %a20, ptr nocapture noundef readonly %a21, ptr nocapture noundef readonly %a22, ptr nocapture noundef readonly %a23, ptr nocapture noundef readonly %a24, ptr nocapture noundef readonly %a25, ptr nocapture noundef readonly %a26, ptr nocapture noundef readonly %a27, ptr nocapture noundef readonly %a28, ptr nocapture noundef readonly %a29, ptr nocapture noundef readonly %a30, ptr nocapture noundef readonly %a31, ptr nocapture noundef readonly %a32, ptr addrspace(3) nocapture noundef readonly %a33, ptr addrspace(5) nocapture noundef readonly %a34) local_unnamed_addr #2 { ; VENTUS-LABEL: non_kernel_calling_convention: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi tp, tp, 16 -; VENTUS-NEXT: .cfi_def_cfa_offset 16 -; VENTUS-NEXT: regext zero, zero, 1 -; VENTUS-NEXT: vmv.v.x v32, tp -; VENTUS-NEXT: .cfi_offset v33.l, 4 -; VENTUS-NEXT: .cfi_offset v34.l, 0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill ; VENTUS-NEXT: regext zero, zero, 9 ; VENTUS-NEXT: vlw.v v33, -12(v32) ; VENTUS-NEXT: regext zero, zero, 9 @@ -160,7 +158,13 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon ; VENTUS-NEXT: vadd.vv v0, v0, v1 ; VENTUS-NEXT: vadd.vv v0, v0, v2 ; VENTUS-NEXT: vadd.vv v0, v0, v3 -; VENTUS-NEXT: addi tp, tp, -16 +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload +; VENTUS-NEXT: addi tp, tp, -8 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %0 = load i32, ptr %a1, align 4 @@ -269,27 +273,29 @@ define dso_local i32 @test_add(ptr nocapture noundef readonly %a, ptr nocapture ; VENTUS-NEXT: .cfi_def_cfa_offset 8 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vmv.v.x v32, tp -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: vlw12.v v0, 0(v0) ; VENTUS-NEXT: vadd.vi v0, v0, 1 ; VENTUS-NEXT: regext zero, zero, 8 -; VENTUS-NEXT: vsw.v v0, -8(v32) +; VENTUS-NEXT: vsw.v v0, -4(v32) ; VENTUS-NEXT: vlw12.v v0, 0(v1) ; VENTUS-NEXT: vadd.vi v0, v0, 2 ; VENTUS-NEXT: regext zero, zero, 8 -; VENTUS-NEXT: vsw.v v0, -4(v32) -; VENTUS-NEXT: addi t0, tp, -8 -; VENTUS-NEXT: addi t1, tp, -4 +; VENTUS-NEXT: vsw.v v0, -8(v32) +; VENTUS-NEXT: addi t0, tp, -4 +; VENTUS-NEXT: addi t1, tp, -8 ; VENTUS-NEXT: vmv.v.x v0, t0 ; VENTUS-NEXT: vmv.v.x v1, t1 ; VENTUS-NEXT: call add ; VENTUS-NEXT: regext zero, zero, 8 -; VENTUS-NEXT: vlw.v v1, -8(v32) +; VENTUS-NEXT: vlw.v v1, -4(v32) ; VENTUS-NEXT: vadd.vv v0, v1, v0 -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -4 ; VENTUS-NEXT: addi tp, tp, -8 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %d = alloca i32, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/eliminate_call_frame.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/eliminate_call_frame.ll index bed15f5b84c9..782c8ea3bffd 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/eliminate_call_frame.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/eliminate_call_frame.ll @@ -1,131 +1,717 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=VENTUS %s ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) vscale_range(1,2048) define dso_local <16 x double> @func(<16 x double> noundef %x, <16 x double> noundef %y) local_unnamed_addr { -; VENTUS: vsw.v v33, -4(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v35, -12(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v36, -16(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v37, -20(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v38, -24(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v39, -28(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v40, -32(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v41, -36(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v42, -40(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v43, -44(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v44, -48(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v45, -52(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v46, -56(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v47, -60(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v48, -64(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v49, -68(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v50, -72(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v51, -76(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v52, -80(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v53, -84(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v54, -88(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v55, -92(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v56, -96(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v57, -100(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v58, -104(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v59, -108(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v60, -112(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v61, -116(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v62, -120(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 72 -; VENTUS-NEXT: vsw.v v63, -124(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v64, -128(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v65, -132(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v66, -136(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v67, -140(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v68, -144(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v69, -148(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v70, -152(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v71, -156(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v72, -160(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v73, -164(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v74, -168(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v75, -172(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v76, -176(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v77, -180(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v78, -184(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v79, -188(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v80, -192(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v81, -196(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v82, -200(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v83, -204(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v84, -208(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v85, -212(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v86, -216(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v87, -220(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v88, -224(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v89, -228(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v90, -232(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v91, -236(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v92, -240(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v93, -244(v32) # 4-byte Folded Spill -; VENTUS-NEXT: regext zero, zero, 136 -; VENTUS-NEXT: vsw.v v94, -248(v32) # 4-byte Folded Spill +; VENTUS-LABEL: func: +; VENTUS: # %bb.0: # %entry +; VENTUS-NEXT: addi sp, sp, 4 +; VENTUS-NEXT: .cfi_def_cfa_offset 4 +; VENTUS-NEXT: addi tp, tp, 248 +; VENTUS-NEXT: .cfi_def_cfa_offset 248 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v35, -12(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v36, -16(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v37, -20(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v38, -24(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v39, -28(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v40, -32(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v41, -36(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v42, -40(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v43, -44(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v44, -48(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v45, -52(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v46, -56(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v47, -60(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v48, -64(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v49, -68(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v50, -72(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v51, -76(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v52, -80(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v53, -84(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v54, -88(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v55, -92(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v56, -96(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v57, -100(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v58, -104(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v59, -108(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v60, -112(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v61, -116(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v62, -120(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v63, -124(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v64, -128(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v65, -132(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v66, -136(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v67, -140(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v68, -144(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v69, -148(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v70, -152(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v71, -156(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v72, -160(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v73, -164(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v74, -168(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v75, -172(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v76, -176(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v77, -180(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v78, -184(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v79, -188(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v80, -192(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v81, -196(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v82, -200(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v83, -204(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v84, -208(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v85, -212(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v86, -216(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v87, -220(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v88, -224(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v89, -228(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v90, -232(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v91, -236(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v92, -240(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v93, -244(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v94, -248(v32) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 0 +; VENTUS-NEXT: .cfi_offset v33.l, 0 +; VENTUS-NEXT: .cfi_offset v34.l, 0 +; VENTUS-NEXT: .cfi_offset v35.l, 0 +; VENTUS-NEXT: .cfi_offset v36.l, 0 +; VENTUS-NEXT: .cfi_offset v37.l, 0 +; VENTUS-NEXT: .cfi_offset v38.l, 0 +; VENTUS-NEXT: .cfi_offset v39.l, 0 +; VENTUS-NEXT: .cfi_offset v40.l, 0 +; VENTUS-NEXT: .cfi_offset v41.l, 0 +; VENTUS-NEXT: .cfi_offset v42.l, 0 +; VENTUS-NEXT: .cfi_offset v43.l, 0 +; VENTUS-NEXT: .cfi_offset v44.l, 0 +; VENTUS-NEXT: .cfi_offset v45.l, 0 +; VENTUS-NEXT: .cfi_offset v46.l, 0 +; VENTUS-NEXT: .cfi_offset v47.l, 0 +; VENTUS-NEXT: .cfi_offset v48.l, 0 +; VENTUS-NEXT: .cfi_offset v49.l, 0 +; VENTUS-NEXT: .cfi_offset v50.l, 0 +; VENTUS-NEXT: .cfi_offset v51.l, 0 +; VENTUS-NEXT: .cfi_offset v52.l, 0 +; VENTUS-NEXT: .cfi_offset v53.l, 0 +; VENTUS-NEXT: .cfi_offset v54.l, 0 +; VENTUS-NEXT: .cfi_offset v55.l, 0 +; VENTUS-NEXT: .cfi_offset v56.l, 0 +; VENTUS-NEXT: .cfi_offset v57.l, 0 +; VENTUS-NEXT: .cfi_offset v58.l, 0 +; VENTUS-NEXT: .cfi_offset v59.l, 0 +; VENTUS-NEXT: .cfi_offset v60.l, 0 +; VENTUS-NEXT: .cfi_offset v61.l, 0 +; VENTUS-NEXT: .cfi_offset v62.l, 0 +; VENTUS-NEXT: .cfi_offset v63.l, 0 +; VENTUS-NEXT: .cfi_offset v64.l, 0 +; VENTUS-NEXT: .cfi_offset v65.l, 0 +; VENTUS-NEXT: .cfi_offset v66.l, 0 +; VENTUS-NEXT: .cfi_offset v67.l, 0 +; VENTUS-NEXT: .cfi_offset v68.l, 0 +; VENTUS-NEXT: .cfi_offset v69.l, 0 +; VENTUS-NEXT: .cfi_offset v70.l, 0 +; VENTUS-NEXT: .cfi_offset v71.l, 0 +; VENTUS-NEXT: .cfi_offset v72.l, 0 +; VENTUS-NEXT: .cfi_offset v73.l, 0 +; VENTUS-NEXT: .cfi_offset v74.l, 0 +; VENTUS-NEXT: .cfi_offset v75.l, 0 +; VENTUS-NEXT: .cfi_offset v76.l, 0 +; VENTUS-NEXT: .cfi_offset v77.l, 0 +; VENTUS-NEXT: .cfi_offset v78.l, 0 +; VENTUS-NEXT: .cfi_offset v79.l, 0 +; VENTUS-NEXT: .cfi_offset v80.l, 0 +; VENTUS-NEXT: .cfi_offset v81.l, 0 +; VENTUS-NEXT: .cfi_offset v82.l, 0 +; VENTUS-NEXT: .cfi_offset v83.l, 0 +; VENTUS-NEXT: .cfi_offset v84.l, 0 +; VENTUS-NEXT: .cfi_offset v85.l, 0 +; VENTUS-NEXT: .cfi_offset v86.l, 0 +; VENTUS-NEXT: .cfi_offset v87.l, 0 +; VENTUS-NEXT: .cfi_offset v88.l, 0 +; VENTUS-NEXT: .cfi_offset v89.l, 0 +; VENTUS-NEXT: .cfi_offset v90.l, 0 +; VENTUS-NEXT: .cfi_offset v91.l, 0 +; VENTUS-NEXT: .cfi_offset v92.l, 0 +; VENTUS-NEXT: .cfi_offset v93.l, 0 +; VENTUS-NEXT: .cfi_offset v94.l, 0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v33, v31, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v34, v30, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v35, v29, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v36, v28, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v37, v27, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v38, v26, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v39, v25, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v40, v24, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v41, v23, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v42, v22, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v43, v21, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v44, v20, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v45, v19, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v46, v18, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v47, v17, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v48, v16, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v49, v15, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v50, v14, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v51, v13, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v52, v12, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v53, v11, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v54, v10, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v55, v9, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v56, v8, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v57, v7, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v58, v6, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v59, v5, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v60, v4, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v61, v3, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v62, v2, zero +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v63, -256(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v64, -252(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v65, -264(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v66, -260(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v67, -272(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v68, -268(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v69, -280(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v70, -276(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v71, -288(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v72, -284(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v73, -296(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v74, -292(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v75, -304(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v76, -300(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v77, -312(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v78, -308(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v79, -320(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v80, -316(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v81, -328(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v82, -324(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v83, -336(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v84, -332(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v85, -344(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v86, -340(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v87, -352(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v88, -348(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v89, -360(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v90, -356(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v91, -368(v32) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v92, -364(v32) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vlw.v v2, -376(v32) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vlw.v v3, -372(v32) +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 2 +; VENTUS-NEXT: vadd.vx v93, v0, zero +; VENTUS-NEXT: regext zero, zero, 2 +; VENTUS-NEXT: vadd.vx v94, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v62, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v61, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v91, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v92, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v61, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v62, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v60, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v59, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v89, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v90, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v59, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v60, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v58, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v57, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v87, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v88, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v57, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v58, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v56, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v55, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v85, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v86, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v55, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v56, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v54, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v53, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v83, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v84, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v53, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v54, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v52, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v51, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v81, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v82, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v51, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v52, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v50, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v49, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v79, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v80, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v49, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v50, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v48, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v47, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v77, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v78, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v47, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v48, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v46, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v45, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v75, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v76, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v45, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v46, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v44, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v43, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v73, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v74, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v43, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v44, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v42, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v41, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v71, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v72, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v41, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v42, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v40, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v39, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v69, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v70, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v39, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v40, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v38, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v37, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v67, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v68, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v37, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v38, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v36, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v35, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v2, v65, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v66, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v35, v0, zero +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vadd.vx v36, v1, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v34, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v1, v33, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v2, v63, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v3, v64, zero +; VENTUS-NEXT: call __adddf3@plt +; VENTUS-NEXT: vadd.vx v30, v0, zero +; VENTUS-NEXT: vadd.vx v31, v1, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v0, v93, zero +; VENTUS-NEXT: regext zero, zero, 128 +; VENTUS-NEXT: vadd.vx v1, v94, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v2, v61, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v3, v62, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v4, v59, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v5, v60, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v6, v57, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v7, v58, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v8, v55, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v9, v56, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v10, v53, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v11, v54, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v12, v51, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v13, v52, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v14, v49, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v15, v50, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v16, v47, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v17, v48, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v18, v45, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v19, v46, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v20, v43, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v21, v44, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v22, v41, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v23, v42, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v24, v39, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v25, v40, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v26, v37, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v27, v38, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v28, v35, zero +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v29, v36, zero +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v35, -12(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v36, -16(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v37, -20(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v38, -24(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v39, -28(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v40, -32(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v41, -36(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v42, -40(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v43, -44(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v44, -48(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v45, -52(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v46, -56(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v47, -60(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v48, -64(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v49, -68(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v50, -72(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v51, -76(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v52, -80(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v53, -84(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v54, -88(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v55, -92(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v56, -96(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v57, -100(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v58, -104(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v59, -108(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v60, -112(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v61, -116(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v62, -120(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v63, -124(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v64, -128(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v65, -132(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v66, -136(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v67, -140(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v68, -144(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v69, -148(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v70, -152(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v71, -156(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v72, -160(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v73, -164(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v74, -168(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v75, -172(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v76, -176(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v77, -180(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v78, -184(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v79, -188(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v80, -192(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v81, -196(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v82, -200(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v83, -204(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v84, -208(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v85, -212(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v86, -216(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v87, -220(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v88, -224(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v89, -228(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v90, -232(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v91, -236(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v92, -240(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v93, -244(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v94, -248(v32) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -4 +; VENTUS-NEXT: addi tp, tp, -248 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp +; VENTUS-NEXT: ret entry: %add = fadd <16 x double> %x, %y ret <16 x double> %add @@ -133,7 +719,568 @@ entry: ; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) vscale_range(1,2048) define dso_local ventus_kernel void @test_fn(ptr addrspace(1) nocapture noundef readonly align 128 %x, ptr addrspace(1) nocapture noundef readonly align 128 %y, ptr addrspace(1) nocapture noundef writeonly align 128 %dst) { -; VENTUS: addi tp, tp, 128 +; VENTUS-LABEL: test_fn: +; VENTUS: # %bb.0: # %entry +; VENTUS-NEXT: addi sp, sp, 16 +; VENTUS-NEXT: .cfi_def_cfa_offset 16 +; VENTUS-NEXT: addi tp, tp, 136 +; VENTUS-NEXT: .cfi_def_cfa_offset 136 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp +; VENTUS-NEXT: sw ra, -16(sp) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v34, -8(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v35, -12(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v36, -16(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v37, -20(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v38, -24(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v39, -28(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v40, -32(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v41, -36(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v42, -40(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v43, -44(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v44, -48(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v45, -52(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v46, -56(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v47, -60(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v48, -64(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v49, -68(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v50, -72(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v51, -76(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v52, -80(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v53, -84(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v54, -88(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v55, -92(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v56, -96(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v57, -100(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v58, -104(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v59, -108(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v60, -112(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v61, -116(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v62, -120(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v63, -124(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v64, -128(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v65, -132(v32) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v66, -136(v32) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 0 +; VENTUS-NEXT: .cfi_offset v33.l, 0 +; VENTUS-NEXT: .cfi_offset v34.l, 0 +; VENTUS-NEXT: .cfi_offset v35.l, 0 +; VENTUS-NEXT: .cfi_offset v36.l, 0 +; VENTUS-NEXT: .cfi_offset v37.l, 0 +; VENTUS-NEXT: .cfi_offset v38.l, 0 +; VENTUS-NEXT: .cfi_offset v39.l, 0 +; VENTUS-NEXT: .cfi_offset v40.l, 0 +; VENTUS-NEXT: .cfi_offset v41.l, 0 +; VENTUS-NEXT: .cfi_offset v42.l, 0 +; VENTUS-NEXT: .cfi_offset v43.l, 0 +; VENTUS-NEXT: .cfi_offset v44.l, 0 +; VENTUS-NEXT: .cfi_offset v45.l, 0 +; VENTUS-NEXT: .cfi_offset v46.l, 0 +; VENTUS-NEXT: .cfi_offset v47.l, 0 +; VENTUS-NEXT: .cfi_offset v48.l, 0 +; VENTUS-NEXT: .cfi_offset v49.l, 0 +; VENTUS-NEXT: .cfi_offset v50.l, 0 +; VENTUS-NEXT: .cfi_offset v51.l, 0 +; VENTUS-NEXT: .cfi_offset v52.l, 0 +; VENTUS-NEXT: .cfi_offset v53.l, 0 +; VENTUS-NEXT: .cfi_offset v54.l, 0 +; VENTUS-NEXT: .cfi_offset v55.l, 0 +; VENTUS-NEXT: .cfi_offset v56.l, 0 +; VENTUS-NEXT: .cfi_offset v57.l, 0 +; VENTUS-NEXT: .cfi_offset v58.l, 0 +; VENTUS-NEXT: .cfi_offset v59.l, 0 +; VENTUS-NEXT: .cfi_offset v60.l, 0 +; VENTUS-NEXT: .cfi_offset v61.l, 0 +; VENTUS-NEXT: .cfi_offset v62.l, 0 +; VENTUS-NEXT: .cfi_offset v63.l, 0 +; VENTUS-NEXT: .cfi_offset v64.l, 0 +; VENTUS-NEXT: .cfi_offset v65.l, 0 +; VENTUS-NEXT: .cfi_offset v66.l, 0 +; VENTUS-NEXT: lw t0, 8(a0) +; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: lw t0, 4(a0) +; VENTUS-NEXT: sw t0, -8(sp) # 4-byte Folded Spill +; VENTUS-NEXT: lw t0, 0(a0) +; VENTUS-NEXT: sw t0, -12(sp) # 4-byte Folded Spill +; VENTUS-NEXT: vmv.v.x v0, zero +; VENTUS-NEXT: call _Z13get_global_idj +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vsll.vi v33, v0, 7 +; VENTUS-NEXT: lw t1, -12(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 64 +; VENTUS-NEXT: vadd.vx v0, v33, t1 +; VENTUS-NEXT: vlw12.v v31, 124(v0) +; VENTUS-NEXT: vlw12.v v30, 120(v0) +; VENTUS-NEXT: vlw12.v v29, 116(v0) +; VENTUS-NEXT: vlw12.v v28, 112(v0) +; VENTUS-NEXT: vlw12.v v27, 108(v0) +; VENTUS-NEXT: vlw12.v v26, 104(v0) +; VENTUS-NEXT: vlw12.v v25, 100(v0) +; VENTUS-NEXT: vlw12.v v24, 96(v0) +; VENTUS-NEXT: vlw12.v v23, 92(v0) +; VENTUS-NEXT: vlw12.v v22, 88(v0) +; VENTUS-NEXT: vlw12.v v21, 84(v0) +; VENTUS-NEXT: vlw12.v v20, 80(v0) +; VENTUS-NEXT: vlw12.v v19, 76(v0) +; VENTUS-NEXT: vlw12.v v18, 72(v0) +; VENTUS-NEXT: vlw12.v v17, 68(v0) +; VENTUS-NEXT: vlw12.v v16, 64(v0) +; VENTUS-NEXT: vlw12.v v15, 60(v0) +; VENTUS-NEXT: vlw12.v v14, 56(v0) +; VENTUS-NEXT: vlw12.v v13, 52(v0) +; VENTUS-NEXT: vlw12.v v12, 48(v0) +; VENTUS-NEXT: vlw12.v v11, 44(v0) +; VENTUS-NEXT: vlw12.v v10, 40(v0) +; VENTUS-NEXT: vlw12.v v9, 36(v0) +; VENTUS-NEXT: vlw12.v v8, 32(v0) +; VENTUS-NEXT: vlw12.v v7, 28(v0) +; VENTUS-NEXT: vlw12.v v6, 24(v0) +; VENTUS-NEXT: vlw12.v v5, 20(v0) +; VENTUS-NEXT: vlw12.v v4, 16(v0) +; VENTUS-NEXT: vlw12.v v3, 12(v0) +; VENTUS-NEXT: vlw12.v v2, 8(v0) +; VENTUS-NEXT: vlw12.v v1, 4(v0) +; VENTUS-NEXT: vlw12.v v0, 0(v0) +; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 65 +; VENTUS-NEXT: vadd.vx v34, v33, t0 +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v35, 0(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v36, 4(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v37, 8(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v38, 12(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v39, 16(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v40, 20(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v41, 24(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v42, 28(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v43, 32(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v44, 36(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v45, 40(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v46, 44(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v47, 48(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v48, 52(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v49, 56(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v50, 60(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v51, 64(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v52, 68(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v53, 72(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v54, 76(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v55, 80(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v56, 84(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v57, 88(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v58, 92(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v59, 96(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v60, 100(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v61, 104(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v62, 108(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v63, 112(v34) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw12.v v64, 116(v34) +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw12.v v65, 120(v34) +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw12.v v34, 124(v34) +; VENTUS-NEXT: addi tp, tp, 128 +; VENTUS-NEXT: li t0, 4 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 2 +; VENTUS-NEXT: vmv.v.x v66, t0 +; VENTUS-NEXT: regext zero, zero, 80 +; VENTUS-NEXT: vsw.v v34, 0(v66) +; VENTUS-NEXT: li t0, 8 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v65, 0(v34) +; VENTUS-NEXT: li t0, 12 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 136 +; VENTUS-NEXT: vsw.v v64, 0(v34) +; VENTUS-NEXT: li t0, 16 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v63, 0(v34) +; VENTUS-NEXT: li t0, 20 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v62, 0(v34) +; VENTUS-NEXT: li t0, 24 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v61, 0(v34) +; VENTUS-NEXT: li t0, 28 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v60, 0(v34) +; VENTUS-NEXT: li t0, 32 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v59, 0(v34) +; VENTUS-NEXT: li t0, 36 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v58, 0(v34) +; VENTUS-NEXT: li t0, 40 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v57, 0(v34) +; VENTUS-NEXT: li t0, 44 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v56, 0(v34) +; VENTUS-NEXT: li t0, 48 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v55, 0(v34) +; VENTUS-NEXT: li t0, 52 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v54, 0(v34) +; VENTUS-NEXT: li t0, 56 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v53, 0(v34) +; VENTUS-NEXT: li t0, 60 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v52, 0(v34) +; VENTUS-NEXT: li t0, 64 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v51, 0(v34) +; VENTUS-NEXT: li t0, 68 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v50, 0(v34) +; VENTUS-NEXT: li t0, 72 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v49, 0(v34) +; VENTUS-NEXT: li t0, 76 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v48, 0(v34) +; VENTUS-NEXT: li t0, 80 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v47, 0(v34) +; VENTUS-NEXT: li t0, 84 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v46, 0(v34) +; VENTUS-NEXT: li t0, 88 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v45, 0(v34) +; VENTUS-NEXT: li t0, 92 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v44, 0(v34) +; VENTUS-NEXT: li t0, 96 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v43, 0(v34) +; VENTUS-NEXT: li t0, 100 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v42, 0(v34) +; VENTUS-NEXT: li t0, 104 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v41, 0(v34) +; VENTUS-NEXT: li t0, 108 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v40, 0(v34) +; VENTUS-NEXT: li t0, 112 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v39, 0(v34) +; VENTUS-NEXT: li t0, 116 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v38, 0(v34) +; VENTUS-NEXT: li t0, 120 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v37, 0(v34) +; VENTUS-NEXT: li t0, 124 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v36, 0(v34) +; VENTUS-NEXT: li t0, 128 +; VENTUS-NEXT: sub t0, tp, t0 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v34, t0 +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v35, 0(v34) +; VENTUS-NEXT: call _Z3minDv16_dS_ +; VENTUS-NEXT: addi tp, tp, -128 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp +; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 65 +; VENTUS-NEXT: vadd.vx v33, v33, t0 +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v31, 124(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v30, 120(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v29, 116(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v28, 112(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v27, 108(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v26, 104(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v25, 100(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v24, 96(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v23, 92(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v22, 88(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v21, 84(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v20, 80(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v19, 76(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v18, 72(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v17, 68(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v16, 64(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v15, 60(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v14, 56(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v13, 52(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v12, 48(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v11, 44(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v10, 40(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v9, 36(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v8, 32(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v7, 28(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v6, 24(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v5, 20(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v4, 16(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v3, 12(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v2, 8(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v1, 4(v33) +; VENTUS-NEXT: regext zero, zero, 8 +; VENTUS-NEXT: vsw12.v v0, 0(v33) +; VENTUS-NEXT: lw ra, -16(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v34, -8(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v35, -12(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v36, -16(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v37, -20(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v38, -24(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v39, -28(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v40, -32(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v41, -36(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v42, -40(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v43, -44(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v44, -48(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v45, -52(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v46, -56(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v47, -60(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v48, -64(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v49, -68(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v50, -72(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v51, -76(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v52, -80(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v53, -84(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v54, -88(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v55, -92(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v56, -96(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v57, -100(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v58, -104(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v59, -108(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v60, -112(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v61, -116(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v62, -120(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v63, -124(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v64, -128(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v65, -132(v32) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 10 +; VENTUS-NEXT: vlw.v v66, -136(v32) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -16 +; VENTUS-NEXT: addi tp, tp, -136 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp +; VENTUS-NEXT: ret ; VENTUS=NEXT: li t0, 4 ; VENTUS=NEXT: sub t0, tp, t0 ; VENTUS=NEXT: regext zero, zero, 2 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll index 40a944d2e4aa..8e0dca08af1c 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll @@ -25,7 +25,7 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 8 ; VENTUS-NEXT: .cfi_def_cfa_offset 8 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: lw t0, 8(a0) ; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill @@ -37,7 +37,7 @@ define dso_local ventus_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr add ; VENTUS-NEXT: lw t0, -4(sp) # 4-byte Folded Reload ; VENTUS-NEXT: vmv.v.x v1, t0 ; VENTUS-NEXT: vsw12.v v0, 0(v1) -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -8 ; VENTUS-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll index 27a534545644..907e42f2cebe 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/int_arithmetic.ll @@ -285,6 +285,7 @@ define i32 @vmulhsu_x(i32 %a) nounwind { define i32 @vrsub_i(i32 %a) nounwind { ; VENTUS-LABEL: vrsub_i: ; VENTUS: # %bb.0: +; VENTUS-NEXT: regexti zero, zero, 0 ; VENTUS-NEXT: vrsub.vi v0, v0, 12 ; VENTUS-NEXT: ret %1 = sub i32 12, %a diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/parameter-vector-struct-types.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/parameter-vector-struct-types.ll index 175e968314fd..aad14a9b58e2 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/parameter-vector-struct-types.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/parameter-vector-struct-types.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \ ; RUN: | FileCheck %s @@ -6,11 +7,20 @@ ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048) ; Here we foucus on kernel struct argument define dso_local ventus_kernel void @test_kernel1(i8 noundef %c, %struct.MyStruct %st.coerce, i8 noundef %uc, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { +; CHECK-LABEL: test_kernel1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lb t0, 0(a0) +; CHECK-NEXT: lbu t1, 24(a0) +; CHECK-NEXT: lw t2, 28(a0) +; CHECK-NEXT: fcvt.s.w t0, t0 +; CHECK-NEXT: lw s1, 8(a0) +; CHECK-NEXT: sw t0, 0(t2) +; CHECK-NEXT: fcvt.s.w t0, s1 +; CHECK-NEXT: fcvt.s.wu t1, t1 +; CHECK-NEXT: sw t0, 4(t2) +; CHECK-NEXT: sw t1, 8(t2) +; CHECK-NEXT: ret entry: - ; CHECK: lb t0, 0(a0) - ; CHECK: lbu t1, 24(a0) - ; CHECK: lw t2, 28(a0) - ; CHECK: lw s0, 8(a0) %st.coerce.fca.0.extract = extractvalue %struct.MyStruct %st.coerce, 0 %conv = sitofp i8 %c to float store float %conv, ptr addrspace(1) %result, align 4 @@ -26,15 +36,31 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) vscale_range(1,2048) ; Here we foucus on scalar argument define dso_local ventus_kernel void @test_kernel2(i8 noundef %c, i8 noundef %uc, i16 noundef %s, i16 noundef %us, i32 noundef %i, i32 noundef %ui, float noundef %f, ptr addrspace(1) nocapture noundef writeonly align 4 %result) { +; CHECK-LABEL: test_kernel2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lw t0, 24(a0) +; CHECK-NEXT: lw t1, 20(a0) +; CHECK-NEXT: lw t2, 16(a0) +; CHECK-NEXT: lhu s1, 12(a0) +; CHECK-NEXT: lh a1, 8(a0) +; CHECK-NEXT: lb a2, 0(a0) +; CHECK-NEXT: lbu a3, 4(a0) +; CHECK-NEXT: lw a0, 28(a0) +; CHECK-NEXT: fcvt.s.w a2, a2 +; CHECK-NEXT: fcvt.s.wu a3, a3 +; CHECK-NEXT: sw a2, 0(a0) +; CHECK-NEXT: sw a3, 4(a0) +; CHECK-NEXT: fcvt.s.w a1, a1 +; CHECK-NEXT: fcvt.s.wu s1, s1 +; CHECK-NEXT: sw a1, 8(a0) +; CHECK-NEXT: sw s1, 12(a0) +; CHECK-NEXT: fcvt.s.w t2, t2 +; CHECK-NEXT: fcvt.s.wu t1, t1 +; CHECK-NEXT: sw t2, 16(a0) +; CHECK-NEXT: sw t1, 20(a0) +; CHECK-NEXT: sw t0, 24(a0) +; CHECK-NEXT: ret entry: - ; CHECK: flw t0, 24(a0) - ; CHECK: lw t1, 20(a0) - ; CHECK: lw t2, 16(a0) - ; CHECK: lhu s0, 12(a0) - ; CHECK: lh s1, 8(a0) - ; CHECK: lb a1, 0(a0) - ; CHECK: lbu a2, 4(a0) - ; CHECK: lw a0, 28(a0) %conv = sitofp i8 %c to float store float %conv, ptr addrspace(1) %result, align 4 %conv1 = uitofp i8 %uc to float @@ -60,109 +86,381 @@ entry: ; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: write) vscale_range(1,2048) ; Here we foucus on vector argument define dso_local ventus_kernel void @test_kernel3(<2 x i8> noundef %c, <2 x i8> noundef %uc, <2 x i16> noundef %s, <2 x i16> noundef %us, <2 x i32> noundef %i, <2 x i32> noundef %ui, <2 x float> noundef %f, ptr addrspace(1) nocapture noundef writeonly align 8 %result) { +; CHECK-LABEL: test_kernel3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, 56 +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: addi tp, tp, 4 +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v32, tp +; CHECK-NEXT: sw ra, -56(sp) # 4-byte Folded Spill +; CHECK-NEXT: regext zero, zero, 72 +; CHECK-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, 0 +; CHECK-NEXT: .cfi_offset v33.l, 0 +; CHECK-NEXT: lw t0, 36(a0) +; CHECK-NEXT: sw t0, -4(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 32(a0) +; CHECK-NEXT: sw t0, -8(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 28(a0) +; CHECK-NEXT: sw t0, -12(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 24(a0) +; CHECK-NEXT: sw t0, -16(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 20(a0) +; CHECK-NEXT: sw t0, -20(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 16(a0) +; CHECK-NEXT: sw t0, -24(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 14(a0) +; CHECK-NEXT: sw t0, -28(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 12(a0) +; CHECK-NEXT: sw t0, -32(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 10(a0) +; CHECK-NEXT: sw t0, -36(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 8(a0) +; CHECK-NEXT: sw t0, -40(sp) # 4-byte Folded Spill +; CHECK-NEXT: lbu t0, 5(a0) +; CHECK-NEXT: sw t0, -44(sp) # 4-byte Folded Spill +; CHECK-NEXT: lbu t0, 4(a0) +; CHECK-NEXT: sw t0, -48(sp) # 4-byte Folded Spill +; CHECK-NEXT: lbu t0, 1(a0) +; CHECK-NEXT: lbu t1, 0(a0) +; CHECK-NEXT: lw t2, 40(a0) +; CHECK-NEXT: sw t2, -52(sp) # 4-byte Folded Spill +; CHECK-NEXT: vmv.v.x v0, t1 +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: call _Z14convert_float2Dv2_c +; CHECK-NEXT: lw t0, -52(sp) # 4-byte Folded Reload +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v33, t0 +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 4(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 0(v33) +; CHECK-NEXT: lw t0, -48(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -44(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: call _Z14convert_float2Dv2_h +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 12(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 8(v33) +; CHECK-NEXT: lw t0, -40(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -36(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: call _Z14convert_float2Dv2_s +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 20(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 16(v33) +; CHECK-NEXT: lw t0, -32(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -28(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: call _Z14convert_float2Dv2_t +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 28(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 24(v33) +; CHECK-NEXT: lw t0, -24(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -20(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: call _Z14convert_float2Dv2_i +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 36(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 32(v33) +; CHECK-NEXT: lw t0, -16(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -12(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: call _Z14convert_float2Dv2_j +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 44(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 40(v33) +; CHECK-NEXT: lw t0, -8(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -4(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: call _Z14convert_float2Dv2_f +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 52(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 48(v33) +; CHECK-NEXT: lw ra, -56(sp) # 4-byte Folded Reload +; CHECK-NEXT: regext zero, zero, 9 +; CHECK-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, -56 +; CHECK-NEXT: addi tp, tp, -4 +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v32, tp +; CHECK-NEXT: ret entry: - ;CHECK: flw t0, 36(a0) - ;CHECK: flw t0, 32(a0) - ;CHECK: lw t0, 28(a0) - ;CHECK: lw t0, 24(a0) - ;CHECK: lw t0, 20(a0) - ;CHECK: lw t0, 16(a0) - ;CHECK: lhu t0, 14(a0) - ;CHECK: lhu t0, 12(a0) - ;CHECK: lhu t0, 10(a0) - ;CHECK: lhu t0, 8(a0) - ;CHECK: lbu t0, 5(a0) - ;CHECK: lbu t0, 4(a0) - ;CHECK: lbu t0, 1(a0) - ;CHECK: lbu t1, 0(a0) - ;CHECK: lw t2, 40(a0) - %call = call <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef %c) + %call = call <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef %c) store <2 x float> %call, ptr addrspace(1) %result, align 8 - %call1 = call <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef %uc) + %call1 = call <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef %uc) %arrayidx2 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 1 store <2 x float> %call1, ptr addrspace(1) %arrayidx2, align 8 - %call3 = call <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef %s) + %call3 = call <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef %s) %arrayidx4 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 2 store <2 x float> %call3, ptr addrspace(1) %arrayidx4, align 8 - %call5 = call <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef %us) + %call5 = call <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef %us) %arrayidx6 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 3 store <2 x float> %call5, ptr addrspace(1) %arrayidx6, align 8 - %call7 = call <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef %i) + %call7 = call <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef %i) %arrayidx8 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 4 store <2 x float> %call7, ptr addrspace(1) %arrayidx8, align 8 - %call9 = call <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef %ui) + %call9 = call <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef %ui) %arrayidx10 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 5 store <2 x float> %call9, ptr addrspace(1) %arrayidx10, align 8 - %call11 = call <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef %f) + %call11 = call <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef %f) %arrayidx12 = getelementptr inbounds <2 x float>, ptr addrspace(1) %result, i32 6 store <2 x float> %call11, ptr addrspace(1) %arrayidx12, align 8 ret void } -declare dso_local <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef) -declare dso_local <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef) -declare dso_local <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef) -declare dso_local <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef) -declare dso_local <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef) -declare dso_local <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef) +declare dso_local <2 x float> @_Z14convert_float2Dv2_c(<2 x i8> noundef) +declare dso_local <2 x float> @_Z14convert_float2Dv2_h(<2 x i8> noundef) +declare dso_local <2 x float> @_Z14convert_float2Dv2_s(<2 x i16> noundef) +declare dso_local <2 x float> @_Z14convert_float2Dv2_t(<2 x i16> noundef) +declare dso_local <2 x float> @_Z14convert_float2Dv2_i(<2 x i32> noundef) +declare dso_local <2 x float> @_Z14convert_float2Dv2_j(<2 x i32> noundef) declare dso_local <2 x float> @_Z14convert_float2Dv2_f(<2 x float> noundef) ; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn memory(argmem: write) vscale_range(1,2048) ; Here we foucus on vector argument define dso_local ventus_kernel void @test_kernel4(<4 x i8> noundef %c, <4 x i8> noundef %uc, <4 x i16> noundef %s, <4 x i16> noundef %us, <4 x i32> noundef %i, <4 x i32> noundef %ui, <4 x float> noundef %f, ptr addrspace(1) nocapture noundef writeonly align 16 %result) { +; CHECK-LABEL: test_kernel4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, 104 +; CHECK-NEXT: .cfi_def_cfa_offset 104 +; CHECK-NEXT: addi tp, tp, 4 +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v32, tp +; CHECK-NEXT: sw ra, -104(sp) # 4-byte Folded Spill +; CHECK-NEXT: regext zero, zero, 72 +; CHECK-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, 0 +; CHECK-NEXT: .cfi_offset v33.l, 0 +; CHECK-NEXT: lw t0, 76(a0) +; CHECK-NEXT: sw t0, -4(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 72(a0) +; CHECK-NEXT: sw t0, -8(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 68(a0) +; CHECK-NEXT: sw t0, -12(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 64(a0) +; CHECK-NEXT: sw t0, -16(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 60(a0) +; CHECK-NEXT: sw t0, -20(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 56(a0) +; CHECK-NEXT: sw t0, -24(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 52(a0) +; CHECK-NEXT: sw t0, -28(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 48(a0) +; CHECK-NEXT: sw t0, -32(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 44(a0) +; CHECK-NEXT: sw t0, -36(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 40(a0) +; CHECK-NEXT: sw t0, -40(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 36(a0) +; CHECK-NEXT: sw t0, -44(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 32(a0) +; CHECK-NEXT: sw t0, -48(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 22(a0) +; CHECK-NEXT: sw t0, -52(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 20(a0) +; CHECK-NEXT: sw t0, -56(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 18(a0) +; CHECK-NEXT: sw t0, -60(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 16(a0) +; CHECK-NEXT: sw t0, -64(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 14(a0) +; CHECK-NEXT: sw t0, -68(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 12(a0) +; CHECK-NEXT: sw t0, -72(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 10(a0) +; CHECK-NEXT: sw t0, -76(sp) # 4-byte Folded Spill +; CHECK-NEXT: lhu t0, 8(a0) +; CHECK-NEXT: sw t0, -80(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 4(a0) +; CHECK-NEXT: lbu t1, 4(a0) +; CHECK-NEXT: sw t1, -88(sp) # 4-byte Folded Spill +; CHECK-NEXT: srli t1, t0, 24 +; CHECK-NEXT: sw t1, -84(sp) # 4-byte Folded Spill +; CHECK-NEXT: srli t1, t0, 8 +; CHECK-NEXT: andi t1, t1, 255 +; CHECK-NEXT: sw t1, -92(sp) # 4-byte Folded Spill +; CHECK-NEXT: srli t0, t0, 16 +; CHECK-NEXT: andi t0, t0, 255 +; CHECK-NEXT: sw t0, -96(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 80(a0) +; CHECK-NEXT: sw t0, -100(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 0(a0) +; CHECK-NEXT: lbu t1, 0(a0) +; CHECK-NEXT: srli t2, t0, 24 +; CHECK-NEXT: srli s1, t0, 8 +; CHECK-NEXT: andi s1, s1, 255 +; CHECK-NEXT: srli t0, t0, 16 +; CHECK-NEXT: andi t0, t0, 255 +; CHECK-NEXT: vmv.v.x v0, t1 +; CHECK-NEXT: vmv.v.x v1, s1 +; CHECK-NEXT: vmv.v.x v2, t0 +; CHECK-NEXT: vmv.v.x v3, t2 +; CHECK-NEXT: call _Z14convert_float4Dv4_c +; CHECK-NEXT: lw t0, -100(sp) # 4-byte Folded Reload +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v33, t0 +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v3, 12(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v2, 8(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 4(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 0(v33) +; CHECK-NEXT: lw t0, -88(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -92(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: lw t0, -96(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v2, t0 +; CHECK-NEXT: lw t0, -84(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v3, t0 +; CHECK-NEXT: call _Z14convert_float4Dv4_h +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v3, 28(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v2, 24(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 20(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 16(v33) +; CHECK-NEXT: lw t0, -80(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -76(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: lw t0, -72(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v2, t0 +; CHECK-NEXT: lw t0, -68(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v3, t0 +; CHECK-NEXT: call _Z14convert_float4Dv4_s +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v3, 44(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v2, 40(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 36(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 32(v33) +; CHECK-NEXT: lw t0, -64(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -60(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: lw t0, -56(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v2, t0 +; CHECK-NEXT: lw t0, -52(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v3, t0 +; CHECK-NEXT: call _Z14convert_float4Dv4_t +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v3, 60(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v2, 56(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 52(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 48(v33) +; CHECK-NEXT: lw t0, -48(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -44(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: lw t0, -40(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v2, t0 +; CHECK-NEXT: lw t0, -36(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v3, t0 +; CHECK-NEXT: call _Z14convert_float4Dv4_i +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v3, 76(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v2, 72(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 68(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 64(v33) +; CHECK-NEXT: lw t0, -32(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -28(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: lw t0, -24(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v2, t0 +; CHECK-NEXT: lw t0, -20(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v3, t0 +; CHECK-NEXT: call _Z14convert_float4Dv4_j +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v3, 92(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v2, 88(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 84(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 80(v33) +; CHECK-NEXT: lw t0, -16(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v0, t0 +; CHECK-NEXT: lw t0, -12(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v1, t0 +; CHECK-NEXT: lw t0, -8(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v2, t0 +; CHECK-NEXT: lw t0, -4(sp) # 4-byte Folded Reload +; CHECK-NEXT: vmv.v.x v3, t0 +; CHECK-NEXT: call _Z14convert_float4Dv4_f +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v3, 108(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v2, 104(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v1, 100(v33) +; CHECK-NEXT: regext zero, zero, 8 +; CHECK-NEXT: vsw12.v v0, 96(v33) +; CHECK-NEXT: lw ra, -104(sp) # 4-byte Folded Reload +; CHECK-NEXT: regext zero, zero, 9 +; CHECK-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, -104 +; CHECK-NEXT: addi tp, tp, -4 +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v32, tp +; CHECK-NEXT: ret entry: - ;CHECK: flw t0, 76(a0) - ;CHECK: flw t0, 72(a0) - ;CHECK: flw t0, 68(a0) - ;CHECK: flw t0, 64(a0) - ;CHECK: lw t0, 60(a0) - ;CHECK: lw t0, 56(a0) - ;CHECK: lw t0, 52(a0) - ;CHECK: lw t0, 48(a0) - ;CHECK: lw t0, 44(a0) - ;CHECK: lw t0, 40(a0) - ;CHECK: lw t0, 36(a0) - ;CHECK: lw t0, 32(a0) - ;CHECK: lhu t0, 22(a0) - ;CHECK: lhu t0, 20(a0) - ;CHECK: lhu t0, 18(a0) - ;CHECK: lhu t0, 16(a0) - ;CHECK: lhu t0, 14(a0) - ;CHECK: lhu t0, 12(a0) - ;CHECK: lhu t0, 10(a0) - ;CHECK: lhu t0, 8(a0) - ;CHECK: lw t0, 4(a0) - ;CHECK: lbu t1, 4(a0) - ;CHECK: lw t0, 80(a0) - ;CHECK: lw t0, 0(a0) - ;CHECK: lbu t1, 0(a0) - %call = call <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef %c) - store <4 x float> %call, ptr addrspace(1) %result, align 16 - %call1 = call <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef %uc) + %call = call <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef %c) + store <4 x float> %call, ptr addrspace(1) %result, align 16 + %call1 = call <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef %uc) %arrayidx2 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 1 store <4 x float> %call1, ptr addrspace(1) %arrayidx2, align 16 - %call3 = call <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef %s) + %call3 = call <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef %s) %arrayidx4 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 2 - store <4 x float> %call3, ptr addrspace(1) %arrayidx4, align 16 - %call5 = call <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef %us) + store <4 x float> %call3, ptr addrspace(1) %arrayidx4, align 16 + %call5 = call <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef %us) %arrayidx6 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 3 - store <4 x float> %call5, ptr addrspace(1) %arrayidx6, align 16 - %call7 = call <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef %i) + store <4 x float> %call5, ptr addrspace(1) %arrayidx6, align 16 + %call7 = call <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef %i) %arrayidx8 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 4 - store <4 x float> %call7, ptr addrspace(1) %arrayidx8, align 16 - %call9 = call <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef %ui) + store <4 x float> %call7, ptr addrspace(1) %arrayidx8, align 16 + %call9 = call <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef %ui) %arrayidx10 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 5 - store <4 x float> %call9, ptr addrspace(1) %arrayidx10, align 16 - %call11 = call <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef %f) + store <4 x float> %call9, ptr addrspace(1) %arrayidx10, align 16 + %call11 = call <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef %f) %arrayidx12 = getelementptr inbounds <4 x float>, ptr addrspace(1) %result, i32 6 store <4 x float> %call11, ptr addrspace(1) %arrayidx12, align 16 ret void } -declare dso_local <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef) -declare dso_local <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef) -declare dso_local <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef) -declare dso_local <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef) -declare dso_local <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef) -declare dso_local <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef) -declare dso_local <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef) +declare dso_local <4 x float> @_Z14convert_float4Dv4_c(<4 x i8> noundef) +declare dso_local <4 x float> @_Z14convert_float4Dv4_h(<4 x i8> noundef) +declare dso_local <4 x float> @_Z14convert_float4Dv4_s(<4 x i16> noundef) +declare dso_local <4 x float> @_Z14convert_float4Dv4_t(<4 x i16> noundef) +declare dso_local <4 x float> @_Z14convert_float4Dv4_i(<4 x i32> noundef) +declare dso_local <4 x float> @_Z14convert_float4Dv4_j(<4 x i32> noundef) +declare dso_local <4 x float> @_Z14convert_float4Dv4_f(<4 x float> noundef) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/regexti.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/regexti.ll index 7e9981e39782..132d2cd66270 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/regexti.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/regexti.ll @@ -176,13 +176,51 @@ entry: ret i1 %res } -define dso_local ventus_kernel void @regexti13(ptr addrspace(1) nocapture - noundef align 4 %A, ptr addrspace(3) nocapture noundef align 4 %B) { +define dso_local ventus_kernel void @regexti13(ptr addrspace(1) nocapture ; CHECK-LABEL: regexti13: ; CHECK: # %bb.0: # %entry -; CHECK: regexti zero, zero, 769 +; CHECK-NEXT: addi sp, sp, 12 +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: addi tp, tp, 4 +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v32, tp +; CHECK-NEXT: sw ra, -12(sp) # 4-byte Folded Spill +; CHECK-NEXT: regext zero, zero, 72 +; CHECK-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; CHECK-NEXT: .cfi_offset ra, 0 +; CHECK-NEXT: .cfi_offset v33.l, 0 +; CHECK-NEXT: lw t0, 0(a0) +; CHECK-NEXT: sw t0, -4(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw t0, 4(a0) +; CHECK-NEXT: sw t0, -8(sp) # 4-byte Folded Spill +; CHECK-NEXT: vmv.v.x v0, zero +; CHECK-NEXT: call _Z13get_global_idj +; CHECK-NEXT: regexti zero, zero, 769 ; CHECK-NEXT: vand.vi v33, v0, 15 - +; CHECK-NEXT: vmv.v.x v0, zero +; CHECK-NEXT: call _Z12get_local_idj +; CHECK-NEXT: vsll.vi v0, v0, 2 +; CHECK-NEXT: lw t1, -8(sp) # 4-byte Folded Reload +; CHECK-NEXT: vadd.vx v0, v0, t1 +; CHECK-NEXT: vlw12.v v0, 0(v0) +; CHECK-NEXT: regext zero, zero, 64 +; CHECK-NEXT: vsll.vi v1, v33, 2 +; CHECK-NEXT: lw t0, -4(sp) # 4-byte Folded Reload +; CHECK-NEXT: vadd.vx v1, v1, t0 +; CHECK-NEXT: vlw12.v v2, 0(v1) +; CHECK-NEXT: vadd.vv v0, v2, v0 +; CHECK-NEXT: vsw12.v v0, 0(v1) +; CHECK-NEXT: lw ra, -12(sp) # 4-byte Folded Reload +; CHECK-NEXT: regext zero, zero, 9 +; CHECK-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, -12 +; CHECK-NEXT: addi tp, tp, -4 +; CHECK-NEXT: regext zero, zero, 1 +; CHECK-NEXT: vmv.v.x v32, tp +; CHECK-NEXT: ret + noundef align 4 %A, ptr addrspace(3) nocapture noundef align 4 %B) { + entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) %calland = and i32 %call, 399 @@ -197,4 +235,4 @@ entry: } declare dso_local i32 @_Z13get_global_idj(i32 noundef) -declare dso_local i32 @_Z12get_local_idj(i32 noundef) \ No newline at end of file +declare dso_local i32 @_Z12get_local_idj(i32 noundef) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/resource-usage.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/resource-usage.ll index ecfe59898e04..64a54b3e99d2 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/resource-usage.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/resource-usage.ll @@ -1,24 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs -O0 \ +; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs \ ; RUN: -asm-verbose < %s | FileCheck -check-prefix=VENTUS %s -; VENTUS: .section .rodata.ventus.resource,"w",@progbits -; VENTUS: .half 2 -; VENTUS: .half 5 + +; VENTUS: .section .ventus.resource.usage,"w",@progbits +; VENTUS: .half 0 +; VENTUS: .half 6 ; VENTUS: .half 4 -; VENTUS: .half 0 +; VENTUS: .half 0 + define dso_local ventus_kernel void @usage(ptr addrspace(1) nocapture noundef align 4 %b, ptr addrspace(3) nocapture noundef readonly align 4 %a) local_unnamed_addr #0 { ; VENTUS-LABEL: usage: ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 4 ; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill -; VENTUS-NEXT: lw t1, 0(a0) ; VENTUS-NEXT: lw t0, 4(a0) -; VENTUS-NEXT: # kill: def $v0 killed $x5 -; VENTUS-NEXT: # kill: def $v0 killed $x6 -; VENTUS-NEXT: lw t2, 0(t0) -; VENTUS-NEXT: lw t0, 0(t1) -; VENTUS-NEXT: add t0, t0, t2 +; VENTUS-NEXT: lw t1, 0(a0) +; VENTUS-NEXT: lw t0, 0(t0) +; VENTUS-NEXT: lw t2, 0(t1) +; VENTUS-NEXT: add t0, t2, t0 ; VENTUS-NEXT: sw t0, 0(t1) ; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -4 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/select_instructions.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/select_instructions.ll index 16e8c004be52..ca13332d4c7b 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/select_instructions.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/select_instructions.ll @@ -54,7 +54,8 @@ entry: define dso_local i32 @sle(i32 noundef %a, i32 noundef %b) local_unnamed_addr { ; VENTUS-LABEL: sle: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: vmsle.vv v0, v1, v0 +; VENTUS-NEXT: vmslt.vv v0, v1, v0 +; VENTUS-NEXT: vxor.vi v0, v0, 1 ; VENTUS-NEXT: ret entry: %cmp.not = icmp sle i32 %a, %b @@ -78,7 +79,8 @@ entry: define dso_local i32 @sleu(i32 noundef %a, i32 noundef %b) local_unnamed_addr { ; VENTUS-LABEL: sleu: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: vmsleu.vv v0, v1, v0 +; VENTUS-NEXT: vmsltu.vv v0, v1, v0 +; VENTUS-NEXT: vxor.vi v0, v0, 1 ; VENTUS-NEXT: ret entry: %cmp.not = icmp ule i32 %a, %b @@ -102,7 +104,8 @@ entry: define dso_local i32 @slgt_imm(i32 noundef %a) local_unnamed_addr { ; VENTUS-LABEL: slgt_imm: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: vmsgt.vi v0, v0, 12 +; VENTUS-NEXT: vmsle.vi v0, v0, 11 +; VENTUS-NEXT: vxor.vi v0, v0, 1 ; VENTUS-NEXT: ret entry: %cmp = icmp sgt i32 %a, 11 @@ -114,7 +117,8 @@ entry: define dso_local i32 @slgtu_imm(i32 noundef %a, i32 noundef %b) local_unnamed_addr { ; VENTUS-LABEL: slgtu_imm: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: vmsgtu.vi v0, v0, 12 +; VENTUS-NEXT: vmsleu.vi v0, v0, 11 +; VENTUS-NEXT: vxor.vi v0, v0, 1 ; VENTUS-NEXT: ret entry: %cmp = icmp ugt i32 %a, 11 @@ -126,7 +130,8 @@ entry: define dso_local i32 @slgtu_imm1(i32 noundef %a, i32 noundef %b) local_unnamed_addr { ; VENTUS-LABEL: slgtu_imm1: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: vmsgt.vi v0, v0, 12 +; VENTUS-NEXT: vmsle.vi v0, v0, 11 +; VENTUS-NEXT: vxor.vi v0, v0, 1 ; VENTUS-NEXT: ret entry: %cmp = icmp sgt i32 %a, 11 diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/var-arg.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/var-arg.ll index 1c23a2dcaa7f..790d27dce18a 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/var-arg.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/var-arg.ll @@ -13,23 +13,24 @@ target triple = "riscv32" define dso_local i32 @printf(ptr addrspace(2) noundef %fmt, ...) { ; VENTUS-LABEL: printf: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi tp, tp, 40 -; VENTUS-NEXT: .cfi_def_cfa_offset 40 +; VENTUS-NEXT: addi tp, tp, 4 +; VENTUS-NEXT: .cfi_def_cfa_offset 4 ; VENTUS-NEXT: vmv.v.x v8, tp -; VENTUS-NEXT: vsw.v v7, -36(v8) -; VENTUS-NEXT: vsw.v v6, -32(v8) -; VENTUS-NEXT: vsw.v v5, -28(v8) -; VENTUS-NEXT: vsw.v v4, -24(v8) +; VENTUS-NEXT: vsw.v v7, -4(v8) +; VENTUS-NEXT: vsw.v v6, -8(v8) +; VENTUS-NEXT: vsw.v v5, -12(v8) +; VENTUS-NEXT: vsw.v v4, -16(v8) ; VENTUS-NEXT: vsw.v v3, -20(v8) -; VENTUS-NEXT: vsw.v v2, -16(v8) -; VENTUS-NEXT: vsw.v v1, -12(v8) -; VENTUS-NEXT: addi t0, tp, -12 +; VENTUS-NEXT: vsw.v v2, -24(v8) +; VENTUS-NEXT: vsw.v v1, -28(v8) +; VENTUS-NEXT: addi t0, tp, -36 ; VENTUS-NEXT: vmv.v.x v0, t0 -; VENTUS-NEXT: vsw.v v0, -12(v8) -; VENTUS-NEXT: addi t0, tp, -8 +; VENTUS-NEXT: vsw.v v0, -4(v8) +; VENTUS-NEXT: addi t0, tp, -32 ; VENTUS-NEXT: vmv.v.x v0, t0 -; VENTUS-NEXT: vsw.v v0, -12(v8) -; VENTUS-NEXT: addi tp, tp, -40 +; VENTUS-NEXT: vsw.v v0, -4(v8) +; VENTUS-NEXT: addi tp, tp, -4 +; VENTUS-NEXT: vmv.v.x v8, tp ; VENTUS-NEXT: ret entry: %retval = alloca i32, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll index 224900905e84..778ae88fb542 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/vbranch-join.ll @@ -8,7 +8,7 @@ define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr { ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 4 ; VENTUS-NEXT: .cfi_def_cfa_offset 4 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: vmv.v.x v0, zero ; VENTUS-NEXT: call _Z13get_global_idj @@ -37,7 +37,7 @@ define dso_local i32 @branch(i32 noundef %dim) local_unnamed_addr { ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 ; VENTUS-NEXT: vadd.vx v0, v1, zero -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -4 ; VENTUS-NEXT: ret entry: @@ -63,7 +63,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 8 ; VENTUS-NEXT: .cfi_def_cfa_offset 8 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill ; VENTUS-NEXT: vmv.v.x v0, zero @@ -97,7 +97,7 @@ define dso_local ventus_kernel void @loop_branch(ptr addrspace(1) nocapture noun ; VENTUS-NEXT: .LBB1_3: # %for.cond.cleanup ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -8 ; VENTUS-NEXT: ret entry: @@ -134,8 +134,10 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr { ; VENTUS-NEXT: .cfi_def_cfa_offset 4 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vmv.v.x v32, tp -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, 4 +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: regext zero, zero, 72 +; VENTUS-NEXT: vsw.v v33, -4(v32) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: .cfi_offset v33.l, 0 ; VENTUS-NEXT: vmv.v.x v0, zero ; VENTUS-NEXT: call _Z13get_global_idj @@ -169,7 +171,6 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr { ; VENTUS-NEXT: vblt v0, v33, .LBB2_5 ; VENTUS-NEXT: # %bb.3: # %if.then2 ; VENTUS-NEXT: li t0, 23 -; VENTUS-NEXT: vmv.v.x v0, t0 ; VENTUS-NEXT: j .LBB2_6 ; VENTUS-NEXT: .LBB2_4: # %if.end7 ; VENTUS-NEXT: li t0, 4 @@ -178,16 +179,20 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr { ; VENTUS-NEXT: j .LBB2_7 ; VENTUS-NEXT: .LBB2_5: ; VENTUS-NEXT: li t0, 12 -; VENTUS-NEXT: vmv.v.x v0, t0 ; VENTUS-NEXT: .LBB2_6: # %cleanup9 ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 +; VENTUS-NEXT: vmv.v.x v0, t0 ; VENTUS-NEXT: .LBB2_7: # %cleanup9 ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: regext zero, zero, 9 +; VENTUS-NEXT: vlw.v v33, -4(v32) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -4 ; VENTUS-NEXT: addi tp, tp, -4 +; VENTUS-NEXT: regext zero, zero, 1 +; VENTUS-NEXT: vmv.v.x v32, tp ; VENTUS-NEXT: ret entry: %call = call i32 @_Z13get_global_idj(i32 noundef 0) @@ -219,7 +224,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 8 ; VENTUS-NEXT: .cfi_def_cfa_offset 8 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill ; VENTUS-NEXT: vmv.v.x v0, zero @@ -269,7 +274,7 @@ define dso_local ventus_kernel void @double_loop(ptr addrspace(1) nocapture noun ; VENTUS-NEXT: .LBB3_5: # %for.cond.cleanup ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -8 ; VENTUS-NEXT: ret entry: @@ -313,7 +318,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 8 ; VENTUS-NEXT: .cfi_def_cfa_offset 8 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: sw a0, -4(sp) # 4-byte Folded Spill ; VENTUS-NEXT: vmv.v.x v0, zero @@ -336,7 +341,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun ; VENTUS-NEXT: vadd.vi v3, v4, 8 ; VENTUS-NEXT: vadd.vi v4, v4, 4 ; VENTUS-NEXT: li t2, 1 -; VENTUS-NEXT: li s0, 2 +; VENTUS-NEXT: li s1, 2 ; VENTUS-NEXT: j .LBB4_5 ; VENTUS-NEXT: .LBB4_2: # %sw.default ; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1 @@ -361,11 +366,11 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun ; VENTUS-NEXT: # %bb.6: # %for.body ; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1 ; VENTUS-NEXT: vadd.vx v5, v4, zero -; VENTUS-NEXT: vmv.v.x v6, s0 +; VENTUS-NEXT: vmv.v.x v6, s1 ; VENTUS-NEXT: beq t0, t2, .LBB4_3 ; VENTUS-NEXT: # %bb.7: # %for.body ; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1 -; VENTUS-NEXT: bne t0, s0, .LBB4_2 +; VENTUS-NEXT: bne t0, s1, .LBB4_2 ; VENTUS-NEXT: # %bb.8: # %sw.bb4 ; VENTUS-NEXT: # in Loop: Header=BB4_5 Depth=1 ; VENTUS-NEXT: li t1, 23 @@ -375,7 +380,7 @@ define dso_local ventus_kernel void @loop_switch(ptr addrspace(1) nocapture noun ; VENTUS-NEXT: .LBB4_9: # %for.cond.cleanup ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -8 ; VENTUS-NEXT: ret entry: @@ -427,7 +432,7 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr { ; VENTUS: # %bb.0: # %entry ; VENTUS-NEXT: addi sp, sp, 4 ; VENTUS-NEXT: .cfi_def_cfa_offset 4 -; VENTUS-NEXT: sw ra, 0(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill ; VENTUS-NEXT: .cfi_offset ra, 0 ; VENTUS-NEXT: li t0, 2 ; VENTUS-NEXT: vmv.v.x v1, t0 @@ -462,7 +467,7 @@ define dso_local i32 @_Z13get_global_idj(i32 noundef %dim) local_unnamed_addr { ; VENTUS-NEXT: .LBB5_7: # %return ; VENTUS-NEXT: # Label of block must be emitted ; VENTUS-NEXT: join zero, zero, 0 -; VENTUS-NEXT: lw ra, 0(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload ; VENTUS-NEXT: addi sp, sp, -4 ; VENTUS-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/vecargtest.cl b/llvm/test/CodeGen/RISCV/VentusGPGPU/vecargtest.cl index f40c5702f7bf..5ec01121d309 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/vecargtest.cl +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/vecargtest.cl @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py // RUN: clang -target riscv32 -mcpu=ventus-gpgpu < %s \ // RUN: | FileCheck -check-prefix=VENTUS %s diff --git a/llvm/test/MC/RISCV/ventus/vconvert.s b/llvm/test/MC/RISCV/ventus/vconvert.s index c1e46fda7325..c94ba2658ae0 100644 --- a/llvm/test/MC/RISCV/ventus/vconvert.s +++ b/llvm/test/MC/RISCV/ventus/vconvert.s @@ -20,18 +20,3 @@ vfcvt.f.xu.v v4, v2 vfcvt.f.x.v v4, v2 # CHECK-INST: vfcvt.f.x.v v4, v2 # CHECK-ENCODING: [0x57,0x92,0x21,0x4a] - -vfcvt.rtz.xu.f.v v4, v2 -# CHECK-INST: vfcvt.rtz.xu.f.v v4, v2 -# CHECK-ENCODING: [0x57,0x12,0x23,0x4a] - -vfcvt.rtz.x.f.v v4, v2 -# CHECK-INST: vfcvt.rtz.x.f.v v4, v2 -# CHECK-ENCODING: [0x57,0x92,0x23,0x4a] - - - - - - - diff --git a/llvm/test/MC/RISCV/ventus/vmove.s b/llvm/test/MC/RISCV/ventus/vmove.s index 6cff8c1e940e..63fa8d5d5f2b 100644 --- a/llvm/test/MC/RISCV/ventus/vmove.s +++ b/llvm/test/MC/RISCV/ventus/vmove.s @@ -4,11 +4,6 @@ # RUN: | llvm-objdump -d --mattr=+v - \ # RUN: | FileCheck %s --check-prefix=CHECK-INST - -vmv.x.s gp, v6 -# CHECK-INST: vmv.x.s gp, v6 -# CHECK-ENCODING: [0xd7,0x21,0x60,0x42] - # SKIP VMV_S_X # SKIP VMERGE_VVM @@ -20,10 +15,3 @@ vmv.x.s gp, v6 vmv.v.x v6, s0 # CHECK-INST: vmv.v.x v6, s0 # CHECK-ENCODING: [0x57,0x43,0x04,0x5e] - -# SKIP VFMERGE_VFM - -vfmv.v.f v6, s0 -# CHECK-INST: vfmv.v.f v6, s0 -# CHECK-ENCODING: [0x57,0x53,0x04,0x5e] -