From c6351822661eb455e51ffdcbede3ea475fa0289d Mon Sep 17 00:00:00 2001 From: zhoujing Date: Wed, 4 Jan 2023 15:04:36 +0800 Subject: [PATCH] Update pattern and test cases for float/integer convert instructions --- llvm/lib/Target/RISCV/VentusInstrInfoV.td | 13 +++++ .../CodeGen/RISCV/VentusGPGPU/addr-space.ll | 32 ++++++------ .../CodeGen/RISCV/VentusGPGPU/addr-space2.ll | 2 +- .../RISCV/VentusGPGPU/builtin-noverify.ll | 26 +++++----- .../test/CodeGen/RISCV/VentusGPGPU/builtin.ll | 52 +++++++++---------- llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll | 44 ++++++++++++++++ .../RISCV/VentusGPGPU/function-call.ll | 20 +++---- 7 files changed, 123 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/RISCV/VentusInstrInfoV.td b/llvm/lib/Target/RISCV/VentusInstrInfoV.td index af755998a4e7..029241e02550 100644 --- a/llvm/lib/Target/RISCV/VentusInstrInfoV.td +++ b/llvm/lib/Target/RISCV/VentusInstrInfoV.td @@ -106,6 +106,10 @@ class PatFloatSetCC Ty, CondCode Cond, RVInst Inst> : Pat<(DivergentTernaryFrag (f32 Ty[0]:$rs1), (f32 Ty[1]:$rs2), Cond), (i32 (Inst Ty[0]:$rs1, Ty[1]:$rs2))>; +// Float/integer type convert pattern +class PatFXConvert Ty, RVInst Inst> + : Pat<(Ty[0] (Frag (Ty[1] VGPR:$rs1))),(Inst VGPR:$rs1)>; + //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. //===----------------------------------------------------------------------===// @@ -1169,6 +1173,15 @@ def : PatFloatSetCC<[VGPR, VGPR], SETOLE, VMFLE_VV>; def : PatFloatSetCC<[VGPR, GPRF32], SETOLE, VMFLE_VV>; def : PatFloatSetCC<[VGPR, GPRF32], SETOGT, VMFGT_VF>; def : PatFloatSetCC<[VGPR, GPRF32], SETOGE, VMFGE_VF>; + +// TODO: add vfcvt.rtz +def : PatFXConvert, [i32, f32], VFCVT_X_F_V>; +def : PatFXConvert, + [i32, f32], VFCVT_XU_F_V>; +def : PatFXConvert, [f32, i32], VFCVT_F_X_V>; +def : PatFXConvert, + [f32, i32], VFCVT_F_XU_V>; + // Patterns for vrsub.vx and vrsub.vi def : Pat<(sub GPR:$rs1, VGPR:$rs2), (VRSUB_VX VGPR:$rs2, GPR:$rs1)>; def : Pat<(XLenVT (sub uimm5:$imm, (XLenVT VGPR:$rs1))), diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll index 539d1b72cb31..6d57f47c23d5 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space.ll @@ -5,17 +5,17 @@ define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(3) nocapture noundef readonly align 4 %B) { ; VENTUS-LABEL: func: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi sp, sp, -16 -; VENTUS-NEXT: addi tp, tp, -16 +; VENTUS-NEXT: addi sp, sp, 16 +; VENTUS-NEXT: addi tp, tp, 16 ; VENTUS-NEXT: .cfi_def_cfa_offset 16 -; VENTUS-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; VENTUS-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; VENTUS-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; VENTUS-NEXT: vsw v32, 0(tp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, -4 -; VENTUS-NEXT: .cfi_offset s0, -8 -; VENTUS-NEXT: .cfi_offset s1, -12 -; VENTUS-NEXT: .cfi_offset v32.l, -16 +; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw s0, -8(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw s1, -12(sp) # 4-byte Folded Spill +; VENTUS-NEXT: vsw v32, -16(tp) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 12 +; VENTUS-NEXT: .cfi_offset s0, 8 +; VENTUS-NEXT: .cfi_offset s1, 4 +; VENTUS-NEXT: .cfi_offset v32.l, 0 ; VENTUS-NEXT: lw s0, 0(a0) ; VENTUS-NEXT: lw s1, 4(a0) ; VENTUS-NEXT: vmv.s.x v0, zero @@ -36,12 +36,12 @@ define dso_local spir_kernel void @func(ptr addrspace(1) nocapture noundef align ; VENTUS-NEXT: vluxei32.v v2, (a0), v1 ; VENTUS-NEXT: vadd.vv v0, v2, v0 ; VENTUS-NEXT: vsuxei32.v v0, (a0), v1 -; VENTUS-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; VENTUS-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; VENTUS-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; VENTUS-NEXT: vlw v32, 0(tp) # 4-byte Folded Reload -; VENTUS-NEXT: addi sp, sp, 16 -; VENTUS-NEXT: addi tp, tp, 16 +; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw s0, -8(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw s1, -12(sp) # 4-byte Folded Reload +; VENTUS-NEXT: vlw v32, -16(tp) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -16 +; VENTUS-NEXT: addi tp, tp, -16 ; VENTUS-NEXT: ret entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll index a1ab8d71f9a6..3573f7f68438 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll @@ -39,7 +39,7 @@ define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { ; VENTUS-NEXT: add s2, s2, a0 ; VENTUS-NEXT: add s1, s1, a0 ; VENTUS-NEXT: lw a1, 0(s1) -; VENTUS-NEXT: vlw v1, 0(s2) +; VENTUS-NEXT: vlw v1, zero(s2) ; VENTUS-NEXT: vmv.s.x v2, a1 ; VENTUS-NEXT: add a0, s0, a0 ; VENTUS-NEXT: lw a1, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll index e70edf9baca8..c1b766cec6d7 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll @@ -5,17 +5,17 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) { ; VENTUS-LABEL: foo_fun: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi sp, sp, -16 -; VENTUS-NEXT: addi tp, tp, -16 +; VENTUS-NEXT: addi sp, sp, 16 +; VENTUS-NEXT: addi tp, tp, 16 ; VENTUS-NEXT: .cfi_def_cfa_offset 16 -; VENTUS-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill ; VENTUS-NEXT: regext zero, zero, 1 -; VENTUS-NEXT: vsw v1, 8(tp) # 4-byte Folded Spill +; VENTUS-NEXT: vsw v1, -12(tp) # 4-byte Folded Spill ; VENTUS-NEXT: regext zero, zero, 1 -; VENTUS-NEXT: vsw v2, 4(tp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, -4 -; VENTUS-NEXT: .cfi_offset v32.l, -8 -; VENTUS-NEXT: .cfi_offset v33.l, -12 +; VENTUS-NEXT: vsw v2, -16(tp) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 8 +; VENTUS-NEXT: .cfi_offset v32.l, 4 +; VENTUS-NEXT: .cfi_offset v33.l, 0 ; VENTUS-NEXT: regext zero, zero, 1 ; VENTUS-NEXT: vadd.vx v1, v1, zero ; VENTUS-NEXT: regext zero, zero, 1 @@ -34,13 +34,13 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp ; VENTUS-NEXT: vluxei32.v v0, (a0), v1 ; VENTUS-NEXT: vadd.vv v0, v0, v2 ; VENTUS-NEXT: vsuxei32.v v0, (a0), v1 -; VENTUS-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload ; VENTUS-NEXT: regext zero, zero, 1 -; VENTUS-NEXT: vlw v1, 8(tp) # 4-byte Folded Reload +; VENTUS-NEXT: vlw v1, -12(tp) # 4-byte Folded Reload ; VENTUS-NEXT: regext zero, zero, 1 -; VENTUS-NEXT: vlw v2, 4(tp) # 4-byte Folded Reload -; VENTUS-NEXT: addi sp, sp, 16 -; VENTUS-NEXT: addi tp, tp, 16 +; VENTUS-NEXT: vlw v2, -16(tp) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -16 +; VENTUS-NEXT: addi tp, tp, -16 ; VENTUS-NEXT: ret entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll index 007800bbaf65..20b2b32fb0d4 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll @@ -5,15 +5,15 @@ define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) { ; VENTUS-LABEL: foo_ker: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi sp, sp, -16 -; VENTUS-NEXT: addi tp, tp, -16 +; VENTUS-NEXT: addi sp, sp, 16 +; VENTUS-NEXT: addi tp, tp, 16 ; VENTUS-NEXT: .cfi_def_cfa_offset 16 -; VENTUS-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; VENTUS-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; VENTUS-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, -4 -; VENTUS-NEXT: .cfi_offset s0, -8 -; VENTUS-NEXT: .cfi_offset s1, -12 +; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw s0, -12(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw s1, -16(sp) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 8 +; VENTUS-NEXT: .cfi_offset s0, 4 +; VENTUS-NEXT: .cfi_offset s1, 0 ; VENTUS-NEXT: lw s0, 0(a0) ; VENTUS-NEXT: lw s1, 4(a0) ; VENTUS-NEXT: vmv.s.x v0, zero @@ -30,11 +30,11 @@ define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ; VENTUS-NEXT: vluxei32.v v0, (a0), v1 ; VENTUS-NEXT: vadd.vv v0, v0, v2 ; VENTUS-NEXT: vsuxei32.v v0, (a0), v1 -; VENTUS-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; VENTUS-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; VENTUS-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; VENTUS-NEXT: addi sp, sp, 16 -; VENTUS-NEXT: addi tp, tp, 16 +; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw s1, -16(sp) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -16 +; VENTUS-NEXT: addi tp, tp, -16 ; VENTUS-NEXT: ret entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) @@ -50,15 +50,15 @@ entry: define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) { ; VENTUS-LABEL: foo_fun: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi sp, sp, -16 -; VENTUS-NEXT: addi tp, tp, -16 +; VENTUS-NEXT: addi sp, sp, 16 +; VENTUS-NEXT: addi tp, tp, 16 ; VENTUS-NEXT: .cfi_def_cfa_offset 16 -; VENTUS-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; VENTUS-NEXT: vsw v32, 8(tp) # 4-byte Folded Spill -; VENTUS-NEXT: vsw v33, 4(tp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, -4 -; VENTUS-NEXT: .cfi_offset v32.l, -8 -; VENTUS-NEXT: .cfi_offset v33.l, -12 +; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill +; VENTUS-NEXT: vsw v32, -12(tp) # 4-byte Folded Spill +; VENTUS-NEXT: vsw v33, -16(tp) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 8 +; VENTUS-NEXT: .cfi_offset v32.l, 4 +; VENTUS-NEXT: .cfi_offset v33.l, 0 ; VENTUS-NEXT: vadd.vx v32, v1, zero ; VENTUS-NEXT: vadd.vx v33, v0, zero ; VENTUS-NEXT: vmv.s.x v0, zero @@ -73,11 +73,11 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp ; VENTUS-NEXT: vluxei32.v v0, (a0), v1 ; VENTUS-NEXT: vadd.vv v0, v0, v2 ; VENTUS-NEXT: vsuxei32.v v0, (a0), v1 -; VENTUS-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; VENTUS-NEXT: vlw v32, 8(tp) # 4-byte Folded Reload -; VENTUS-NEXT: vlw v33, 4(tp) # 4-byte Folded Reload -; VENTUS-NEXT: addi sp, sp, 16 -; VENTUS-NEXT: addi tp, tp, 16 +; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload +; VENTUS-NEXT: vlw v32, -12(tp) # 4-byte Folded Reload +; VENTUS-NEXT: vlw v33, -16(tp) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -16 +; VENTUS-NEXT: addi tp, tp, -16 ; VENTUS-NEXT: ret entry: %call = tail call i32 @_Z13get_global_idj(i32 noundef 0) diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll index 5fd614af1ca4..9b7e88eb4bae 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll @@ -268,3 +268,47 @@ entry: %sub = fsub float %mul, %c ret float %sub } + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) vscale_range(1,2048) +define dso_local i32 @fcvt_x_f(float noundef %a) local_unnamed_addr { +; VENTUS-LABEL: fcvt_x_f: +; VENTUS: # %bb.0: # %entry +; VENTUS-NEXT: vfcvt.x.f.v v0, v0 +; VENTUS-NEXT: ret +entry: + %conv = fptosi float %a to i32 + ret i32 %conv +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) vscale_range(1,2048) +define dso_local i32 @fcvtu_xu_f(float noundef %a) local_unnamed_addr { +; VENTUS-LABEL: fcvtu_xu_f: +; VENTUS: # %bb.0: # %entry +; VENTUS-NEXT: vfcvt.xu.f.v v0, v0 +; VENTUS-NEXT: ret +entry: + %conv = fptoui float %a to i32 + ret i32 %conv +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) vscale_range(1,2048) +define dso_local float @fcvt_f_x(i32 noundef %a) local_unnamed_addr { +; VENTUS-LABEL: fcvt_f_x: +; VENTUS: # %bb.0: # %entry +; VENTUS-NEXT: vfcvt.f.x.v v0, v0 +; VENTUS-NEXT: ret +entry: + %conv = sitofp i32 %a to float + ret float %conv +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) vscale_range(1,2048) +define dso_local float @fcvt_f_xu(i32 noundef %a) local_unnamed_addr { +; VENTUS-LABEL: fcvt_f_xu: +; VENTUS: # %bb.0: # %entry +; VENTUS-NEXT: vfcvt.f.xu.v v0, v0 +; VENTUS-NEXT: ret +entry: + %conv = uitofp i32 %a to float + ret float %conv +} diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll index 0563763158ee..098f76d8ef08 100644 --- a/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/function-call.ll @@ -23,13 +23,13 @@ entry: define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrspace(1) noundef align 4 %c) { ; VENTUS-LABEL: foo: ; VENTUS: # %bb.0: # %entry -; VENTUS-NEXT: addi sp, sp, -16 -; VENTUS-NEXT: addi tp, tp, -16 +; VENTUS-NEXT: addi sp, sp, 16 +; VENTUS-NEXT: addi tp, tp, 16 ; VENTUS-NEXT: .cfi_def_cfa_offset 16 -; VENTUS-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; VENTUS-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; VENTUS-NEXT: .cfi_offset ra, -4 -; VENTUS-NEXT: .cfi_offset s0, -8 +; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill +; VENTUS-NEXT: sw s0, -16(sp) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset ra, 4 +; VENTUS-NEXT: .cfi_offset s0, 0 ; VENTUS-NEXT: lw s0, 8(a0) ; VENTUS-NEXT: lw a1, 4(a0) ; VENTUS-NEXT: lw a0, 0(a0) @@ -38,10 +38,10 @@ define dso_local spir_kernel void @foo(i32 noundef %a, i32 noundef %b, ptr addrs ; VENTUS-NEXT: call bar ; VENTUS-NEXT: vmv.s.x v1, zero ; VENTUS-NEXT: vsuxei32.v v0, (s0), v1 -; VENTUS-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; VENTUS-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; VENTUS-NEXT: addi sp, sp, 16 -; VENTUS-NEXT: addi tp, tp, 16 +; VENTUS-NEXT: lw ra, -12(sp) # 4-byte Folded Reload +; VENTUS-NEXT: lw s0, -16(sp) # 4-byte Folded Reload +; VENTUS-NEXT: addi sp, sp, -16 +; VENTUS-NEXT: addi tp, tp, -16 ; VENTUS-NEXT: ret entry: %a.addr = alloca i32, align 4, addrspace(5)