diff --git a/llvm/lib/Target/RISCV/VentusInstrInfoV.td b/llvm/lib/Target/RISCV/VentusInstrInfoV.td
index 7527e4c35a42..6dcd3c530094 100644
--- a/llvm/lib/Target/RISCV/VentusInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/VentusInstrInfoV.td
@@ -19,19 +19,18 @@ include "VentusInstrFormatsV.td"
 /// Generic pattern classes
 
 // RVV VV, VX, VI instruction pattern class for integer binary operations
-multiclass PatVXIBin Insts> {
+multiclass PatVXIBin Insts> {
   def : Pat<(Op (XLenVT VGPR:$rs1), (XLenVT VGPR:$rs2)),
-            (Insts[0] VGPR:$rs1, VGPR:$rs2)>;
+            (XLenVT (Insts[0] VGPR:$rs1, VGPR:$rs2))>;
   def : Pat<(Op (XLenVT VGPR:$rs1), GPR:$rs2),
-            (Insts[1] VGPR:$rs1, GPR:$rs2)>;
+            (XLenVT (Insts[1] VGPR:$rs1, GPR:$rs2))>;
   if !eq(!size(Insts), 3) then
   def : Pat<(XLenVT (Op (XLenVT VGPR:$rs1), uimm5:$imm)),
-            (Insts[2] VGPR:$rs1, uimm5:$imm)>;
+            (XLenVT (Insts[2] VGPR:$rs1, uimm5:$imm))>;
 }
-
 // RVV VV, VF, FV instruction pattern class for floating point binary operations
 multiclass PatVFRBin Insts> {
   def : Pat<(Op (f32 VGPR:$rs1), (f32 VGPR:$rs2)),
@@ -56,7 +55,7 @@ class DivergentStPat
             (Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
 
 // RVV VV, VF instruction pattern class for floating point ternary operations
-multiclass PatVFRTer Ops, list Insts> {
+multiclass PatVFRTer Ops, list Insts> {
   def : Pat<(Ops[0] (Ops[1] (f32 VGPR:$rs1), (f32 VGPR:$rs2)),
                     (f32 VGPR:$rs3)),
             (Insts[0] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
@@ -65,7 +64,7 @@ multiclass PatVFRTer Ops, list Insts> {
                     (f32 VGPR:$rs3)),
             (Insts[1] VGPR:$rs2, GPRF32:$rs1, VGPR:$rs3)>;
 }
-multiclass AnyPatVFRTer Insts> {
+multiclass AnyPatVFRTer Insts> {
   def : Pat<(Op (f32 VGPR:$rs1), (f32 VGPR:$rs2),
                 (f32 VGPR:$rs3)),
             (Insts[0] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
@@ -74,6 +73,19 @@ multiclass AnyPatVFRTer Insts> {
             (Insts[1] VGPR:$rs2, GPRF32:$rs1, VGPR:$rs3)>;
 }
 
+// Complex VV, VX, VI instruction pattern class for integer select operations
+multiclass SleOpePatVXIBin Ops, list Insts> {
+
+  def : Pat<(XLenVT (Ops[0] (Ops[1] (XLenVT VGPR:$rs1), (XLenVT VGPR:$rs2)), 1)),
+            (Insts[0] VGPR:$rs1, VGPR:$rs2)>;
+
+  def : Pat<(XLenVT (Ops[0] (Ops[1] (XLenVT VGPR:$rs1), (XLenVT GPR:$rs2)), 1)),
+            (Insts[1] VGPR:$rs1, GPR:$rs2)>;
+
+  def : Pat<(XLenVT (Ops[0] (Ops[1] (XLenVT VGPR:$rs1), uimm5:$rs2), 1)),
+            (Insts[2] VGPR:$rs1, uimm5:$rs2)>;
+}
+// RVV VV, VF instruction pattern class for selecting instructions
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
@@ -198,7 +210,7 @@ class VALUVV funct6, RISCVVFormat opv, string opcodestr>
 
 // op vd, vs1, vs2 (reverse the order of vs1 and vs2)
 class VALUrVV funct6, RISCVVFormat opv, string opcodestr>
-  : RVInstVV;
@@ -228,7 +240,7 @@ class VALUVF funct6, RISCVVFormat opv, string opcodestr>
 
 // op vd, rs1, vs2 (Float) (reverse the order of rs1 and vs2)
 class VALUrVF funct6, RISCVVFormat opv, string opcodestr>
-  : RVInstVX;
@@ -920,16 +932,18 @@ defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>;
 
 // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
 let Uses = [FRM], mayRaiseFPException = true in {
-defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
-defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
-defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
-defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>;
-let Constraints = "$vd = $vd_w" in {
+let Constraints = "$vd_wb = $vs2" in {
+  defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
+  defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
+  defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
+  defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>;
+}
+let Constraints = "$vd = $vd_wb" in {
   defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>;
   defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>;
-  defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>; 
+  defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>;
   defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
-}
+}
 }
 
 // Vector Widening Floating-Point Fused Multiply-Add Instructions
@@ -976,6 +990,10 @@ defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>;
 defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>;
 } // RVVConstraint = NoConstraint, mayRaiseFPException = true
 
+def : InstAlias<"vmsle.vi $vd, $va, !sub($vb, 1)",
+                (VMSLT_VI VGPR:$vd, VGPR:$va, uimm5:$vb), 1>;
+def : InstAlias<"vmsleu.vi $vd, $va, !sub($vb, 1)",
+                (VMSLTU_VI VGPR:$vd, VGPR:$va, uimm5:$vb), 1>;
 def : InstAlias<"vmfgt.vv $vd, $va, $vb",
                 (VMFLT_VV VGPR:$vd, VGPR:$vb, VGPR:$va), 0>;
 def : InstAlias<"vmfge.vv $vd, $va, $vb",
@@ -1076,6 +1094,7 @@ def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VGPR:$vd_wb),
 // Ventus vALU divergent execution patterns
 //===----------------------------------------------------------------------===//
 
+// TODO: add VX instruction support later
 defm : PatVXIBin, [VMIN_VV, VMIN_VX]>;
 defm : PatVXIBin, [VMINU_VV, VMINU_VX]>;
 defm : PatVXIBin, [VMAX_VV, VMAX_VX]>;
@@ -1095,6 +1114,16 @@ defm : PatVXIBin, [VDIV_VV, VDIV_VX]>;
 defm : PatVXIBin, [VDIVU_VV, VDIVU_VX]>;
 defm : PatVXIBin, [VREM_VV, VREM_VX]>;
 defm : PatVXIBin, [VREMU_VV, VREMU_VX]>;
+defm : PatVXIBin, [VMSLT_VV, VMSLT_VX, VMSLT_VI]>;
+defm : PatVXIBin, [VMSLTU_VV, VMSLTU_VX, VMSLTU_VI]>;
+defm : PatVXIBin, [VMSNE_VV, VMSNE_VX, VMSNE_VI]>;
+defm : PatVXIBin, [VMSEQ_VV, VMSEQ_VX, VMSEQ_VI]>;
+
+// For now, some instructions are aliased to other instructions
+defm : SleOpePatVXIBin<[DivergentBinFrag, DivergentBinFrag],
+                       [VMSLE_VV, VMSLE_VX, VMSGT_VI]>;
+defm : SleOpePatVXIBin<[DivergentBinFrag, DivergentBinFrag],
+                       [VMSLEU_VV, VMSLEU_VX, VMSGTU_VI]>;
 
 // Patterns for vrsub.vx and vrsub.vi
 def : Pat<(sub GPR:$rs1, VGPR:$rs2), (VRSUB_VX VGPR:$rs2, GPR:$rs1)>;
@@ -1110,14 +1139,16 @@ defm : PatVFRBin, [VFMAX_VV, VFMAX_VF]>;
 
 // Patterns for ternary operations
 // vfmadd.vv && vfmadd.vf
-defm : PatVFRTer<[DivergentBinFrag, DivergentBinFrag],
+defm : PatVFRTer<[DivergentBinFrag, DivergentBinFrag],
                  [VFMADD_VV, VFMADD_VF]>;
 defm : AnyPatVFRTer;
 
 // vfmsub.vv && vfmsub.vf
 defm : PatVFRTer<[DivergentBinFrag, DivergentBinFrag],
                  [VFMSUB_VV, VFMSUB_VF]>;
-
+// vfsqrt.v
+def : Pat<(any_fsqrt (f32 VGPR:$rs1)), (VFSQRT_V (f32 VGPR:$rs1))>;
+// TODO: figure out what vfrec7.v is and whether it needs a pattern
 def : DivergentLdPat;
 def : DivergentLdPat;
 def : DivergentLdPat;
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll
index 7caf0824b3a6..46e18f1b05e9 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin-noverify.ll
@@ -20,17 +20,17 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
 ; VENTUS-NEXT:    vadd.vx v1, v1, x0
 ; VENTUS-NEXT:    regext x0, x0, 1
 ; VENTUS-NEXT:    vadd.vx v2, v0, x0
-; VENTUS-NEXT:    vfmv.s.f v0, x0
+; VENTUS-NEXT:    vmv.s.x v0, x0
 ; VENTUS-NEXT:    call _Z13get_global_idj
-; VENTUS-NEXT:    vfmv.s.f v1, x0
+; VENTUS-NEXT:    vmv.s.x v1, x0
 ; VENTUS-NEXT:    vsll.vi v0, v0, 2
 ; VENTUS-NEXT:    regext x0, x0, 1
 ; VENTUS-NEXT:    vadd.vv v2, v1, v0
-; VENTUS-NEXT:    vfmv.f.s x10, v2
+; VENTUS-NEXT:    vmv.x.s x10, v2
 ; VENTUS-NEXT:    vluxei32.v v2, (x10), v1
 ; VENTUS-NEXT:    regext x0, x0, 1
 ; VENTUS-NEXT:    vadd.vv v0, v2, v0
-; VENTUS-NEXT:    vfmv.f.s x10, v0
+; VENTUS-NEXT:    vmv.x.s x10, v0
 ; VENTUS-NEXT:    vluxei32.v v0, (x10), v1
 ; VENTUS-NEXT:    vadd.vv v0, v0, v2
 ; VENTUS-NEXT:    vsuxei32.v v0, (x10), v1
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll
index 28e67849ca7f..cc8da8d59e83 100644
--- a/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/float.ll
@@ -44,3 +44,13 @@ entry:
   %mul = fmul float %a, 1.25
   ret float %mul
 }
+
+define float @sqrt_f32(float %a) {
+; VENTUS-LABEL: sqrt_f32:
+; VENTUS:       # %bb.0:
+; VENTUS-NEXT:    vfsqrt.v v0, v0
+; VENTUS-NEXT:    ret
+  %b = call float @llvm.sqrt.f32(float %a)
+  ret float %b
+}
+declare float @llvm.sqrt.f32(float %Val)
diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/select_instructions.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/select_instructions.ll
new file mode 100644
index 000000000000..49c1ef084eb3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/select_instructions.ll
@@ -0,0 +1,135 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=VENTUS %s
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @slt(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
+; VENTUS-LABEL: slt:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmslt.vv v0, v0, v1
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @slt_imm(i32 noundef %a) local_unnamed_addr {
+; VENTUS-LABEL: slt_imm:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmslt.vi v0, v0, 12
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp slt i32 %a, 12
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @sltu(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
+; VENTUS-LABEL: sltu:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsltu.vv v0, v0, v1
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp ult i32 %a, %b
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @sltu_imm(i32 noundef %a) local_unnamed_addr {
+; VENTUS-LABEL: sltu_imm:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsltu.vi v0, v0, 12
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp ult i32 %a, 12
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @sle(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
+; VENTUS-LABEL: sle:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsle.vv v0, v1, v0
+; VENTUS-NEXT:    ret
+entry:
+  %cmp.not = icmp sle i32 %a, %b
+  %cond = zext i1 %cmp.not to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @sle_imm(i32 noundef %a) local_unnamed_addr {
+; VENTUS-LABEL: sle_imm:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmslt.vi v0, v0, 13
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp slt i32 %a, 13
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @sleu(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
+; VENTUS-LABEL: sleu:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsleu.vv v0, v1, v0
+; VENTUS-NEXT:    ret
+entry:
+  %cmp.not = icmp ule i32 %a, %b
+  %cond = zext i1 %cmp.not to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @sleu_imm(i32 noundef %a) local_unnamed_addr {
+; VENTUS-LABEL: sleu_imm:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsltu.vi v0, v0, 13
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp ult i32 %a, 13
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @slgt_imm(i32 noundef %a) local_unnamed_addr {
+; VENTUS-LABEL: slgt_imm:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsgt.vi v0, v0, 12
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp sgt i32 %a, 11
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @slgtu_imm(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
+; VENTUS-LABEL: slgtu_imm:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsgtu.vi v0, v0, 12
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp ugt i32 %a, 11
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
+define dso_local i32 @slgtu_imm1(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
+; VENTUS-LABEL: slgtu_imm1:
+; VENTUS:       # %bb.0: # %entry
+; VENTUS-NEXT:    vmsgt.vi v0, v0, 12
+; VENTUS-NEXT:    ret
+entry:
+  %cmp = icmp sgt i32 %a, 11
+  %cond = zext i1 %cmp to i32
+  ret i32 %cond
+}