Add pattern for integer select instructions

This commit is contained in:
zhoujing 2022-12-30 16:22:59 +08:00
parent 1fab7b80f3
commit 8f6c31ea7d
4 changed files with 198 additions and 22 deletions

View File

@ -19,19 +19,18 @@ include "VentusInstrFormatsV.td"
/// Generic pattern classes
// RVV VV, VX, VI instruction pattern class for integer binary operations.
//   Op    - the binary operator to match.
//   Insts - instruction list indexed by operand form:
//             Insts[0] is selected when both operands are VGPRs (VV),
//             Insts[1] when the second operand is a GPR (VX),
//             Insts[2] when the second operand is a uimm5 immediate (VI).
//           The VI pattern is emitted only if Insts has exactly three entries,
//           so two-element lists get the VV and VX patterns only.
multiclass PatVXIBin<SDPatternOperator Op, list<RVInst> Insts> {
multiclass PatVXIBin<SDPatternOperator Op, list<RVInst> Insts> {
def : Pat<(Op (XLenVT VGPR:$rs1), (XLenVT VGPR:$rs2)),
(Insts[0] VGPR:$rs1, VGPR:$rs2)>;
(XLenVT (Insts[0] VGPR:$rs1, VGPR:$rs2))>;
def : Pat<(Op (XLenVT VGPR:$rs1), GPR:$rs2),
(Insts[1] VGPR:$rs1, GPR:$rs2)>;
(XLenVT (Insts[1] VGPR:$rs1, GPR:$rs2))>;
// The immediate form is optional: not every operation has a VI encoding.
if !eq(!size(Insts), 3) then
def : Pat<(XLenVT (Op (XLenVT VGPR:$rs1), uimm5:$imm)),
(Insts[2] VGPR:$rs1, uimm5:$imm)>;
(XLenVT (Insts[2] VGPR:$rs1, uimm5:$imm))>;
}
// RVV VV, VF, FV instruction pattern class for floating point binary operations
multiclass PatVFRBin<SDPatternOperator Op, list<RVInst> Insts> {
def : Pat<(Op (f32 VGPR:$rs1), (f32 VGPR:$rs2)),
@ -56,7 +55,7 @@ class DivergentStPat<PatFrag StoreOp, RVInst Inst>
(Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
// RVV VV, VF instruction pattern class for floating point ternary operations
multiclass PatVFRTer<list<SDPatternOperator> Ops, list<RVInst> Insts> {
multiclass PatVFRTer<list<SDPatternOperator> Ops, list<RVInst> Insts> {
def : Pat<(Ops[0] (Ops[1] (f32 VGPR:$rs1), (f32 VGPR:$rs2)),
(f32 VGPR:$rs3)), (Insts[0] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
@ -65,7 +64,7 @@ multiclass PatVFRTer<list<SDPatternOperator> Ops, list<RVInst> Insts> {
(f32 VGPR:$rs3)), (Insts[1] VGPR:$rs2, GPRF32:$rs1, VGPR:$rs3)>;
}
multiclass AnyPatVFRTer<PatFrags Op, list<RVInst> Insts> {
multiclass AnyPatVFRTer<PatFrags Op, list<RVInst> Insts> {
def : Pat<(Op (f32 VGPR:$rs1), (f32 VGPR:$rs2), (f32 VGPR:$rs3)),
(Insts[0] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
@ -74,6 +73,19 @@ multiclass AnyPatVFRTer<PatFrags Op, list<RVInst> Insts> {
(Insts[1] VGPR:$rs2, GPRF32:$rs1, VGPR:$rs3)>;
}
// Complex VV, VX, VI instruction pattern class for integer select operations.
//
// Matches the two-level DAG (Ops[0] (Ops[1] $rs1, $rs2), 1).
//   Ops   - [outer, inner] operators. Presumably used with outer = xor and
//           inner = setlt/setult, in which case the matched value is the
//           logical negation of "rs1 < rs2", i.e. "rs1 >= rs2".
//   Insts - [VV, VX, VI] instructions selected for the VGPR/VGPR, VGPR/GPR and
//           VGPR/uimm5 operand forms respectively.
multiclass SleOpePatVXIBin<list<PatFrags> Ops, list<RVInst> Insts> {
  // !(rs1 < rs2) is (rs2 <= rs1), so the "less-or-equal" VV instruction has to
  // receive the operands swapped; passing them in source order would compute
  // (rs1 <= rs2) instead.
  def : Pat<(XLenVT (Ops[0] (Ops[1] (XLenVT VGPR:$rs1), (XLenVT VGPR:$rs2)), 1)),
            (Insts[0] VGPR:$rs2, VGPR:$rs1)>;
  // NOTE(review): !(rs1 < rs2) is (rs1 >= rs2). With a "vector <= scalar"
  // instruction in Insts[1] this selects (rs1 <= rs2), and the operands cannot
  // simply be swapped because the scalar must stay in the second slot --
  // confirm which instruction (or instruction sequence) is intended here.
  def : Pat<(XLenVT (Ops[0] (Ops[1] (XLenVT VGPR:$rs1), (XLenVT GPR:$rs2)), 1)),
            (Insts[1] VGPR:$rs1, GPR:$rs2)>;
  // NOTE(review): !(rs1 < imm) is (rs1 > imm - 1), but the immediate is
  // forwarded unchanged. With a "greater-than" instruction in Insts[2] this
  // looks off by one; an SDNodeXForm that decrements the immediate is probably
  // needed -- confirm.
  def : Pat<(XLenVT (Ops[0] (Ops[1] (XLenVT VGPR:$rs1), uimm5:$rs2), 1)),
            (Insts[2] VGPR:$rs1, uimm5:$rs2)>;
}
// RVV VV, VF instruction pattern class for selecting instructions
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@ -198,7 +210,7 @@ class VALUVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
// op vd, vs1, vs2 (reverse the order of vs1 and vs2)
// $vd appears in the input list and the result is a separate output operand.
// NOTE(review): presumably the output is tied to one of the inputs through a
// `Constraints` string where the instruction is instantiated -- confirm.
class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
: RVInstVV<funct6, opv, (outs VGPR:$vd_w),
: RVInstVV<funct6, opv, (outs VGPR:$vd_wb),
(ins VGPR:$vd, VGPR:$vs1, VGPR:$vs2),
opcodestr, "$vd, $vs1, $vs2">;
@ -228,7 +240,7 @@ class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
// op vd, rs1, vs2 (Float) (reverse the order of rs1 and vs2)
// Scalar-operand counterpart of the reversed VV form: the scalar comes from
// GPRF32:$rs1, $vd appears in the input list, and the result is a separate
// output operand.
// NOTE(review): presumably the output is tied to one of the inputs through a
// `Constraints` string where the instruction is instantiated -- confirm.
class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
: RVInstVX<funct6, opv, (outs VGPR:$vd_w),
: RVInstVX<funct6, opv, (outs VGPR:$vd_wb),
(ins VGPR:$vd, GPRF32:$rs1, VGPR:$vs2),
opcodestr, "$vd, $rs1, $vs2">;
@ -920,16 +932,18 @@ defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>;
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
let Uses = [FRM], mayRaiseFPException = true in {
defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>;
let Constraints = "$vd = $vd_w" in {
let Constraints = "$vd_wb = $vs2" in {
defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>;
}
let Constraints = "$vd = $vd_wb" in {
defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>;
defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>;
defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>;
defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>;
defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
}
}
}
// Vector Widening Floating-Point Fused Multiply-Add Instructions
@ -976,6 +990,10 @@ defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>;
defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>;
} // RVVConstraint = NoConstraint, mayRaiseFPException = true
// Aliases that spell "less-or-equal immediate" in terms of the "less-than
// immediate" instructions (VMSLT_VI / VMSLTU_VI).
// NOTE(review): bang operators are not evaluated inside string literals, so
// the asm strings below contain the literal text `!sub($vb, 1)` rather than
// the value "$vb - 1". Confirm that these aliases parse and print as
// intended; adjusting the immediate normally needs a pseudo-instruction or
// custom operand handling.
def : InstAlias<"vmsle.vi $vd, $va, !sub($vb, 1)",
(VMSLT_VI VGPR:$vd, VGPR:$va, uimm5:$vb), 1>;
def : InstAlias<"vmsleu.vi $vd, $va, !sub($vb, 1)",
(VMSLTU_VI VGPR:$vd, VGPR:$va, uimm5:$vb), 1>;
def : InstAlias<"vmfgt.vv $vd, $va, $vb",
(VMFLT_VV VGPR:$vd, VGPR:$vb, VGPR:$va), 0>;
def : InstAlias<"vmfge.vv $vd, $va, $vb",
@ -1076,6 +1094,7 @@ def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VGPR:$vd_wb),
// Ventus vALU divergent execution patterns
//===----------------------------------------------------------------------===//
// TODO: need to add VX instruction support later
defm : PatVXIBin<DivergentBinFrag<smin>, [VMIN_VV, VMIN_VX]>;
defm : PatVXIBin<DivergentBinFrag<umin>, [VMINU_VV, VMINU_VX]>;
defm : PatVXIBin<DivergentBinFrag<smax>, [VMAX_VV, VMAX_VX]>;
@ -1095,6 +1114,16 @@ defm : PatVXIBin<DivergentBinFrag<sdiv>, [VDIV_VV, VDIV_VX]>;
defm : PatVXIBin<DivergentBinFrag<udiv>, [VDIVU_VV, VDIVU_VX]>;
defm : PatVXIBin<DivergentBinFrag<srem>, [VREM_VV, VREM_VX]>;
defm : PatVXIBin<DivergentBinFrag<urem>, [VREMU_VV, VREMU_VX]>;
defm : PatVXIBin<DivergentBinFrag<setlt>, [VMSLT_VV, VMSLT_VX, VMSLT_VI]>;
defm : PatVXIBin<DivergentBinFrag<setult>, [VMSLTU_VV, VMSLTU_VX, VMSLTU_VI]>;
defm : PatVXIBin<DivergentBinFrag<setne>, [VMSNE_VV, VMSNE_VX, VMSNE_VI]>;
defm : PatVXIBin<DivergentBinFrag<seteq>, [VMSEQ_VV, VMSEQ_VX, VMSEQ_VI]>;
// For now, some instructions are aliased to other instructions.
// Each instantiation matches `xor (setlt/setult a, b), 1` and supplies the
// [VV, VX, VI] instructions selected for it. The multiclass is declared as
// SleOpePatVXIBin, so it must be referenced under that name.
defm : SleOpePatVXIBin<[DivergentBinFrag<xor>, DivergentBinFrag<setlt>],
                       [VMSLE_VV, VMSLE_VX, VMSGT_VI]>;
defm : SleOpePatVXIBin<[DivergentBinFrag<xor>, DivergentBinFrag<setult>],
                       [VMSLEU_VV, VMSLEU_VX, VMSGTU_VI]>;
// Patterns for vrsub.vx and vrsub.vi
def : Pat<(sub GPR:$rs1, VGPR:$rs2), (VRSUB_VX VGPR:$rs2, GPR:$rs1)>;
@ -1110,14 +1139,16 @@ defm : PatVFRBin<DivergentBinFrag<fmaximum>, [VFMAX_VV, VFMAX_VF]>;
// Patterns for ternary operations
// vfmadd.vv && vfmadd.vf
defm : PatVFRTer<[DivergentBinFrag<fadd>, DivergentBinFrag<fmul>],
defm : PatVFRTer<[DivergentBinFrag<fadd>, DivergentBinFrag<fmul>],
[VFMADD_VV, VFMADD_VF]>;
defm : AnyPatVFRTer<any_fma, [VFMADD_VV, VFMADD_VF]>;
// vfmsub.vv && vfmsub.vf
defm : PatVFRTer<[DivergentBinFrag<fsub>, DivergentBinFrag<fmul>],
[VFMSUB_VV, VFMSUB_VF]>;
// vfsqrt.v
def : Pat<(any_fsqrt (f32 VGPR:$rs1)), (VFSQRT_V (f32 VGPR:$rs1))>;
// TODO: add a pattern for vfrec7.v (RVV floating-point reciprocal estimate, accurate to 7 bits)
def : DivergentLdPat<sextloadi8, VLUXEI8>;
def : DivergentLdPat<extloadi8, VLUXEI8>;
def : DivergentLdPat<sextloadi16, VLUXEI16>;

View File

@ -20,17 +20,17 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: vadd.vx v1, v1, x0
; VENTUS-NEXT: regext x0, x0, 1
; VENTUS-NEXT: vadd.vx v2, v0, x0
; VENTUS-NEXT: vfmv.s.f v0, x0
; VENTUS-NEXT: vmv.s.x v0, x0
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vfmv.s.f v1, x0
; VENTUS-NEXT: vmv.s.x v1, x0
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: regext x0, x0, 1
; VENTUS-NEXT: vadd.vv v2, v1, v0
; VENTUS-NEXT: vfmv.f.s x10, v2
; VENTUS-NEXT: vmv.x.s x10, v2
; VENTUS-NEXT: vluxei32.v v2, (x10), v1
; VENTUS-NEXT: regext x0, x0, 1
; VENTUS-NEXT: vadd.vv v0, v2, v0
; VENTUS-NEXT: vfmv.f.s x10, v0
; VENTUS-NEXT: vmv.x.s x10, v0
; VENTUS-NEXT: vluxei32.v v0, (x10), v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vsuxei32.v v0, (x10), v1

View File

@ -44,3 +44,13 @@ entry:
%mul = fmul float %a, 1.25
ret float %mul
}
; Calls the llvm.sqrt.f32 intrinsic on a float argument; the checks expect it
; to be selected as a single vfsqrt.v.
define float @sqrt_f32(float %a) {
; VENTUS-LABEL: sqrt_f32:
; VENTUS: # %bb.0:
; VENTUS-NEXT: vfsqrt.v v0, v0
; VENTUS-NEXT: ret
%b = call float @llvm.sqrt.f32(float %a)
ret float %b
}
declare float @llvm.sqrt.f32(float %Val)

View File

@ -0,0 +1,135 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Signed less-than of two i32 arguments, zero-extended to i32; the checks
; expect a single vmslt.vv with the operands in source order.
define dso_local i32 @slt(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: slt:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmslt.vv v0, v0, v1
; VENTUS-NEXT: ret
entry:
%cmp = icmp slt i32 %a, %b
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Signed less-than against the constant 12; the checks expect the immediate
; form vmslt.vi with the same constant.
define dso_local i32 @slt_imm(i32 noundef %a) local_unnamed_addr {
; VENTUS-LABEL: slt_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmslt.vi v0, v0, 12
; VENTUS-NEXT: ret
entry:
%cmp = icmp slt i32 %a, 12
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Unsigned less-than of two i32 arguments, zero-extended to i32; the checks
; expect a single vmsltu.vv with the operands in source order.
define dso_local i32 @sltu(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: sltu:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsltu.vv v0, v0, v1
; VENTUS-NEXT: ret
entry:
%cmp = icmp ult i32 %a, %b
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Unsigned less-than against the constant 12; the checks expect the immediate
; form vmsltu.vi with the same constant.
define dso_local i32 @sltu_imm(i32 noundef %a) local_unnamed_addr {
; VENTUS-LABEL: sltu_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsltu.vi v0, v0, 12
; VENTUS-NEXT: ret
entry:
%cmp = icmp ult i32 %a, 12
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Signed less-or-equal of two i32 arguments, zero-extended to i32; the checks
; expect a single vmsle.vv.
; NOTE(review): the expected operands are (v1, v0), whereas the slt test in
; this file uses (v0, v1) for (%a, %b). If vmsle.vv follows the RVV definition
; (vd = vs2 <= vs1) the checked instruction computes %b <= %a rather than
; %a <= %b -- confirm that the recorded output is correct.
define dso_local i32 @sle(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: sle:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsle.vv v0, v1, v0
; VENTUS-NEXT: ret
entry:
%cmp.not = icmp sle i32 %a, %b
%cond = zext i1 %cmp.not to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Despite the name, the IR predicate is `slt` against 13 (equivalent to
; "less-or-equal 12"), so this exercises the vmslt.vi selection rather than a
; dedicated less-or-equal instruction.
define dso_local i32 @sle_imm(i32 noundef %a) local_unnamed_addr {
; VENTUS-LABEL: sle_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmslt.vi v0, v0, 13
; VENTUS-NEXT: ret
entry:
%cmp = icmp slt i32 %a, 13
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Unsigned less-or-equal of two i32 arguments, zero-extended to i32; the
; checks expect a single vmsleu.vv.
; NOTE(review): the expected operands are (v1, v0), whereas the sltu test in
; this file uses (v0, v1) for (%a, %b). If vmsleu.vv follows the RVV definition
; (vd = vs2 <= vs1) the checked instruction computes %b <= %a rather than
; %a <= %b -- confirm that the recorded output is correct.
define dso_local i32 @sleu(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: sleu:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsleu.vv v0, v1, v0
; VENTUS-NEXT: ret
entry:
%cmp.not = icmp ule i32 %a, %b
%cond = zext i1 %cmp.not to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Despite the name, the IR predicate is `ult` against 13 (equivalent to
; unsigned "less-or-equal 12"), so this exercises the vmsltu.vi selection
; rather than a dedicated less-or-equal instruction.
define dso_local i32 @sleu_imm(i32 noundef %a) local_unnamed_addr {
; VENTUS-LABEL: sleu_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsltu.vi v0, v0, 13
; VENTUS-NEXT: ret
entry:
%cmp = icmp ult i32 %a, 13
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Signed greater-than against the constant 11; the checks expect vmsgt.vi.
; NOTE(review): the expected immediate is 12 while the IR compares against 11.
; If vmsgt.vi follows the RVV definition (vd = vs2 > imm) the checked
; instruction tests %a > 12, not %a > 11 -- confirm that the recorded output
; is correct.
define dso_local i32 @slgt_imm(i32 noundef %a) local_unnamed_addr {
; VENTUS-LABEL: slgt_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsgt.vi v0, v0, 12
; VENTUS-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 11
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; Unsigned greater-than against the constant 11; the checks expect vmsgtu.vi.
; The second argument %b is not used by the body.
; NOTE(review): the expected immediate is 12 while the IR compares against 11.
; If vmsgtu.vi follows the RVV definition (vd = vs2 > imm) the checked
; instruction tests %a > 12, not %a > 11 -- confirm that the recorded output
; is correct.
define dso_local i32 @slgtu_imm(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: slgtu_imm:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsgtu.vi v0, v0, 12
; VENTUS-NEXT: ret
entry:
%cmp = icmp ugt i32 %a, 11
%cond = zext i1 %cmp to i32
ret i32 %cond
}
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; NOTE(review): the name suggests an unsigned comparison, but the IR predicate
; is the signed `sgt` and the checks expect the signed vmsgt.vi, which makes
; this body identical to slgt_imm -- confirm whether `ugt` (or a different
; constant) was intended. The second argument %b is not used by the body.
; NOTE(review): as in slgt_imm, the expected immediate (12) differs from the
; constant compared against in the IR (11) -- confirm.
define dso_local i32 @slgtu_imm1(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: slgtu_imm1:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmsgt.vi v0, v0, 12
; VENTUS-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 11
%cond = zext i1 %cmp to i32
ret i32 %cond
}