Update pattern and test cases for float/integer arithmetic instructions

This commit is contained in:
zhoujing 2023-01-05 13:35:58 +08:00
parent c635182266
commit 02392af08a
6 changed files with 244 additions and 40 deletions

View File

@ -56,6 +56,11 @@ class DivergentBinFrag<SDPatternOperator Op> : PatFrag<
(Op $src0, $src1),
[{ return N->isDivergent(); }]>;
// Divergent-only binary fragment with swapped operands: written as
// (frag $src0, $src1), it matches the node (Op $src1, $src0), and only when
// the matched node N reports isDivergent(). Useful for non-commutative
// operators whose "interesting" operand sits on the right-hand side, e.g.
// matching `c - (a * b)` with the product supplied as the first operand.
class ReverseDivergentBinFrag<SDPatternOperator Op>
    : PatFrag<(ops node:$src0, node:$src1), (Op $src1, $src0),
              [{ return N->isDivergent(); }]>;
class DivergentPrivateStoreFrag<SDPatternOperator Op> : PatFrag<
(ops node:$src0, node:$src1),
(Op $src0, $src1),

View File

@ -64,23 +64,26 @@ class DivergentNonPriStPat<PatFrag StoreOp, RVInst Inst>
(XLenVT VGPR:$vs3), (AddrRegReg GPR:$rs1, (XLenVT VGPR:$vs2))),
(Inst VGPR:$vs3, GPR:$rs1, VGPR:$vs2)>;
// RVV VV, VF instruction pattern class for floating point ternary operations
multiclass PatVFRTer<list<SDPatternOperator> Ops, list<RVInst> Insts> {
// RVV instruction pattern class for float/integer ternary operations.
// Dst is the register class of the scalar operand and selects the variant:
// GPR for the .vx form, GPRF32 for the .vf form.
multiclass PatVXFTer<list<SDPatternOperator> Ops, DAGOperand Dst, ValueType Ty,
list<RVInst> Insts> {
def : Pat<(Ops[0] (Ops[1] (f32 VGPR:$rs1), (f32 VGPR:$rs2)),
(f32 VGPR:$rs3)), (Insts[0] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
def : Pat<(Ops[0] (Ops[1] (Ty VGPR:$rs1), (Ty VGPR:$rs2)),
(Ty VGPR:$rs3)), (Insts[0] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
def : Pat<(Ops[0] (Ops[1] GPRF32:$rs1, (f32 VGPR:$rs2)),
(f32 VGPR:$rs3)), (Insts[1] VGPR:$rs2, GPRF32:$rs1, VGPR:$rs3)>;
}
def : Pat<(Ops[0] (Ops[1] (Ty Dst:$rs1), (Ty VGPR:$rs2)),
(Ty VGPR:$rs3)), (Insts[1] VGPR:$rs2, Dst:$rs1, VGPR:$rs3)>;
multiclass AnyPatVFRTer<PatFrags Op, list<RVInst> Insts> {
// For fneg operation
if !eq(!size(Insts), 4) then {
def : Pat<(Ops[0] (Ops[1] (fneg (Ty VGPR:$rs1)), (Ty VGPR:$rs2)),
(Ty VGPR:$rs3)), (Insts[2] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
def : Pat<(Op (f32 VGPR:$rs1), (f32 VGPR:$rs2), (f32 VGPR:$rs3)),
(Insts[0] VGPR:$rs2, VGPR:$rs1, VGPR:$rs3)>;
def : Pat<(Ops[0] (Ops[1] (fneg (Ty Dst:$rs1)), (Ty VGPR:$rs2)),
(Ty VGPR:$rs3)), (Insts[3] VGPR:$rs2, Dst:$rs1, VGPR:$rs3)>;
}
def : Pat<(Op GPRF32:$rs1, (f32 VGPR:$rs2), (f32 VGPR:$rs3)),
(Insts[1] VGPR:$rs2, GPRF32:$rs1, VGPR:$rs3)>;
}
// Complex VV, VX, VI instruction pattern class for integers select operation
@ -104,7 +107,7 @@ multiclass SleOpePatVXIBin<list<PatFrags> Ops, list<RVInst> Insts> {
// Setcc pattern for float operations
class PatFloatSetCC<list<DAGOperand> Ty, CondCode Cond, RVInst Inst>
: Pat<(DivergentTernaryFrag<any_fsetcc> (f32 Ty[0]:$rs1), (f32 Ty[1]:$rs2), Cond),
(i32 (Inst Ty[0]:$rs1, Ty[1]:$rs2))>;
(XLenVT (Inst Ty[0]:$rs1, Ty[1]:$rs2))>;
// Float/integer type convert pattern
class PatFXConvert<PatFrag Frag, list<ValueType> Ty, RVInst Inst>
@ -246,8 +249,8 @@ class VALUVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
// op vd, rs1, vs2 (reverse the order of rs1 and vs2)
class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
: RVInstVX<funct6, opv, (outs VGPR:$vd),
(ins GPR:$rs1, VGPR:$vs2),
: RVInstVX<funct6, opv, (outs VGPR:$vd_wb),
(ins GPR:$rs1, VGPR:$vs2, VGPR:$vd),
opcodestr, "$vd, $rs1, $vs2">;
// op vd, vs2, imm
@ -877,11 +880,14 @@ defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Single-Width Integer Multiply-Add Instructions
defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>;
defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>;
defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
let Constraints = "$vd_wb = $vs2" in {
defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>;
defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>;
}
let Constraints = "$vd_wb = $vd" in {
defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
}
// Vector Widening Integer Multiply-Add Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
@ -1175,12 +1181,12 @@ def : PatFloatSetCC<[VGPR, GPRF32], SETOGT, VMFGT_VF>;
def : PatFloatSetCC<[VGPR, GPRF32], SETOGE, VMFGE_VF>;
// TODO: add vfcvt.rtz
def : PatFXConvert<DivergentUnaryFrag<any_fp_to_sint>, [i32, f32], VFCVT_X_F_V>;
def : PatFXConvert<DivergentUnaryFrag<any_fp_to_sint>, [XLenVT, f32], VFCVT_X_F_V>;
def : PatFXConvert<DivergentUnaryFrag<any_fp_to_uint>,
[i32, f32], VFCVT_XU_F_V>;
def : PatFXConvert<DivergentUnaryFrag<any_sint_to_fp>, [f32, i32], VFCVT_F_X_V>;
[XLenVT, f32], VFCVT_XU_F_V>;
def : PatFXConvert<DivergentUnaryFrag<any_sint_to_fp>, [f32, XLenVT], VFCVT_F_X_V>;
def : PatFXConvert<DivergentUnaryFrag<any_uint_to_fp>,
[f32, i32], VFCVT_F_XU_V>;
[f32, XLenVT], VFCVT_F_XU_V>;
// Patterns for vrsub.vx and vrsub.vi
def : Pat<(sub GPR:$rs1, VGPR:$rs2), (VRSUB_VX VGPR:$rs2, GPR:$rs1)>;
@ -1195,14 +1201,18 @@ defm : PatVFRBin<DivergentBinFrag<fminimum>, [VFMIN_VV, VFMIN_VF]>;
defm : PatVFRBin<DivergentBinFrag<fmaximum>, [VFMAX_VV, VFMAX_VF]>;
// Patterns for ternary operations
// vfmadd.vv && vfmadd.vf
defm : PatVFRTer<[DivergentBinFrag<fadd>, DivergentBinFrag<fmul>],
[VFMADD_VV, VFMADD_VF]>;
defm : AnyPatVFRTer<any_fma, [VFMADD_VV, VFMADD_VF]>;
// TODO: vmacc/vfmacc, vnmsac/vfnmsac
defm : PatVXFTer<[DivergentBinFrag<fadd>, DivergentBinFrag<fmul>], GPRF32,
f32, [VFMADD_VV, VFMADD_VF]>;
defm : PatVXFTer<[DivergentBinFrag<fsub>, DivergentBinFrag<fmul>], GPRF32,
f32, [VFMSUB_VV, VFMSUB_VF, VFNMADD_VV, VFNMADD_VF]>;
defm : PatVXFTer<[ReverseDivergentBinFrag<fsub>, DivergentBinFrag<fmul>],
GPRF32, f32, [VFNMSUB_VV, VFNMSUB_VF]>;
defm : PatVXFTer<[DivergentBinFrag<add>, DivergentBinFrag<mul>], GPR,
XLenVT, [VMADD_VV, VMADD_VX]>;
defm : PatVXFTer<[ReverseDivergentBinFrag<sub>, DivergentBinFrag<mul>],
GPR, XLenVT, [VNMSUB_VV, VNMSUB_VX]>;
// vfmsub.vv && vfmsub.vf
defm : PatVFRTer<[DivergentBinFrag<fsub>, DivergentBinFrag<fmul>],
[VFMSUB_VV, VFMSUB_VF]>;
// vfsqrt.v
// TODO: vfrec7.v? what is this
def : Pat<(any_fsqrt (f32 VGPR:$rs1)), (VFSQRT_V (f32 VGPR:$rs1))>;

View File

@ -37,15 +37,14 @@ define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: vmv.s.x v0, zero
; VENTUS-NEXT: slli a0, a0, 2
; VENTUS-NEXT: add s2, s2, a0
; VENTUS-NEXT: vlw v1, zero(s2)
; VENTUS-NEXT: add s1, s1, a0
; VENTUS-NEXT: lw a1, 0(s1)
; VENTUS-NEXT: vlw v1, zero(s2)
; VENTUS-NEXT: vmv.s.x v2, a1
; VENTUS-NEXT: add a0, s0, a0
; VENTUS-NEXT: lw a1, 0(a0)
; VENTUS-NEXT: vmul.vv v1, v2, v1
; VENTUS-NEXT: lw a2, 0(a0)
; VENTUS-NEXT: vmv.s.x v2, a1
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vmv.s.x v3, a2
; VENTUS-NEXT: vmadd.vv v1, v2, v3
; VENTUS-NEXT: vsuxei32.v v1, (a0), v0
; VENTUS-NEXT: j .LBB0_3
; VENTUS-NEXT: .LBB0_2: # %if.else

View File

@ -6,16 +6,17 @@
define i32 @foo(i32 noundef %cond, i32 noundef %a, i32 noundef %b, i32 noundef %c) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmv.s.x v4, zero
; VENTUS-NEXT: vmul.vv v1, v2, v1
; VENTUS-NEXT: vbeq v0, v4, .LBB0_2
; VENTUS-NEXT: vadd.vx v4, v0, zero
; VENTUS-NEXT: vmv.s.x v5, zero
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: vbeq v4, v5, .LBB0_2
; VENTUS-NEXT: # %bb.1: # %entry
; VENTUS-NEXT: join .LBB0_3
; VENTUS-NEXT: .LBB0_2: # %entry
; VENTUS-NEXT: vrsub.vi v3, v3, 0
; VENTUS-NEXT: join .LBB0_3
; VENTUS-NEXT: .LBB0_3: # %entry
; VENTUS-NEXT: vadd.vv v0, v3, v1
; VENTUS-NEXT: vmadd.vv v0, v2, v3
; VENTUS-NEXT: ret
entry:
%tobool.not = icmp eq i32 %cond, 0

View File

@ -312,3 +312,116 @@ entry:
%conv = uitofp i32 %a to float
ret float %conv
}
; Float multiply-add, vector-vector case: a * b + c written as a separate fmul
; and fadd. The expected assembly contains a single vfmadd.vv in place of the
; two operations, followed by a vadd.vx with the zero register that moves the
; result from v1 into v0.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local float @fmadd_v(float noundef %a, float noundef %b, float noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fmadd_v:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vfmadd.vv v1, v0, v2
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: ret
entry:
%mul = fmul float %a, %b
%add = fadd float %mul, %c
ret float %add
}
; Float multiply-add with a constant multiplicand: b * 1.2 + c expressed through
; the llvm.fmuladd.f32 intrinsic (0x3FF3333340000000 is float 1.2). %a is
; unused. The expected assembly loads the constant from the constant pool into
; a0, moves it into v0 with vmv.s.x and then uses vfmadd.vv.
; NOTE(review) - despite the _f suffix, the expected output is the .vv form, not
; vfmadd.vf; confirm whether the .vf pattern was meant to be exercised here.
; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
define dso_local float @fmadd_f(float noundef %a, float noundef %b, float noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fmadd_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(.LCPI19_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI19_0)(a0)
; VENTUS-NEXT: vmv.s.x v0, a0
; VENTUS-NEXT: vfmadd.vv v0, v1, v2
; VENTUS-NEXT: ret
entry:
%0 = tail call float @llvm.fmuladd.f32(float %b, float 0x3FF3333340000000, float %c)
ret float %0
}
; Negated float multiply-add, vector-vector case: (-a) * b - c written as
; fneg, fmul and fsub. The expected assembly contains a single vfnmadd.vv,
; followed by a vadd.vx with the zero register that moves the result from v1
; into v0.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local float @fnmadd_v(float noundef %a, float noundef %b, float noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fnmadd_v:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vfnmadd.vv v1, v0, v2
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: ret
entry:
%0 = fneg float %a
%fneg = fmul float %0, %b
%sub = fsub float %fneg, %c
ret float %sub
}
; Constant-multiplicand variant: b * (-1.2) - c, where 0xBFF3333340000000 is
; float -1.2. %a is unused. The negation is folded into the constant, so the IR
; has no fneg node.
; NOTE(review) - the expected assembly materialises the constant into v0 and
; uses vfmsub.vv; neither vfnmadd nor a .vf form appears, so the function name
; does not describe the instruction that is checked. Confirm this is intended.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local float @fnmadd_f(float noundef %a, float noundef %b, float noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fnmadd_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(.LCPI21_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI21_0)(a0)
; VENTUS-NEXT: vmv.s.x v0, a0
; VENTUS-NEXT: vfmsub.vv v0, v1, v2
; VENTUS-NEXT: ret
entry:
%fneg = fmul float %b, 0xBFF3333340000000
%sub = fsub float %fneg, %c
ret float %sub
}
; Float multiply-subtract, vector-vector case: a * b - c written as a separate
; fmul and fsub. The expected assembly contains a single vfmsub.vv, followed by
; a vadd.vx with the zero register that moves the result from v1 into v0.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local float @fmsub_v(float noundef %a, float noundef %b, float noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fmsub_v:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vfmsub.vv v1, v0, v2
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: ret
entry:
%mul = fmul float %a, %b
%sub = fsub float %mul, %c
ret float %sub
}
; Float multiply-subtract with a constant multiplicand: a * 1.2 - b, where
; 0x3FF3333340000000 is float 1.2. This function takes only two arguments.
; The expected assembly loads the constant from the constant pool, moves it
; into v2 with vmv.s.x, uses vfmsub.vv and finally moves the result from v2 into
; v0 via vadd.vx with the zero register.
; NOTE(review) - the expected output is the .vv form, not vfmsub.vf; confirm
; whether the .vf pattern was meant to be exercised here.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local float @fmsub_f(float noundef %a, float noundef %b) local_unnamed_addr {
; VENTUS-LABEL: fmsub_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(.LCPI23_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI23_0)(a0)
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vfmsub.vv v2, v0, v1
; VENTUS-NEXT: vadd.vx v0, v2, zero
; VENTUS-NEXT: ret
entry:
%mul = fmul float %a, 0x3FF3333340000000
%sub = fsub float %mul, %b
ret float %sub
}
; Negated float multiply-subtract, vector-vector case: c - a * b, i.e. the
; product is the right-hand operand of the fsub. The expected assembly contains
; a single vfnmsub.vv, followed by a vadd.vx with the zero register that moves
; the result from v1 into v0.
; The result value is named %add although it is produced by an fsub.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local float @fnmsub_v(float noundef %a, float noundef %b, float noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fnmsub_v:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vfnmsub.vv v1, v0, v2
; VENTUS-NEXT: vadd.vx v0, v1, zero
; VENTUS-NEXT: ret
entry:
%0 = fmul float %a, %b
%add = fsub float %c, %0
ret float %add
}
; Constant-multiplicand variant: c - b * 1.2, where 0x3FF3333340000000 is
; float 1.2. %a is unused.
; NOTE(review) - the expected assembly loads a constant from .LCPI25_0 into v0
; and uses vfmadd.vv rather than vfnmsub; presumably the constant pool entry
; holds the negated value (-1.2) so that the subtraction becomes an addition -
; verify against the emitted constant pool. No .vf form is checked.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local float @fnmsub_f(float noundef %a, float noundef %b, float noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fnmsub_f:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: lui a0, %hi(.LCPI25_0)
; VENTUS-NEXT: lw a0, %lo(.LCPI25_0)(a0)
; VENTUS-NEXT: vmv.s.x v0, a0
; VENTUS-NEXT: vfmadd.vv v0, v1, v2
; VENTUS-NEXT: ret
entry:
%fneg = fmul float %b, 0x3FF3333340000000
%0 = fsub float %c, %fneg
ret float %0
}

View File

@ -204,3 +204,79 @@ define i32 @vrsub_bigimm(i32 %a) nounwind {
%1 = sub i32 65536, %a
ret i32 %1
}
; Integer multiply-add, vector-vector case: b * a + c on i32. The expected
; assembly is a single vmadd.vv that leaves the result in v0.
; NOTE(review) - the name carries an "f" prefix although the operation is
; integer, and the IR and expectations are identical to @madd_v later in this
; file; consider whether both are needed.
define dso_local i32 @fmadd_v(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr {
; VENTUS-LABEL: fmadd_v:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmadd.vv v0, v1, v2
; VENTUS-NEXT: ret
entry:
%mul = mul nsw i32 %b, %a
%add = add nsw i32 %mul, %c
ret i32 %add
}
; Integer a * 5 + b. The expected assembly contains no multiply-add: the
; multiplication by 5 appears as a shift left by 2 (vsll.vi) plus an add of the
; original value, followed by a second vadd.vv for "+ b".
; NOTE(review) - the name suggests a vmadd.vx test, but no vmadd instruction is
; expected here; the name also carries an "f" prefix for an integer operation.
define dso_local i32 @fmadd_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: fmadd_x:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vsll.vi v2, v0, 2
; VENTUS-NEXT: vadd.vv v0, v2, v0
; VENTUS-NEXT: vadd.vv v0, v0, v1
; VENTUS-NEXT: ret
entry:
%mul = mul nsw i32 %a, 5
%add = add nsw i32 %mul, %b
ret i32 %add
}
; Integer negated multiply-subtract, vector-vector case: c - b * a on i32, with
; the product as the right-hand operand of the sub. The expected assembly is a
; single vnmsub.vv that leaves the result in v0.
; The result value is named %add although it is produced by a sub.
define dso_local i32 @nmsub_v(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr {
; VENTUS-LABEL: nmsub_v:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vnmsub.vv v0, v1, v2
; VENTUS-NEXT: ret
entry:
%mul = mul nsw i32 %b, %a
%add = sub i32 %c, %mul
ret i32 %add
}
; Integer a * (-11) + b. The negation is carried by the constant, so the IR is a
; plain mul followed by add. The expected assembly loads -11 into a0, moves it
; into v2 with vmv.s.x, uses vmadd.vv and then moves the result from v2 into v0
; via vadd.vx with the zero register.
; NOTE(review) - neither vnmsub nor a .vx form is expected here, so the function
; name does not describe the instruction that is checked. Confirm intent.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local i32 @nmsub_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: nmsub_x:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: li a0, -11
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vmadd.vv v2, v0, v1
; VENTUS-NEXT: vadd.vx v0, v2, zero
; VENTUS-NEXT: ret
entry:
%mul.neg = mul i32 %a, -11
%add = add nsw i32 %mul.neg, %b
ret i32 %add
}
; Integer multiply-add, vector-vector case: b * a + c on i32. The expected
; assembly is a single vmadd.vv that leaves the result in v0.
; NOTE(review) - the IR and expectations are identical to the integer @fmadd_v
; earlier in this file.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local i32 @madd_v(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_unnamed_addr {
; VENTUS-LABEL: madd_v:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: vmadd.vv v0, v1, v2
; VENTUS-NEXT: ret
entry:
%mul = mul nsw i32 %b, %a
%add = add nsw i32 %mul, %c
ret i32 %add
}
; Integer multiply-add with a constant multiplicand: a * 11 + b. The expected
; assembly loads 11 into a0, moves it into v2 with vmv.s.x, uses vmadd.vv and
; then moves the result from v2 into v0 via vadd.vx with the zero register.
; NOTE(review) - the expected output is the .vv form, not vmadd.vx; confirm
; whether the .vx pattern was meant to be exercised here.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
define dso_local i32 @madd_x(i32 noundef %a, i32 noundef %b) local_unnamed_addr {
; VENTUS-LABEL: madd_x:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: li a0, 11
; VENTUS-NEXT: vmv.s.x v2, a0
; VENTUS-NEXT: vmadd.vv v2, v0, v1
; VENTUS-NEXT: vadd.vx v0, v2, zero
; VENTUS-NEXT: ret
entry:
%mul = mul nsw i32 %a, 11
%add = add nsw i32 %mul, %b
ret i32 %add
}