[VP][RISCV] Add vp.fmuladd.

Expanded in SelectionDAGBuilder similar to llvm.fmuladd.

Reviewed By: frasercrmck, simoll

Differential Revision: https://reviews.llvm.org/D134474
This commit is contained in:
Craig Topper 2022-09-27 09:35:21 -07:00
parent b4869f2fa7
commit a6383bb51c
7 changed files with 8648 additions and 0 deletions

View File

@ -16692,6 +16692,8 @@ The canonicalization operation may be optimized away if:
- The result is consumed only by (or fused with) other floating-point
operations. That is, the bits of the floating-point value are not examined.
.. _int_fmuladd:
'``llvm.fmuladd.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -19389,6 +19391,57 @@ Examples:
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
.. _int_vp_fmuladd:
'``llvm.vp.fmuladd.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x float> @llvm.vp.fmuladd.v16f32 (<16 x float> <left_op>, <16 x float> <middle_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x float> @llvm.vp.fmuladd.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <middle_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x double> @llvm.vp.fmuladd.v256f64 (<256 x double> <left_op>, <256 x double> <middle_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
Overview:
"""""""""
Predicated floating-point multiply-add of three vectors of floating-point values
that can be fused if the code generator determines that (a) the target instruction
set has support for a fused operation, and (b) that the fused operation is more
efficient than the equivalent, separate pair of mul and add instructions.
Arguments:
""""""""""
The first three operands and the result have the same vector of floating-point
type. The fourth operand is the vector mask and has the same number of elements
as the result vector type. The fifth operand is the explicit vector length of
the operation.
Semantics:
""""""""""
The '``llvm.vp.fmuladd``' intrinsic performs floating-point multiply-add (:ref:`llvm.fmuladd <int_fmuladd>`)
of the first, second, and third vector operand on each enabled lane. The result
on disabled lanes is a :ref:`poison value <poisonvalues>`. The operation is
performed in the default floating-point environment.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
.. _int_vp_reduce_add:
'``llvm.vp.reduce.add.*``' Intrinsics

View File

@ -1575,6 +1575,12 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
// llvm.vp.fmuladd: overloaded on a vector result type. The three value
// operands share the result type; they are followed by an i1 operand that is
// either scalar or has the same vector width as the result, and an i32 operand.
def int_vp_fmuladd : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_minnum : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,

View File

@ -248,6 +248,11 @@ BEGIN_REGISTER_VP(vp_fma, 3, 4, VP_FMA, -1)
VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_fma)
END_REGISTER_VP(vp_fma, VP_FMA)
// llvm.vp.fmuladd(x,y,z,mask,vlen)
// Registers the vp_fmuladd intrinsic together with the VP_FMULADD node name.
// NOTE(review): the literals 3 and 4 presumably are the zero-based operand
// positions of mask and vlen (consistent with the signature above) -- confirm
// against the BEGIN_REGISTER_VP macro definition.
BEGIN_REGISTER_VP(vp_fmuladd, 3, 4, VP_FMULADD, -1)
// NOTE(review): presumably names experimental_constrained_fmuladd as the
// constrained-FP counterpart; the meaning of the two leading 1s is defined by
// the macro, which is not visible here.
VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_fmuladd)
END_REGISTER_VP(vp_fmuladd, VP_FMULADD)
// llvm.vp.minnum(x, y, mask,vlen)
BEGIN_REGISTER_VP(vp_minnum, 2, 3, VP_FMINNUM, -1)
END_REGISTER_VP(vp_minnum, VP_FMINNUM)

View File

@ -7699,6 +7699,25 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
visitVPStridedStore(VPIntrin, OpValues);
break;
case ISD::VP_FMULADD: {
// Lower vp.fmuladd either to a single VP_FMA node or to a VP_FMUL feeding a
// VP_FADD, depending on the fusion option and the target hook checked below.
// Five operands are expected: OpValues[0..2] are the multiply-add inputs and
// OpValues[3], OpValues[4] are forwarded unchanged to every node built here.
assert(OpValues.size() == 5 && "Unexpected number of operands");
SDNodeFlags SDFlags;
// Carry over the fast-math flags when the intrinsic is an FPMathOperator.
if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
SDFlags.copyFMF(*FPMO);
// Fuse only when FP op fusion is not set to Strict and the target reports
// that an FMA is faster than separate fmul + fadd for ValueVTs[0].
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) {
// Fused form: all five operands are passed straight through to VP_FMA.
setValue(&VPIntrin, DAG.getNode(ISD::VP_FMA, DL, VTs, OpValues, SDFlags));
} else {
// Unfused form: Mul = OpValues[0] * OpValues[1], then Add = Mul + OpValues[2].
// Both nodes reuse OpValues[3] and OpValues[4] and the same flags.
SDValue Mul = DAG.getNode(
ISD::VP_FMUL, DL, VTs,
{OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, SDFlags);
SDValue Add =
DAG.getNode(ISD::VP_FADD, DL, VTs,
{Mul, OpValues[2], OpValues[3], OpValues[4]}, SDFlags);
setValue(&VPIntrin, Add);
}
break;
}
}
}

View File

@ -0,0 +1,838 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)
define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <2 x half> poison, half %b, i32 0
%vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
%elt.head = insertelement <2 x half> poison, half %b, i32 0
%vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
declare <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)
define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %b, i32 0
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %b, i32 0
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
declare <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)
define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
declare <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)
define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v10, v12
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %b, i32 0
%vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v10
; CHECK-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %b, i32 0
%vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
declare <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)
define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <2 x float> poison, float %b, i32 0
%vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
%elt.head = insertelement <2 x float> poison, float %b, i32 0
%vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <4 x float> poison, float %b, i32 0
%vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
%elt.head = insertelement <4 x float> poison, float %b, i32 0
%vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
declare <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)
define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v10, v12
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <8 x float> poison, float %b, i32 0
%vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v10
; CHECK-NEXT: ret
%elt.head = insertelement <8 x float> poison, float %b, i32 0
%vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
declare <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)
define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v12, v16
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <16 x float> poison, float %b, i32 0
%vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v12
; CHECK-NEXT: ret
%elt.head = insertelement <16 x float> poison, float %b, i32 0
%vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
declare <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)
define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <2 x double> poison, double %b, i32 0
%vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
%elt.head = insertelement <2 x double> poison, double %b, i32 0
%vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
declare <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)
define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v10, v12
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <4 x double> poison, double %b, i32 0
%vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v10
; CHECK-NEXT: ret
%elt.head = insertelement <4 x double> poison, double %b, i32 0
%vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
declare <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)
define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v12, v16
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT: ret
%elt.head = insertelement <8 x double> poison, double %b, i32 0
%vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v12
; CHECK-NEXT: ret
%elt.head = insertelement <8 x double> poison, double %b, i32 0
%vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
declare <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)
define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
; Unmasked vector-vector vp.fmuladd on <15 x double>: the mask is a splat of
; true. The expected assembly matches the masked variant's load/vsetvli
; sequence but emits vfmadd.vv without v0.t and with v8 as the destination,
; so no trailing register copy is expected.
define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v16, v24
; CHECK-NEXT: ret
; Build an all-true mask by splatting i1 true.
%head = insertelement <15 x i1> poison, i1 true, i32 0
%m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
%v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
; vp.fmuladd overload for <16 x double>: three vector operands, a <16 x i1>
; mask and an i32 explicit vector length.
declare <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)
; Masked vector-vector vp.fmuladd on <16 x double>.
; The expected assembly loads one operand from the address in a0 with VL=16 at
; e64/m8, then sets VL from a1 for a masked vfmadd.vv and copies the result
; from v16 into v8.
; NOTE(review): the operand loaded from memory is presumably %c, passed
; indirectly, with %evl arriving in a1 - confirm against the calling convention.
define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
; Unmasked vector-vector vp.fmuladd on <16 x double>: the mask is a splat of
; true. The expected assembly emits vfmadd.vv without v0.t and with v8 as the
; destination, so no trailing register copy is expected.
define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfmadd.vv v8, v16, v24
; CHECK-NEXT: ret
; Build an all-true mask by splatting i1 true.
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
; Masked vector-scalar vp.fmuladd on <16 x double>: the scalar %b is splatted
; into %vb and used as the second multiplicand under the caller-supplied mask.
; The expected assembly is a single masked vfmadd.vf taking the scalar from
; fa0, with VL set from a0 at e64/m8; no memory load of an operand is expected.
define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v16, v0.t
; CHECK-NEXT: ret
; Splat the scalar %b across all 16 lanes.
%elt.head = insertelement <16 x double> poison, double %b, i32 0
%vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
; Unmasked vector-scalar vp.fmuladd on <16 x double>: %b is splatted into %vb
; and the mask %m is a splat of true, so every lane is enabled.
; The expected assembly is a single vfmadd.vf taking the scalar from fa0, with
; VL set from a0 at e64/m8 and no v0.t mask operand.
define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfmadd.vf v8, fa0, v16
; CHECK-NEXT: ret
; Splat the scalar %b across all 16 lanes.
%elt.head = insertelement <16 x double> poison, double %b, i32 0
%vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
; Build an all-true mask by splatting i1 true.
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
; vp.fmuladd overload for <32 x double>: three vector operands, a <32 x i1>
; mask and an i32 explicit vector length.
declare <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)
; Masked vector-vector vp.fmuladd on <32 x double>, which the expected
; assembly handles as two 16-element halves at e64/m8:
;  - A stack area of 48 * vlenb bytes is reserved for m8 spills and released
;    before returning.
;  - The original mask is saved in v1, and v0 is slid down by 2 (at e8) to
;    form the mask for the first vfmadd.vv.
;  - Four 16-element loads are expected: from a0, a0+128, a2 and a2+128.
;  - The first vfmadd.vv runs with VL = a4 - 16, or 0 when that subtraction
;    wraps (the bltu a4, a3 guard); its result is spilled.
;  - The second vfmadd.vv runs with VL = a4 capped at 16 and the mask restored
;    from v1; its result is copied to v8 and the first result is reloaded
;    into v16.
; NOTE(review): a0/a2 presumably point at %b/%c passed indirectly and a4
; presumably carries %evl - confirm against the calling convention.
define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 48
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a4, a3, .LBB50_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a4, a0, .LBB50_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB50_4:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 48
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}
; Unmasked vector-vector vp.fmuladd on <32 x double>: the mask is a splat of
; true. The expected assembly again handles two 16-element halves at e64/m8,
; but with no v0.t operands and no mask save or slide:
;  - A stack area of 24 * vlenb bytes is reserved for m8 spills and released
;    before returning.
;  - v0 is used as an ordinary data register group (it is loaded from a0).
;  - The first vfmadd.vv runs with VL = a4 - 16, or 0 when that subtraction
;    wraps (the bltu a4, a3 guard), and leaves its result in v24.
;  - The second vfmadd.vv runs with VL = a4 capped at 16; its result is copied
;    to v8 and v24 is moved into v16 with a whole-register move.
; NOTE(review): a0/a2 presumably point at %b/%c passed indirectly and a4
; presumably carries %evl - confirm against the calling convention.
define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: vle64.v v24, (a1)
; CHECK-NEXT: addi a3, a4, -16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: bltu a4, a3, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a3
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v24, v16, v8
; CHECK-NEXT: bltu a4, a0, .LBB51_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB51_4:
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, mu
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfmadd.vv v0, v16, v8
; CHECK-NEXT: vmv.v.v v8, v0
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
; Build an all-true mask by splatting i1 true.
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}

File diff suppressed because it is too large Load Diff

View File

@ -60,6 +60,8 @@ protected:
"i32)";
Str << " declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, "
"<8 x float>, <8 x i1>, i32) ";
Str << " declare <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float>, "
"<8 x float>, <8 x float>, <8 x i1>, i32) ";
Str << " declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, "
"<8 x i1>, i32) ";