[VP][RISCV] Add vp.floor, vp.round, vp.roundeven and their RISC-V support

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D134759
This commit is contained in:
eopXD 2022-09-27 13:03:25 -07:00
parent 95d2367647
commit 9677d70eb2
12 changed files with 4841 additions and 9 deletions

View File

@ -14727,6 +14727,8 @@ Semantics:
This function returns the same values as the libm ``copysign``
functions would, and handles error conditions in the same way.
.. _int_floor:
'``llvm.floor.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -14908,6 +14910,8 @@ Semantics:
This function returns the same values as the libm ``nearbyint``
functions would, and handles error conditions in the same way.
.. _int_round:
'``llvm.round.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -14944,6 +14948,8 @@ Semantics:
This function returns the same values as the libm ``round``
functions would, and handles error conditions in the same way.
.. _int_roundeven:
'``llvm.roundeven.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -21462,6 +21468,147 @@ Examples:
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
.. _int_vp_floor:
'``llvm.vp.floor.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x float> @llvm.vp.floor.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x float> @llvm.vp.floor.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x double> @llvm.vp.floor.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
Overview:
"""""""""
Predicated floating-point floor of a vector of floating-point values.
Arguments:
""""""""""
The first operand and the result have the same vector of floating-point type.
The second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation.
Semantics:
""""""""""
The '``llvm.vp.floor``' intrinsic performs floating-point floor
(:ref:`floor <int_floor>`) of the first vector operand on each enabled lane. The
result on disabled lanes is undefined.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
.. _int_vp_round:
'``llvm.vp.round.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x float> @llvm.vp.round.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x float> @llvm.vp.round.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x double> @llvm.vp.round.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
Overview:
"""""""""
Predicated floating-point round of a vector of floating-point values.
Arguments:
""""""""""
The first operand and the result have the same vector of floating-point type.
The second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation.
Semantics:
""""""""""
The '``llvm.vp.round``' intrinsic performs floating-point round
(:ref:`round <int_round>`) of the first vector operand on each enabled lane. The
result on disabled lanes is undefined.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
.. _int_vp_roundeven:
'``llvm.vp.roundeven.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x float> @llvm.vp.roundeven.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x double> @llvm.vp.roundeven.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
Overview:
"""""""""
Predicated floating-point roundeven of a vector of floating-point values.
Arguments:
""""""""""
The first operand and the result have the same vector of floating-point type.
The second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation.
Semantics:
""""""""""
The '``llvm.vp.roundeven``' intrinsic performs floating-point roundeven
(:ref:`roundeven <int_roundeven>`) of the first vector operand on each enabled lane. The
result on disabled lanes is undefined.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
.. _int_mload_mstore:
Masked Vector Load and Store Intrinsics

View File

@ -1595,6 +1595,18 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_floor : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_round : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_roundeven : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
// Casts
def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],

View File

@ -265,6 +265,18 @@ END_REGISTER_VP(vp_maxnum, VP_FMAXNUM)
BEGIN_REGISTER_VP(vp_ceil, 1, 2, VP_FCEIL, -1)
END_REGISTER_VP(vp_ceil, VP_FCEIL)
// llvm.vp.floor(x,mask,vlen)
BEGIN_REGISTER_VP(vp_floor, 1, 2, VP_FFLOOR, -1)
END_REGISTER_VP(vp_floor, VP_FFLOOR)
// llvm.vp.round(x,mask,vlen)
BEGIN_REGISTER_VP(vp_round, 1, 2, VP_FROUND, -1)
END_REGISTER_VP(vp_round, VP_FROUND)
// llvm.vp.roundeven(x,mask,vlen)
BEGIN_REGISTER_VP(vp_roundeven, 1, 2, VP_FROUNDEVEN, -1)
END_REGISTER_VP(vp_roundeven, VP_FROUNDEVEN)
///// } Floating-Point Arithmetic
///// Type Casts {

View File

@ -1025,6 +1025,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FEXP:
case ISD::FEXP2:
case ISD::FFLOOR:
case ISD::VP_FFLOOR:
case ISD::FLOG:
case ISD::FLOG10:
case ISD::FLOG2:
@ -1042,7 +1043,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
case ISD::VP_FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT: case ISD::VP_SQRT:
case ISD::FTRUNC:
@ -4086,6 +4089,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FABS:
case ISD::VP_SQRT:
case ISD::VP_FCEIL:
case ISD::VP_FFLOOR:
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FCANONICALIZE:

View File

@ -451,7 +451,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
ISD::VP_FCEIL};
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@ -1946,21 +1947,27 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
switch (Opc) {
case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
case ISD::FROUNDEVEN:
case ISD::VP_FROUNDEVEN:
return RISCVFPRndMode::RNE;
case ISD::FTRUNC: return RISCVFPRndMode::RTZ;
case ISD::FFLOOR: return RISCVFPRndMode::RDN;
case ISD::FFLOOR:
case ISD::VP_FFLOOR:
return RISCVFPRndMode::RDN;
case ISD::FCEIL:
case ISD::VP_FCEIL:
return RISCVFPRndMode::RUP;
case ISD::FROUND: return RISCVFPRndMode::RMM;
case ISD::FROUND:
case ISD::VP_FROUND:
return RISCVFPRndMode::RMM;
}
return RISCVFPRndMode::Invalid;
}
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND and VP_FCEIL by converting to the
// integer domain/ and back. Taking care to avoid converting values that are nan
// or already correct.
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
// and VP_FROUNDEVEN by converting to the integer domain and back. Taking care
// to avoid converting values that are nan or already correct.
static SDValue
lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
@ -1978,7 +1985,10 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
}
SDValue Mask, VL;
bool IsVP = Op->getOpcode() == ISD::VP_FCEIL;
bool IsVP = Op->getOpcode() == ISD::VP_FCEIL ||
Op->getOpcode() == ISD::VP_FFLOOR ||
Op->getOpcode() == ISD::VP_FROUND ||
Op->getOpcode() == ISD::VP_FROUNDEVEN;
if (IsVP) {
Mask = Op.getOperand(1);
@ -2024,7 +2034,10 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
case ISD::FCEIL:
case ISD::VP_FCEIL:
case ISD::FFLOOR:
case ISD::FROUND: {
case ISD::VP_FFLOOR:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN: {
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
assert(FRM != RISCVFPRndMode::Invalid);
Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src,
@ -3949,6 +3962,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
return lowerVPStridedStore(Op, DAG);
case ISD::VP_FCEIL:
case ISD::VP_FFLOOR:
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
}
}

View File

@ -0,0 +1,749 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.floor.v2f16(<2 x half>, <2 x i1>, i32)
define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
declare <4 x half> @llvm.vp.floor.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
declare <8 x half> @llvm.vp.floor.v8f16(<8 x half>, <8 x i1>, i32)
define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
declare <16 x half> @llvm.vp.floor.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
declare <2 x float> @llvm.vp.floor.v2f32(<2 x float>, <2 x i1>, i32)
define <2 x float> @vp_floor_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
define <2 x float> @vp_floor_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
declare <4 x float> @llvm.vp.floor.v4f32(<4 x float>, <4 x i1>, i32)
define <4 x float> @vp_floor_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
define <4 x float> @vp_floor_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
define <8 x float> @vp_floor_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
declare <16 x float> @llvm.vp.floor.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
declare <2 x double> @llvm.vp.floor.v2f64(<2 x double>, <2 x i1>, i32)
define <2 x double> @vp_floor_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
define <4 x double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
define <8 x double> @vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
declare <15 x double> @llvm.vp.floor.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v15f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <15 x i1> poison, i1 true, i32 0
%m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
%v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}
define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vmset.m v1
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}

View File

@ -0,0 +1,749 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.round.v2f16(<2 x half>, <2 x i1>, i32)
define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
declare <4 x half> @llvm.vp.round.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
declare <8 x half> @llvm.vp.round.v8f16(<8 x half>, <8 x i1>, i32)
define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
declare <16 x half> @llvm.vp.round.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
declare <2 x float> @llvm.vp.round.v2f32(<2 x float>, <2 x i1>, i32)
define <2 x float> @vp_round_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.vp.round.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
define <2 x float> @vp_round_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.round.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
declare <4 x float> @llvm.vp.round.v4f32(<4 x float>, <4 x i1>, i32)
define <4 x float> @vp_round_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
define <4 x float> @vp_round_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
define <8 x float> @vp_round_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
declare <16 x float> @llvm.vp.round.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
declare <2 x double> @llvm.vp.round.v2f64(<2 x double>, <2 x i1>, i32)
define <2 x double> @vp_round_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
declare <15 x double> @llvm.vp.round.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v15f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <15 x i1> poison, i1 true, i32 0
%m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
%v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
declare <32 x double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}
define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vmset.m v1
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}

View File

@ -0,0 +1,749 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.roundeven.v2f16(<2 x half>, <2 x i1>, i32)
define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
declare <4 x half> @llvm.vp.roundeven.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
declare <8 x half> @llvm.vp.roundeven.v8f16(<8 x half>, <8 x i1>, i32)
define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
declare <16 x half> @llvm.vp.roundeven.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
declare <2 x float> @llvm.vp.roundeven.v2f32(<2 x float>, <2 x i1>, i32)
define <2 x float> @vp_roundeven_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
define <2 x float> @vp_roundeven_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
declare <4 x float> @llvm.vp.roundeven.v4f32(<4 x float>, <4 x i1>, i32)
define <4 x float> @vp_roundeven_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
define <4 x float> @vp_roundeven_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
define <8 x float> @vp_roundeven_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
declare <16 x float> @llvm.vp.roundeven.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
declare <2 x double> @llvm.vp.roundeven.v2f64(<2 x double>, <2 x i1>, i32)
define <2 x double> @vp_roundeven_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
declare <15 x double> @llvm.vp.roundeven.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v15f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <15 x i1> poison, i1 true, i32 0
%m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
%v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}
define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vmset.m v1
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}

View File

@ -0,0 +1,795 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_floor_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
define <vscale x 1 x half> @vp_floor_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
declare <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
define <vscale x 2 x half> @vp_floor_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
define <vscale x 2 x half> @vp_floor_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x half> @llvm.vp.floor.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
declare <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
define <vscale x 4 x half> @vp_floor_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
define <vscale x 4 x half> @vp_floor_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x half> @llvm.vp.floor.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
declare <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
define <vscale x 8 x half> @vp_floor_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x half> @llvm.vp.floor.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
declare <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
define <vscale x 16 x half> @vp_floor_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x half> @llvm.vp.floor.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
declare <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
define <vscale x 32 x half> @vp_floor_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv32f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
declare <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
define <vscale x 1 x float> @vp_floor_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
define <vscale x 1 x float> @vp_floor_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
declare <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
define <vscale x 2 x float> @vp_floor_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
define <vscale x 2 x float> @vp_floor_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
declare <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
define <vscale x 4 x float> @vp_floor_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
define <vscale x 4 x float> @vp_floor_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
declare <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
define <vscale x 8 x float> @vp_floor_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
define <vscale x 8 x float> @vp_floor_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
declare <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
define <vscale x 16 x float> @vp_floor_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
declare <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
define <vscale x 1 x double> @vp_floor_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
define <vscale x 1 x double> @vp_floor_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv1f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
declare <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
define <vscale x 2 x double> @vp_floor_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
define <vscale x 4 x double> @vp_floor_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
declare <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
define <vscale x 7 x double> @vp_floor_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv7f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
%v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
declare <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
define <vscale x 8 x double> @vp_floor_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Test splitting.
declare <vscale x 16 x double> @llvm.vp.floor.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a4, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vslidedown.vx v0, v0, a4
; CHECK-NEXT: bltu a0, a3, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB32_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <vscale x 16 x double> @llvm.vp.floor.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}
define <vscale x 16 x double> @vp_floor_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_floor_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 2
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: sub a1, a0, a1
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB33_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: .LBB33_4:
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x double> @llvm.vp.floor.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

View File

@ -0,0 +1,795 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_round_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
define <vscale x 1 x half> @vp_round_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
declare <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
define <vscale x 2 x half> @vp_round_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
define <vscale x 2 x half> @vp_round_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x half> @llvm.vp.round.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
declare <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
define <vscale x 4 x half> @vp_round_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x half> @llvm.vp.round.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
declare <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
define <vscale x 8 x half> @vp_round_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x half> @llvm.vp.round.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
declare <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
define <vscale x 16 x half> @vp_round_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x half> @llvm.vp.round.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
declare <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv32f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
declare <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
define <vscale x 1 x float> @vp_round_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
define <vscale x 1 x float> @vp_round_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
declare <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
define <vscale x 2 x float> @vp_round_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
define <vscale x 2 x float> @vp_round_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
declare <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
define <vscale x 4 x float> @vp_round_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
define <vscale x 4 x float> @vp_round_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
declare <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
define <vscale x 8 x float> @vp_round_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
define <vscale x 8 x float> @vp_round_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
declare <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
define <vscale x 16 x float> @vp_round_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
declare <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
define <vscale x 1 x double> @vp_round_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
define <vscale x 1 x double> @vp_round_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv1f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
declare <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
define <vscale x 2 x double> @vp_round_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
define <vscale x 4 x double> @vp_round_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
declare <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
define <vscale x 7 x double> @vp_round_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv7f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
%v = call <vscale x 7 x double> @llvm.vp.round.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
declare <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
define <vscale x 8 x double> @vp_round_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Test splitting.
declare <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a4, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vslidedown.vx v0, v0, a4
; CHECK-NEXT: bltu a0, a3, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB32_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}
define <vscale x 16 x double> @vp_round_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_round_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 4
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: sub a1, a0, a1
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB33_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: .LBB33_4:
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 4
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x double> @llvm.vp.round.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

View File

@ -0,0 +1,795 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vp_roundeven_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
define <vscale x 1 x half> @vp_roundeven_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
declare <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
define <vscale x 2 x half> @vp_roundeven_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
define <vscale x 2 x half> @vp_roundeven_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x half> @llvm.vp.roundeven.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
declare <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
define <vscale x 4 x half> @vp_roundeven_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x half> @llvm.vp.roundeven.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
declare <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
define <vscale x 8 x half> @vp_roundeven_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x half> @llvm.vp.roundeven.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
declare <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
define <vscale x 16 x half> @vp_roundeven_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x half> @llvm.vp.roundeven.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
declare <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv32f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x half> @llvm.vp.roundeven.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
declare <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
define <vscale x 1 x float> @vp_roundeven_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
define <vscale x 1 x float> @vp_roundeven_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
declare <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
define <vscale x 2 x float> @vp_roundeven_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
define <vscale x 2 x float> @vp_roundeven_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
declare <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
define <vscale x 4 x float> @vp_roundeven_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
define <vscale x 4 x float> @vp_roundeven_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
declare <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
define <vscale x 8 x float> @vp_roundeven_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
define <vscale x 8 x float> @vp_roundeven_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
declare <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
define <vscale x 16 x float> @vp_roundeven_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
define <vscale x 16 x float> @vp_roundeven_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
declare <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
define <vscale x 1 x double> @vp_roundeven_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
define <vscale x 1 x double> @vp_roundeven_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
declare <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
define <vscale x 2 x double> @vp_roundeven_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
define <vscale x 4 x double> @vp_roundeven_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
declare <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
define <vscale x 7 x double> @vp_roundeven_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
%v = call <vscale x 7 x double> @llvm.vp.roundeven.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
declare <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
define <vscale x 8 x double> @vp_roundeven_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Test splitting.
declare <vscale x 16 x double> @llvm.vp.roundeven.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a4, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vslidedown.vx v0, v0, a4
; CHECK-NEXT: bltu a0, a3, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB32_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <vscale x 16 x double> @llvm.vp.roundeven.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}
define <vscale x 16 x double> @vp_roundeven_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 0
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: sub a1, a0, a1
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB33_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: .LBB33_4:
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x double> @llvm.vp.roundeven.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

View File

@ -52,6 +52,13 @@ protected:
Str << " declare <8 x float> @llvm.vp." << BinaryFPOpcode
<< ".v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) ";
Str << " declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x "
"i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, "