[VP][RISCV] Add vp.rint and RISC-V support.

FRINT uses the dynamic rounding mode instead of a static rounding mode. This
patch renames VFCVT_X_F_VL to VFCVT_RM_X_F_VL for the static-rounding-mode uses
and adds a new ISD node, VFCVT_X_F_VL, that is selected directly to PseudoVFCVT_X_F_V.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D136662
This commit is contained in:
Yeting Kuo 2022-10-24 23:40:48 +08:00
parent 11d844f96d
commit 71e4e35581
12 changed files with 1633 additions and 15 deletions

View File

@ -14894,6 +14894,8 @@ Semantics:
This function returns the same values as the libm ``trunc`` functions
would, and handles error conditions in the same way.
.. _int_rint:
'``llvm.rint.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -21814,6 +21816,53 @@ Examples:
%t = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
.. _int_vp_rint:
'``llvm.vp.rint.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x float> @llvm.vp.rint.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x float> @llvm.vp.rint.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x double> @llvm.vp.rint.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
Overview:
"""""""""
Predicated floating-point rint of a vector of floating-point values.
Arguments:
""""""""""
The first operand and the result have the same type, which is a vector of floating-point values.
The second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation.
Semantics:
""""""""""
The '``llvm.vp.rint``' intrinsic performs floating-point rint
(:ref:`rint <int_rint>`) of the first vector operand on each enabled lane.
The result on disabled lanes is a :ref:`poison value <poisonvalues>`.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison
.. _int_vp_round:
'``llvm.vp.round.*``' Intrinsics

View File

@ -1637,6 +1637,10 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
// Predicated rint. Operands: the source vector (same type as the result), a
// mask with one i1 per result element, and the i32 explicit vector length.
def int_vp_rint : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
// Casts
def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],

View File

@ -306,6 +306,10 @@ END_REGISTER_VP(vp_roundeven, VP_FROUNDEVEN)
BEGIN_REGISTER_VP(vp_roundtozero, 1, 2, VP_FROUNDTOZERO, -1)
END_REGISTER_VP(vp_roundtozero, VP_FROUNDTOZERO)
// llvm.vp.rint(x,mask,vlen)
BEGIN_REGISTER_VP(vp_rint, 1, 2, VP_FRINT, -1)
END_REGISTER_VP(vp_rint, VP_FRINT)
///// } Floating-Point Arithmetic
///// Type Casts {

View File

@ -1042,6 +1042,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_TO_UINT:
case ISD::VP_FP_TO_UINT:
case ISD::FRINT:
case ISD::VP_FRINT:
case ISD::FROUND:
case ISD::VP_FROUND:
case ISD::FROUNDEVEN:
@ -4093,6 +4094,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_SQRT:
case ISD::VP_FCEIL:
case ISD::VP_FFLOOR:
case ISD::VP_FRINT:
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
case ISD::VP_FROUNDTOZERO:

View File

@ -484,7 +484,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO};
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
ISD::VP_FRINT};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@ -2004,8 +2005,9 @@ static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
}
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
// VP_FROUNDEVEN and VP_FROUNDTOZERO by converting to the integer domain and
// back. Taking care to avoid converting values that are nan or already correct.
// VP_FROUNDEVEN, VP_FROUNDTOZERO and VP_FRINT by converting to the integer
// domain and back. Taking care to avoid converting values that are nan or
// already correct.
static SDValue
lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
@ -2023,13 +2025,7 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
}
SDValue Mask, VL;
bool IsVP = Op->getOpcode() == ISD::VP_FCEIL ||
Op->getOpcode() == ISD::VP_FFLOOR ||
Op->getOpcode() == ISD::VP_FROUND ||
Op->getOpcode() == ISD::VP_FROUNDEVEN ||
Op->getOpcode() == ISD::VP_FROUNDTOZERO;
if (IsVP) {
if (Op->isVPOpcode()) {
Mask = Op.getOperand(1);
VL = Op.getOperand(2);
} else {
@ -2081,8 +2077,7 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
case ISD::VP_FROUNDTOZERO: {
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
assert(FRM != RISCVFPRndMode::Invalid);
Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src,
Mask,
Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
DAG.getTargetConstant(FRM, DL, XLenVT), VL);
break;
}
@ -2090,6 +2085,9 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
Mask, VL);
break;
case ISD::VP_FRINT:
Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
break;
}
Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
@ -4096,6 +4094,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPStridedStore(Op, DAG);
case ISD::VP_FCEIL:
case ISD::VP_FFLOOR:
case ISD::VP_FRINT:
case ISD::VP_FROUND:
case ISD::VP_FROUNDEVEN:
case ISD::VP_FROUNDTOZERO:
@ -12483,6 +12482,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MULHU_VL)
NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
NODE_NAME_CASE(VFCVT_RM_X_F_VL)
NODE_NAME_CASE(VFCVT_X_F_VL)
NODE_NAME_CASE(SINT_TO_FP_VL)
NODE_NAME_CASE(UINT_TO_FP_VL)

View File

@ -232,7 +232,8 @@ enum NodeType : unsigned {
FCOPYSIGN_VL, // Has a merge operand
VFCVT_RTZ_X_F_VL,
VFCVT_RTZ_XU_F_VL,
VFCVT_X_F_VL, // Has a rounding mode operand.
VFCVT_X_F_VL,
VFCVT_RM_X_F_VL, // Has a rounding mode operand.
SINT_TO_FP_VL,
UINT_TO_FP_VL,
FP_ROUND_VL,

View File

@ -152,7 +152,8 @@ def SDT_RISCVVecCvtX2FOp_VL : SDTypeProfile<1, 4, [
SDTCisVT<4, XLenVT>
]>;
def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVVecCvtX2FOp_VL>;
def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVVecCvtX2FOp_VL>;
def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
@ -1705,7 +1706,8 @@ foreach fvti = AllFloatVectors in {
GPR:$vl, fvti.Log2SEW)>;
// 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;

View File

@ -523,6 +523,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto LT = getTypeLegalizationCost(RetTy);
return Cost + (LT.first - 1);
}
case Intrinsic::vp_rint: {
// RISC-V target uses at least 5 instructions to lower rounding intrinsics.
// The vp.rint codegen tests show five vector operations: vfabs.v, vmflt.vf,
// vfcvt.x.f.v, vfcvt.f.x.v and vfsgnj.vv.
unsigned Cost = 5;
auto LT = getTypeLegalizationCost(RetTy);
// Only apply the fixed cost when VP_FRINT is custom-lowered for the legalized
// type. The cost is scaled by LT.first.
// NOTE(review): LT.first is presumably the type-legalization split factor --
// confirm against getTypeLegalizationCost.
if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
return Cost * LT.first;
// Not custom-lowered: leave the switch so the code after it handles the
// intrinsic.
break;
}
}
if (ST->hasVInstructions() && RetTy->isVectorTy()) {

View File

@ -316,6 +316,47 @@ define void @roundeven() {
ret void
}
; Cost-model coverage for llvm.vp.rint on fixed-length and scalable vectors of
; float and double. Every call is expected to be reported with a cost of 5.
; NOTE(review): the scalable-vector calls below spell the type suffix "nvx"
; (and "nvx5f64" for <vscale x 4 x double>), while the CHECK lines show "nxv"
; names in the analysis output. The declarations at the end of this file use
; the same "nvx" spelling, so calls and declarations have to be renamed
; together if this is ever cleaned up.
define void @vp_rint() {
; CHECK-LABEL: 'vp_rint'
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %11 = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
call <vscale x 1 x float> @llvm.vp.rint.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
call <vscale x 2 x float> @llvm.vp.rint.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
call <vscale x 4 x float> @llvm.vp.rint.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
call <vscale x 8 x float> @llvm.vp.rint.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
call <vscale x 16 x float> @llvm.vp.rint.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
call <vscale x 1 x double> @llvm.vp.rint.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
call <vscale x 2 x double> @llvm.vp.rint.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
call <vscale x 4 x double> @llvm.vp.rint.nvx5f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
call <vscale x 8 x double> @llvm.vp.rint.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
ret void
}
declare float @llvm.floor.f32(float)
declare <2 x float> @llvm.floor.v2f32(<2 x float>)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
@ -455,3 +496,22 @@ declare <vscale x 1 x double> @llvm.roundeven.nvx1f64(<vscale x 1 x double>)
declare <vscale x 2 x double> @llvm.roundeven.nvx2f64(<vscale x 2 x double>)
declare <vscale x 4 x double> @llvm.roundeven.nvx5f64(<vscale x 4 x double>)
declare <vscale x 8 x double> @llvm.roundeven.nvx8f64(<vscale x 8 x double>)
; Declarations for the llvm.vp.rint overloads exercised by @vp_rint.
; llvm.vp.rint is a vector-only intrinsic taking (vector, mask, evl), so no
; scalar overload is declared.
; The scalable-vector names keep the "nvx" spelling (and "nvx5f64") because the
; calls in @vp_rint refer to exactly these names.
declare <2 x float> @llvm.vp.rint.v2f32(<2 x float>, <2 x i1>, i32)
declare <4 x float> @llvm.vp.rint.v4f32(<4 x float>, <4 x i1>, i32)
declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)
declare <16 x float> @llvm.vp.rint.v16f32(<16 x float>, <16 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.rint.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.rint.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.rint.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.rint.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.rint.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <2 x double> @llvm.vp.rint.v2f64(<2 x double>, <2 x i1>, i32)
declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.rint.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 2 x double> @llvm.vp.rint.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare <vscale x 4 x double> @llvm.vp.rint.nvx5f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare <vscale x 8 x double> @llvm.vp.rint.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)

View File

@ -0,0 +1,725 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.rint.v2f16(<2 x half>, <2 x i1>, i32)
; vp.rint on <2 x half> with a caller-supplied mask %m and explicit vector
; length %evl. Expected sequence: vfabs.v of the input, vmflt.vf against a
; constant-pool value (the result overwrites the mask in v0), a
; vfcvt.x.f.v / vfcvt.f.x.v round trip, and vfsgnj.vv combining the converted
; value with the original input.
define <2 x half> @vp_rint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
; vp.rint on <2 x half> with an all-true mask. The expected code materializes
; the mask with vmset.m and then runs the masked sequence (vfabs.v, vmflt.vf,
; vfcvt.x.f.v, vfcvt.f.x.v, vfsgnj.vv).
define <2 x half> @vp_rint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
declare <4 x half> @llvm.vp.rint.v4f16(<4 x half>, <4 x i1>, i32)
; vp.rint on <4 x half> with a caller-supplied mask %m and explicit vector
; length %evl. Expected sequence at e16/mf2: vfabs.v, vmflt.vf against a
; constant-pool value (overwrites the mask in v0), vfcvt.x.f.v / vfcvt.f.x.v
; round trip, then vfsgnj.vv with the original input.
define <4 x half> @vp_rint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
; vp.rint on <4 x half> with an all-true mask. The expected code materializes
; the mask with vmset.m and then runs the masked sequence at e16/mf2.
define <4 x half> @vp_rint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
declare <8 x half> @llvm.vp.rint.v8f16(<8 x half>, <8 x i1>, i32)
; vp.rint on <8 x half> with a caller-supplied mask %m and explicit vector
; length %evl. Expected sequence at e16/m1: vfabs.v, vmflt.vf against a
; constant-pool value (overwrites the mask in v0), vfcvt.x.f.v / vfcvt.f.x.v
; round trip, then vfsgnj.vv with the original input.
define <8 x half> @vp_rint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
; vp.rint on <8 x half> with an all-true mask. The expected code materializes
; the mask with vmset.m and then runs the masked sequence at e16/m1.
define <8 x half> @vp_rint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
declare <16 x half> @llvm.vp.rint.v16f16(<16 x half>, <16 x i1>, i32)
; vp.rint on <16 x half> with a caller-supplied mask %m and explicit vector
; length %evl. Same sequence as the smaller types at e16/m2, except that the
; vmflt.vf result is produced in v10 and copied into v0 with vmv1r.v before
; the conversions.
define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
; vp.rint on <16 x half> with an all-true mask. The expected code builds the
; mask with vmset.m in v10, copies it into v0, and then runs the masked
; sequence at e16/m2 (the vmflt.vf result again goes through v10).
define <16 x half> @vp_rint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmset.m v10
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
declare <2 x float> @llvm.vp.rint.v2f32(<2 x float>, <2 x i1>, i32)
; vp.rint on <2 x float> with a caller-supplied mask %m and explicit vector
; length %evl. Expected sequence at e32/mf2: vfabs.v, vmflt.vf against a
; constant-pool value (overwrites the mask in v0), vfcvt.x.f.v / vfcvt.f.x.v
; round trip, then vfsgnj.vv with the original input.
define <2 x float> @vp_rint_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
; vp.rint on <2 x float> with an all-true mask. The expected code materializes
; the mask with vmset.m and then runs the masked sequence at e32/mf2.
define <2 x float> @vp_rint_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
declare <4 x float> @llvm.vp.rint.v4f32(<4 x float>, <4 x i1>, i32)
; vp.rint on <4 x float> with a caller-supplied mask %m and explicit vector
; length %evl. Expected sequence at e32/m1: vfabs.v, vmflt.vf against a
; constant-pool value (overwrites the mask in v0), vfcvt.x.f.v / vfcvt.f.x.v
; round trip, then vfsgnj.vv with the original input.
define <4 x float> @vp_rint_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
; vp.rint on <4 x float> with an all-true mask. The expected code materializes
; the mask with vmset.m and then runs the masked sequence at e32/m1.
define <4 x float> @vp_rint_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32)
; vp.rint on <8 x float> with a caller-supplied mask %m and explicit vector
; length %evl. Same sequence as the smaller types at e32/m2, except that the
; vmflt.vf result is produced in v10 and copied into v0 with vmv1r.v before
; the conversions.
define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
; vp.rint on <8 x float> with an all-true mask. The expected code builds the
; mask with vmset.m in v10, copies it into v0, and then runs the masked
; sequence at e32/m2 (the vmflt.vf result again goes through v10).
define <8 x float> @vp_rint_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmset.m v10
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
declare <16 x float> @llvm.vp.rint.v16f32(<16 x float>, <16 x i1>, i32)
; vp.rint on <16 x float> with a caller-supplied mask %m and explicit vector
; length %evl. Same sequence as the smaller types at e32/m4, except that the
; vmflt.vf result is produced in v12 and copied into v0 with vmv1r.v before
; the conversions.
define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
; vp.rint on <16 x float> with an all-true mask. The expected code builds the
; mask with vmset.m in v12, copies it into v0, and then runs the masked
; sequence at e32/m4 (the vmflt.vf result again goes through v12).
define <16 x float> @vp_rint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmset.m v12
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
declare <2 x double> @llvm.vp.rint.v2f64(<2 x double>, <2 x i1>, i32)
; vp.rint on <2 x double> with a caller-supplied mask %m and explicit vector
; length %evl. Expected sequence at e64/m1: vfabs.v, vmflt.vf against a
; constant-pool value loaded with fld (overwrites the mask in v0),
; vfcvt.x.f.v / vfcvt.f.x.v round trip, then vfsgnj.vv with the original input.
define <2 x double> @vp_rint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
; vp.rint on <2 x double> with an all-true mask. The expected code materializes
; the mask with vmset.m and then runs the masked sequence at e64/m1.
define <2 x double> @vp_rint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
; Splat of i1 true: builds the all-true mask passed to the intrinsic.
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
declare <4 x double> @llvm.vp.rint.v4f64(<4 x double>, <4 x i1>, i32)
; vp.rint on <4 x double> with a caller-supplied mask %m and explicit vector
; length %evl. Same sequence as the smaller types at e64/m2, except that the
; vmflt.vf result is produced in v10 and copied into v0 with vmv1r.v before
; the conversions.
define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
; All-true-mask variant for <4 x double>: %m is a splat of i1 true built with
; insertelement + shufflevector. The expected assembly still materializes the
; mask with vmset.m (under vsetivli zero, 4, e8, mf4), moves it into v0, and
; keeps v0.t on every vector arithmetic instruction that follows.
define <4 x double> @vp_rint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmset.m v10
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
; Intrinsic declaration used by the <8 x double> tests.
declare <8 x double> @llvm.vp.rint.v8f64(<8 x double>, <8 x i1>, i32)
; Masked vp.rint on <8 x double> (e64, m4): %m and %evl are passed straight to
; the intrinsic. The expected assembly copies v0 to v12, lets the masked
; vmflt.vf (against a constant-pool value loaded with fld; the constant itself
; is not shown here) update v12 under the mask-undisturbed (mu) policy, copies
; v12 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round trip and
; reapplies the source sign with vfsgnj.vv.
define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
; All-true-mask variant for <8 x double>: %m is a splat of i1 true built with
; insertelement + shufflevector. The expected assembly still materializes the
; mask with vmset.m (under vsetivli zero, 8, e8, mf2), moves it into v0, and
; keeps v0.t on every vector arithmetic instruction that follows.
define <8 x double> @vp_rint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vmset.m v12
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
; Intrinsic declaration used by the <15 x double> tests.
declare <15 x double> @llvm.vp.rint.v15f64(<15 x double>, <15 x i1>, i32)
; Masked vp.rint on a non-power-of-two element count (<15 x double>). The
; expected assembly runs at e64, m8: v0 is copied to v16, the masked vmflt.vf
; (against a constant-pool value loaded with fld; the constant itself is not
; shown here) updates v16 under the mask-undisturbed (mu) policy, v16 is copied
; back into v0, and the vfcvt.x.f.v / vfcvt.f.x.v round trip is followed by
; vfsgnj.vv to reapply the source sign.
define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
; All-true-mask variant for <15 x double>: %m is a splat of i1 true built with
; insertelement + shufflevector. The expected assembly materializes the mask
; with vmset.m under vsetivli zero, 16, e8, m1 (i.e. 16 mask elements for the
; 15-element vector), moves it into v0, and keeps v0.t on every vector
; arithmetic instruction that follows.
define <15 x double> @vp_rint_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v15f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmset.m v16
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <15 x i1> poison, i1 true, i32 0
%m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
%v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
; Intrinsic declaration used by the <16 x double> tests.
declare <16 x double> @llvm.vp.rint.v16f64(<16 x double>, <16 x i1>, i32)
; Masked vp.rint on <16 x double> (e64, m8): %m and %evl are passed straight to
; the intrinsic. The expected assembly copies v0 to v16, lets the masked
; vmflt.vf (against a constant-pool value loaded with fld; the constant itself
; is not shown here) update v16 under the mask-undisturbed (mu) policy, copies
; v16 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round trip and
; reapplies the source sign with vfsgnj.vv.
define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
; All-true-mask variant for <16 x double>: %m is a splat of i1 true built with
; insertelement + shufflevector. The expected assembly still materializes the
; mask with vmset.m (under vsetivli zero, 16, e8, m1), moves it into v0, and
; keeps v0.t on every vector arithmetic instruction that follows.
define <16 x double> @vp_rint_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmset.m v16
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
; Intrinsic declaration used by the <32 x double> tests.
declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32)
; Masked vp.rint on <32 x double>, which the expected assembly processes as two
; e64/m8 halves (v8 and v16):
;  - the mask for the second half is obtained with vslidedown.vi v1, v0, 2 at
;    e8 (2 bytes = 16 mask bits);
;  - the first half runs with a1 = min(%evl, 16) (li/mv/bltu sequence);
;  - the second half runs with max(%evl - 16, 0), computed branch-free by the
;    addi/sltu/addi/and sequence;
;  - a stack area of 16 bytes plus 8 * vlenb is reserved, and v24 is spilled
;    and reloaded around the vsetvli before the first vfsgnj.vv.
; Each half uses the vfabs.v / vmflt.vf / vfcvt.x.f.v / vfcvt.f.x.v /
; vfsgnj.vv sequence under v0.t; both compares use the same ft0 value loaded
; once from the constant pool (the constant itself is not shown here).
define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v2, v0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vslidedown.vi v1, v0, 2
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}
; All-true-mask variant for <32 x double>: %m is a splat of i1 true built with
; insertelement + shufflevector. The expected assembly processes the vector as
; two e64/m8 halves (v8 and v16):
;  - a 16-element all-ones mask is created once with vmset.m v1 and used as
;    the starting mask for both halves;
;  - the first half runs with a1 = min(%evl, 16) (li/mv/bltu sequence);
;  - the second half runs with max(%evl - 16, 0), computed branch-free by the
;    addi/sltu/addi/and sequence;
;  - a stack area of 16 bytes plus 8 * vlenb is reserved, and v24 is spilled
;    and reloaded around the vsetvli before the first vfsgnj.vv.
; All vector arithmetic instructions keep v0.t even though the IR mask is
; all-true.
define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: vmset.m v1
; CHECK-NEXT: mv a1, a0
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v2, v1
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
; CHECK-NEXT: sltu a0, a0, a1
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}

View File

@ -0,0 +1,761 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; Intrinsic declaration used by the <vscale x 1 x half> tests.
declare <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
; Masked vp.rint on <vscale x 1 x half> (e16, mf4): %m and %evl are passed
; straight to the intrinsic. Expected assembly: vfabs.v, then vmflt.vf against
; a value loaded from the constant pool with flh (the constant itself is not
; shown here) writing the refined mask directly into v0, a vfcvt.x.f.v /
; vfcvt.f.x.v round trip under that mask, and vfsgnj.vv to reapply the source
; sign. Every vector instruction in the sequence carries v0.t.
define <vscale x 1 x half> @vp_rint_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
; All-true-mask variant for <vscale x 1 x half>: %m is a splat of i1 true built
; with insertelement + shufflevector. In the expected assembly vfabs.v and
; vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round trip
; and vfsgnj.vv execute under the compare result in v0.
define <vscale x 1 x half> @vp_rint_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
; Intrinsic declaration used by the <vscale x 2 x half> tests.
declare <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
; Masked vp.rint on <vscale x 2 x half> (e16, mf2): %m and %evl are passed
; straight to the intrinsic. Expected assembly: vfabs.v, then vmflt.vf against
; a value loaded from the constant pool with flh (the constant itself is not
; shown here) writing the refined mask directly into v0, a vfcvt.x.f.v /
; vfcvt.f.x.v round trip under that mask, and vfsgnj.vv to reapply the source
; sign. Every vector instruction in the sequence carries v0.t.
define <vscale x 2 x half> @vp_rint_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
; All-true-mask variant for <vscale x 2 x half>: %m is a splat of i1 true built
; with insertelement + shufflevector. In the expected assembly vfabs.v and
; vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round trip
; and vfsgnj.vv execute under the compare result in v0.
define <vscale x 2 x half> @vp_rint_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x half> @llvm.vp.rint.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
; Intrinsic declaration used by the <vscale x 4 x half> tests.
declare <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
; Masked vp.rint on <vscale x 4 x half> (e16, m1): %m and %evl are passed
; straight to the intrinsic. Expected assembly: vfabs.v, then vmflt.vf against
; a value loaded from the constant pool with flh (the constant itself is not
; shown here) writing the refined mask directly into v0, a vfcvt.x.f.v /
; vfcvt.f.x.v round trip under that mask, and vfsgnj.vv to reapply the source
; sign. Every vector instruction in the sequence carries v0.t.
define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
; All-true-mask variant for <vscale x 4 x half>: %m is a splat of i1 true built
; with insertelement + shufflevector. In the expected assembly vfabs.v and
; vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round trip
; and vfsgnj.vv execute under the compare result in v0.
define <vscale x 4 x half> @vp_rint_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x half> @llvm.vp.rint.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
; Intrinsic declaration used by the <vscale x 8 x half> tests.
declare <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
; Masked vp.rint on <vscale x 8 x half> (e16, m2): %m and %evl are passed
; straight to the intrinsic. The expected assembly copies v0 to v10, lets the
; masked vmflt.vf (against a constant-pool value loaded with flh; the constant
; itself is not shown here) update v10 under the mask-undisturbed (mu) policy,
; copies v10 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and reapplies the source sign with vfsgnj.vv.
define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
; All-true-mask variant for <vscale x 8 x half>: %m is a splat of i1 true built
; with insertelement + shufflevector. In the expected assembly vfabs.v and
; vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round trip
; and vfsgnj.vv execute under the compare result in v0.
define <vscale x 8 x half> @vp_rint_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x half> @llvm.vp.rint.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
; Intrinsic declaration used by the <vscale x 16 x half> tests.
declare <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
; Masked vp.rint on <vscale x 16 x half> (e16, m4): %m and %evl are passed
; straight to the intrinsic. The expected assembly copies v0 to v12, lets the
; masked vmflt.vf (against a constant-pool value loaded with flh; the constant
; itself is not shown here) update v12 under the mask-undisturbed (mu) policy,
; copies v12 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and reapplies the source sign with vfsgnj.vv.
define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
; All-true-mask variant for <vscale x 16 x half>: %m is a splat of i1 true
; built with insertelement + shufflevector. In the expected assembly vfabs.v
; and vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and vfsgnj.vv execute under the compare result in v0.
define <vscale x 16 x half> @vp_rint_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x half> @llvm.vp.rint.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
; Intrinsic declaration used by the <vscale x 32 x half> tests.
declare <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
; Masked vp.rint on <vscale x 32 x half> (e16, m8): %m and %evl are passed
; straight to the intrinsic. The expected assembly copies v0 to v16, lets the
; masked vmflt.vf (against a constant-pool value loaded with flh; the constant
; itself is not shown here) update v16 under the mask-undisturbed (mu) policy,
; copies v16 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and reapplies the source sign with vfsgnj.vv.
define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
; All-true-mask variant for <vscale x 32 x half>: %m is a splat of i1 true
; built with insertelement + shufflevector. In the expected assembly vfabs.v
; and vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and vfsgnj.vv execute under the compare result in v0.
define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv32f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
; Intrinsic declaration used by the <vscale x 1 x float> tests.
declare <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
; Masked vp.rint on <vscale x 1 x float> (e32, mf2): %m and %evl are passed
; straight to the intrinsic. Expected assembly: vfabs.v, then vmflt.vf against
; a value loaded from the constant pool with flw (the constant itself is not
; shown here) writing the refined mask directly into v0, a vfcvt.x.f.v /
; vfcvt.f.x.v round trip under that mask, and vfsgnj.vv to reapply the source
; sign. Every vector instruction in the sequence carries v0.t.
define <vscale x 1 x float> @vp_rint_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
; All-true-mask variant for <vscale x 1 x float>: %m is a splat of i1 true
; built with insertelement + shufflevector. In the expected assembly vfabs.v
; and vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and vfsgnj.vv execute under the compare result in v0.
define <vscale x 1 x float> @vp_rint_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
; Intrinsic declaration used by the <vscale x 2 x float> tests.
declare <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
; Masked vp.rint on <vscale x 2 x float> (e32, m1): %m and %evl are passed
; straight to the intrinsic. Expected assembly: vfabs.v, then vmflt.vf against
; a value loaded from the constant pool with flw (the constant itself is not
; shown here) writing the refined mask directly into v0, a vfcvt.x.f.v /
; vfcvt.f.x.v round trip under that mask, and vfsgnj.vv to reapply the source
; sign. Every vector instruction in the sequence carries v0.t.
define <vscale x 2 x float> @vp_rint_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
; All-true-mask variant for <vscale x 2 x float>: %m is a splat of i1 true
; built with insertelement + shufflevector. In the expected assembly vfabs.v
; and vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and vfsgnj.vv execute under the compare result in v0.
define <vscale x 2 x float> @vp_rint_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
; Intrinsic declaration used by the <vscale x 4 x float> tests.
declare <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
; Masked vp.rint on <vscale x 4 x float> (e32, m2): %m and %evl are passed
; straight to the intrinsic. The expected assembly copies v0 to v10, lets the
; masked vmflt.vf (against a constant-pool value loaded with flw; the constant
; itself is not shown here) update v10 under the mask-undisturbed (mu) policy,
; copies v10 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and reapplies the source sign with vfsgnj.vv.
define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
; All-true-mask variant for <vscale x 4 x float>: %m is a splat of i1 true
; built with insertelement + shufflevector. In the expected assembly vfabs.v
; and vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and vfsgnj.vv execute under the compare result in v0.
define <vscale x 4 x float> @vp_rint_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
; Intrinsic declaration used by the <vscale x 8 x float> tests.
declare <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
; Masked vp.rint on <vscale x 8 x float> (e32, m4): %m and %evl are passed
; straight to the intrinsic. The expected assembly copies v0 to v12, lets the
; masked vmflt.vf (against a constant-pool value loaded with flw; the constant
; itself is not shown here) update v12 under the mask-undisturbed (mu) policy,
; copies v12 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and reapplies the source sign with vfsgnj.vv.
define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; All-true-mask variant for <vscale x 8 x float>: %m is a splat of i1 true
; built with insertelement + shufflevector. In the expected assembly vfabs.v
; and vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and vfsgnj.vv execute under the compare result in v0.
define <vscale x 8 x float> @vp_rint_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
; Intrinsic declaration used by the <vscale x 16 x float> tests.
declare <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
; Masked vp.rint on <vscale x 16 x float> (e32, m8): %m and %evl are passed
; straight to the intrinsic. The expected assembly copies v0 to v16, lets the
; masked vmflt.vf (against a constant-pool value loaded with flw; the constant
; itself is not shown here) update v16 under the mask-undisturbed (mu) policy,
; copies v16 back into v0, then performs the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and reapplies the source sign with vfsgnj.vv.
define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
; All-true-mask variant for <vscale x 16 x float>: %m is a splat of i1 true
; built with insertelement + shufflevector. In the expected assembly vfabs.v
; and vmflt.vf carry no mask operand; only the vfcvt.x.f.v / vfcvt.f.x.v round
; trip and vfsgnj.vv execute under the compare result in v0.
define <vscale x 16 x float> @vp_rint_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
; Intrinsic declaration used by the <vscale x 1 x double> tests.
declare <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
; Masked vp.rint on <vscale x 1 x double> (e64, m1): %m and %evl are passed
; straight to the intrinsic. Expected assembly: vfabs.v, then vmflt.vf against
; a value loaded from the constant pool with fld (the constant itself is not
; shown here) writing the refined mask directly into v0, a vfcvt.x.f.v /
; vfcvt.f.x.v round trip under that mask, and vfsgnj.vv to reapply the source
; sign. Every vector instruction in the sequence carries v0.t.
define <vscale x 1 x double> @vp_rint_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
; vp.rint on <vscale x 1 x double> with an all-true splat mask. The expected
; assembly uses unmasked vfabs.v/vmflt.vf to build the conversion mask in v0
; and needs only one policy switch (ma -> mu) before the final vfsgnj.vv.
define <vscale x 1 x double> @vp_rint_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv1f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
declare <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
; Masked vp.rint on <vscale x 2 x double> (e64, m2). Unlike the m1 case, the
; expected assembly first copies the mask to v10, lets the masked
; mask-undisturbed vmflt.vf refine that copy, and then moves it back into v0
; before the masked conversions and the final vfsgnj.vv.
define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v10, v0
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
; vp.rint on <vscale x 2 x double> with an all-true splat mask. The expected
; assembly builds the conversion mask directly in v0 with unmasked
; vfabs.v/vmflt.vf, so no mask copy (vmv1r.v) appears.
define <vscale x 2 x double> @vp_rint_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
; Masked vp.rint on <vscale x 4 x double> (e64, m4). The mask is copied to
; v12, refined there by the masked mask-undisturbed vmflt.vf, and moved back
; to v0 for the masked conversions and the sign-restoring vfsgnj.vv.
define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v0
; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
; vp.rint on <vscale x 4 x double> with an all-true splat mask. The expected
; assembly follows the same unmasked compare / masked convert / vfsgnj.vv
; shape as the smaller f64 cases, here at e64, m4.
define <vscale x 4 x double> @vp_rint_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
declare <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
; Masked vp.rint on <vscale x 7 x double>, an element count that is not a
; power of two. The expected assembly runs at e64, m8 with the usual
; mask-copy / compare / convert / vfsgnj.vv sequence.
; NOTE(review): presumably this exercises widening of the illegal nxv7 type
; to nxv8 during legalization - confirm against the type legalizer.
define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
; vp.rint on <vscale x 7 x double> (non-power-of-two element count) with an
; all-true splat mask. The expected assembly runs at e64, m8 and builds the
; conversion mask directly in v0 with unmasked vfabs.v/vmflt.vf.
define <vscale x 7 x double> @vp_rint_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv7f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
%v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
declare <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
; Masked vp.rint on <vscale x 8 x double> (e64, m8), the largest f64 type in
; this file that the expected assembly handles as a single register group.
; The mask is copied to v16, refined by the masked mask-undisturbed vmflt.vf,
; and moved back to v0 for the conversions and the final vfsgnj.vv.
define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v16, v0
; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; vp.rint on <vscale x 8 x double> with an all-true splat mask (e64, m8).
; The expected assembly uses unmasked vfabs.v/vmflt.vf to form the
; conversion mask in v0, then the masked convert round trip and vfsgnj.vv.
define <vscale x 8 x double> @vp_rint_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Test splitting: <vscale x 16 x double> is processed as two e64/m8 halves
; (v16 = high half, v8 = low half), each with its own mask and vector length.
declare <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
; Masked variant. In the expected assembly:
;  - the high-half mask is obtained by sliding v0 down by vlenb >> 3;
;  - the high half runs with evl - vlenb, saturated to 0 on underflow
;    (sub / sltu / addi -1 / and);
;  - the low half runs with evl clamped to at most vlenb (bltu / mv);
;  - a stack area of vlenb << 3 bytes is reserved, and v24 is spilled and
;    then reloaded from the same slot right before the high-half vfsgnj.vv.
; NOTE(review): the spill/reload pair is part of the current expected output;
; whether it is actually required is not evident from this file.
define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v2, v0, a2
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}
; Split vp.rint on <vscale x 16 x double> with an all-true splat mask. The
; expected assembly handles the high half (v16) first with evl - vlenb
; saturated to 0 on underflow, then the low half (v8) with evl clamped to at
; most vlenb. Each half builds its own conversion mask in v0 with unmasked
; vfabs.v/vmflt.vf; no stack frame or vector spill appears in this variant.
define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_rint_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: sub a2, a0, a1
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
; CHECK-NEXT: sltu a3, a0, a2
; CHECK-NEXT: addi a3, a3, -1
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x double> @llvm.vp.rint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

View File

@ -63,6 +63,8 @@ protected:
Str << " declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x "
"i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, "