[VP][RISCV] Add vp.ceil and RISC-V support

Previous commit 8b00b24f85 missed to add `int_ceil` anchor for the
llvm.ceil.* section under LangRef.rst

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D134586
This commit is contained in:
eopXD 2022-09-24 05:52:55 -07:00
parent 4329ca61e8
commit 163cb33854
8 changed files with 1634 additions and 14 deletions

View File

@ -14762,6 +14762,8 @@ Semantics:
This function returns the same values as the libm ``floor`` functions
would, and handles error conditions in the same way.
.. _int_ceil:
'``llvm.ceil.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -21413,6 +21415,52 @@ Examples:
%t = icmp ne <4 x i32> %a, %b
%also.r = select <4 x i1> %mask, <4 x i1> %t, <4 x i1> poison
.. _int_vp_ceil:
'``llvm.vp.ceil.*``' Intrinsics
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <16 x float> @llvm.vp.ceil.v16f32 (<16 x float> <op>, <16 x i1> <mask>, i32 <vector_length>)
declare <vscale x 4 x float> @llvm.vp.ceil.nxv4f32 (<vscale x 4 x float> <op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
declare <256 x double> @llvm.vp.ceil.v256f64 (<256 x double> <op>, <256 x i1> <mask>, i32 <vector_length>)
Overview:
"""""""""
Predicated floating-point ceiling of a vector of floating-point values.
Arguments:
""""""""""
The first operand and the result have the same vector of floating-point type.
The second operand is the vector mask and has the same number of elements as the
result vector type. The third operand is the explicit vector length of the
operation.
Semantics:
""""""""""
The '``llvm.vp.ceil``' intrinsic performs floating-point ceiling
(:ref:`ceil <int_ceil>`) of the first vector operand on each enabled lane. The
result on disabled lanes is undefined.
Examples:
"""""""""
.. code-block:: llvm
%r = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %a, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
%t = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
.. _int_mload_mstore:

View File

@ -1591,6 +1591,10 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
def int_vp_ceil : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
// Casts
def int_vp_trunc : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],

View File

@ -260,6 +260,11 @@ END_REGISTER_VP(vp_minnum, VP_FMINNUM)
// llvm.vp.maxnum(x, y, mask,vlen)
BEGIN_REGISTER_VP(vp_maxnum, 2, 3, VP_FMAXNUM, -1)
END_REGISTER_VP(vp_maxnum, VP_FMAXNUM)
// llvm.vp.ceil(x,mask,vlen)
BEGIN_REGISTER_VP(vp_ceil, 1, 2, VP_FCEIL, -1)
END_REGISTER_VP(vp_ceil, VP_FCEIL)
///// } Floating-Point Arithmetic
///// Type Casts {

View File

@ -1020,6 +1020,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP:
case ISD::FABS: case ISD::VP_FABS:
case ISD::FCEIL:
case ISD::VP_FCEIL:
case ISD::FCOS:
case ISD::FEXP:
case ISD::FEXP2:
@ -4084,6 +4085,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG: case ISD::VP_FNEG:
case ISD::VP_FABS:
case ISD::VP_SQRT:
case ISD::VP_FCEIL:
case ISD::FREEZE:
case ISD::ARITH_FENCE:
case ISD::FCANONICALIZE:

View File

@ -450,7 +450,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM};
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
ISD::VP_FCEIL};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@ -1948,16 +1949,18 @@ static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
case ISD::FTRUNC: return RISCVFPRndMode::RTZ;
case ISD::FFLOOR: return RISCVFPRndMode::RDN;
case ISD::FCEIL: return RISCVFPRndMode::RUP;
case ISD::FCEIL:
case ISD::VP_FCEIL:
return RISCVFPRndMode::RUP;
case ISD::FROUND: return RISCVFPRndMode::RMM;
}
return RISCVFPRndMode::Invalid;
}
// Expand vector FTRUNC, FCEIL, FFLOOR, and FROUND by converting to the integer
// domain/ and back. Taking care to avoid converting values that are nan or
// already correct.
// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND and VP_FCEIL by converting to the
// integer domain/ and back. Taking care to avoid converting values that are nan
// or already correct.
static SDValue
lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
@ -1974,14 +1977,21 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue Mask, VL;
bool IsVP = Op->getOpcode() == ISD::VP_FCEIL;
if (IsVP) {
Mask = Op.getOperand(1);
VL = Op.getOperand(2);
} else {
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
}
// Freeze the source since we are increasing the number of uses.
Src = DAG.getFreeze(Src);
// We do the conversion on the absolute value and fix the sign at the end.
SDValue Abs =
DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, TrueMask, VL);
SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
// Determine the largest integer that can be represented exactly. This and
// values larger than it don't have any fractional bits so don't need to
@ -1998,9 +2008,10 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
// If abs(Src) was larger than MaxVal or nan, keep it.
MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
{Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
DAG.getUNDEF(SetccVT), TrueMask, VL});
Mask =
DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
{Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
DAG.getUNDEF(SetccVT), Mask, VL});
// Truncate to integer and convert back to FP.
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
@ -2011,17 +2022,19 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
default:
llvm_unreachable("Unexpected opcode");
case ISD::FCEIL:
case ISD::VP_FCEIL:
case ISD::FFLOOR:
case ISD::FROUND: {
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
assert(FRM != RISCVFPRndMode::Invalid);
Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src,
Mask,
DAG.getTargetConstant(FRM, DL, XLenVT), VL);
break;
}
case ISD::FTRUNC:
Truncated =
DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src, Mask, VL);
Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
Mask, VL);
break;
}
@ -3889,6 +3902,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPStridedLoad(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
return lowerVPStridedStore(Op, DAG);
case ISD::VP_FCEIL:
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
}
}

View File

@ -0,0 +1,795 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
define <vscale x 1 x half> @vfabs_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
define <vscale x 1 x half> @vfabs_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x half> @llvm.vp.ceil.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
declare <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
define <vscale x 2 x half> @vfabs_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
define <vscale x 2 x half> @vfabs_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x half> @llvm.vp.ceil.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
declare <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
define <vscale x 4 x half> @vfabs_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
define <vscale x 4 x half> @vfabs_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x half> @llvm.vp.ceil.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
declare <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
define <vscale x 8 x half> @vfabs_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
define <vscale x 8 x half> @vfabs_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x half> @llvm.vp.ceil.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
declare <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
define <vscale x 16 x half> @vfabs_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
define <vscale x 16 x half> @vfabs_vv_nxv16f16_unmasked(<vscale x 16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x half> @llvm.vp.ceil.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
declare <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half>, <vscale x 32 x i1>, i32)
define <vscale x 32 x half> @vfabs_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv32f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
define <vscale x 32 x half> @vfabs_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv32f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
%v = call <vscale x 32 x half> @llvm.vp.ceil.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
declare <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
define <vscale x 1 x float> @vfabs_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
define <vscale x 1 x float> @vfabs_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv1f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}
declare <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
define <vscale x 2 x float> @vfabs_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
define <vscale x 2 x float> @vfabs_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x float> %v
}
declare <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
define <vscale x 4 x float> @vfabs_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
define <vscale x 4 x float> @vfabs_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x float> %v
}
declare <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
define <vscale x 8 x float> @vfabs_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
define <vscale x 8 x float> @vfabs_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x float> %v
}
declare <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
define <vscale x 16 x float> @vfabs_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
define <vscale x 16 x float> @vfabs_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x float> %v
}
declare <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
define <vscale x 1 x double> @vfabs_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv1f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
define <vscale x 1 x double> @vfabs_vv_nxv1f64_unmasked(<vscale x 1 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv1f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8
; CHECK-NEXT: vmflt.vf v0, v9, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%v = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}
declare <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
define <vscale x 2 x double> @vfabs_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
define <vscale x 2 x double> @vfabs_vv_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v10, v8
; CHECK-NEXT: vmflt.vf v0, v10, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%v = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}
declare <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
define <vscale x 4 x double> @vfabs_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
define <vscale x 4 x double> @vfabs_vv_nxv4f64_unmasked(<vscale x 4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v12, v8
; CHECK-NEXT: vmflt.vf v0, v12, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%v = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}
declare <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double>, <vscale x 7 x i1>, i32)
define <vscale x 7 x double> @vfabs_vv_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv7f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI28_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
define <vscale x 7 x double> @vfabs_vv_nxv7f64_unmasked(<vscale x 7 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv7f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI29_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI29_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 7 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 7 x i1> %head, <vscale x 7 x i1> poison, <vscale x 7 x i32> zeroinitializer
%v = call <vscale x 7 x double> @llvm.vp.ceil.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
ret <vscale x 7 x double> %v
}
declare <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
define <vscale x 8 x double> @vfabs_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI30_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
define <vscale x 8 x double> @vfabs_vv_nxv8f64_unmasked(<vscale x 8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI31_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI31_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v16, v8
; CHECK-NEXT: vmflt.vf v0, v16, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%v = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}
; Test splitting.
declare <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double>, <vscale x 16 x i1>, i32)
define <vscale x 16 x double> @vfabs_vv_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a4, a1, 3
; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, mu
; CHECK-NEXT: sub a3, a0, a1
; CHECK-NEXT: vslidedown.vx v0, v0, a4
; CHECK-NEXT: bltu a0, a3, .LBB32_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: .LBB32_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: sub sp, sp, a3
; CHECK-NEXT: lui a3, %hi(.LCPI32_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI32_0)(a3)
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a2, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB32_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: .LBB32_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}
define <vscale x 16 x double> @vfabs_vv_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: mv a2, a0
; CHECK-NEXT: bltu a0, a1, .LBB33_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: .LBB33_2:
; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI33_0)(a3)
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a2, 3
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a2
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: sub a1, a0, a1
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB33_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a3, a1
; CHECK-NEXT: .LBB33_4:
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16
; CHECK-NEXT: vmflt.vf v0, v24, ft0
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
%head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
%v = call <vscale x 16 x double> @llvm.vp.ceil.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x double> %v
}

View File

@ -0,0 +1,749 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32)
define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI1_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
declare <4 x half> @llvm.vp.ceil.v4f16(<4 x half>, <4 x i1>, i32)
define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI3_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
declare <8 x half> @llvm.vp.ceil.v8f16(<8 x half>, <8 x i1>, i32)
define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI4_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI5_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
declare <16 x half> @llvm.vp.ceil.v16f16(<16 x half>, <16 x i1>, i32)
define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI6_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI6_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI7_0)
; CHECK-NEXT: flh ft0, %lo(.LCPI7_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
declare <2 x float> @llvm.vp.ceil.v2f32(<2 x float>, <2 x i1>, i32)
define <2 x float> @vp_ceil_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI8_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
define <2 x float> @vp_ceil_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI9_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
ret <2 x float> %v
}
declare <4 x float> @llvm.vp.ceil.v4f32(<4 x float>, <4 x i1>, i32)
define <4 x float> @vp_ceil_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
define <4 x float> @vp_ceil_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI11_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI11_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
ret <4 x float> %v
}
declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, i32)
define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI12_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
define <8 x float> @vp_ceil_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI13_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI13_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
ret <8 x float> %v
}
declare <16 x float> @llvm.vp.ceil.v16f32(<16 x float>, <16 x i1>, i32)
define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI14_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI14_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI15_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI15_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
ret <16 x float> %v
}
declare <2 x double> @llvm.vp.ceil.v2f64(<2 x double>, <2 x i1>, i32)
define <2 x double> @vp_ceil_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI16_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI16_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI17_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI17_0)(a1)
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
; CHECK-NEXT: vfabs.v v9, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT: vmflt.vf v0, v9, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <2 x i1> poison, i1 true, i32 0
%m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
%v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
ret <2 x double> %v
}
declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32)
define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI18_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI18_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI19_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI19_0)(a1)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
; CHECK-NEXT: vfabs.v v12, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; CHECK-NEXT: vmflt.vf v10, v12, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
ret <4 x double> %v
}
declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32)
define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI20_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI20_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI21_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI21_0)(a1)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vmflt.vf v12, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i32 0
%m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
ret <8 x double> %v
}
declare <15 x double> @llvm.vp.ceil.v15f64(<15 x double>, <15 x i1>, i32)
define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI22_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v15f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI23_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI23_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <15 x i1> poison, i1 true, i32 0
%m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
%v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
}
declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32)
define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI24_0)(a1)
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
define <16 x double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI25_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI25_0)(a1)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v16, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v16
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: ret
%head = insertelement <16 x i1> poison, i1 true, i32 0
%m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
%v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
}
declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32)
define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v1, v0
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vslidedown.vi v0, v0, 2
; CHECK-NEXT: bltu a0, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI26_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: bltu a0, a1, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v24, v16, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}
define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vmset.m v1
; CHECK-NEXT: bltu a0, a2, .LBB27_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: .LBB27_2:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
; CHECK-NEXT: fld ft0, %lo(.LCPI27_0)(a2)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v16, v0.t
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: vmflt.vf v2, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a1, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v2
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
; CHECK-NEXT: fsrm a1
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a2) # Unknown-size Folded Reload
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: bltu a0, a1, .LBB27_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB27_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfabs.v v24, v8, v0.t
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmflt.vf v1, v24, ft0, v0.t
; CHECK-NEXT: fsrmi a0, 3
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t
; CHECK-NEXT: fsrm a0
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
%v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
ret <32 x double> %v
}

View File

@ -52,6 +52,8 @@ protected:
Str << " declare <8 x float> @llvm.vp." << BinaryFPOpcode
<< ".v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) ";
Str << " declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, "
"i32)";
Str << " declare <8 x float> @llvm.vp.fabs.v8f32(<8 x float>, <8 x i1>, "