[RISCV] Improve vector fceil/ffloor lowering by changing FRM.
This adds new VFCVT pseudoinstructions that take a rounding mode operand. A custom inserter is used to insert additional instructions to change FRM around the VFCVT. Some of this is borrowed from D122860, but takes a somewhat different direction. We may migrate to that patch, but for now I was trying to keep this as independent from RVV intrinsics as I could. A followup patch will use this approach for FROUND too. Still need to fix the cost model. Reviewed By: arcbbb Differential Revision: https://reviews.llvm.org/D133238
This commit is contained in:
parent
067aab0a85
commit
f0332d12ae
|
@ -1839,8 +1839,6 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
|
|||
// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
|
||||
// and back. Taking care to avoid converting values that are nan or already
|
||||
// correct.
|
||||
// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
|
||||
// have FRM dependencies modeled yet.
|
||||
static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
|
||||
const RISCVSubtarget &Subtarget) {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
|
@ -1887,41 +1885,30 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
|
|||
|
||||
// Truncate to integer and convert back to FP.
|
||||
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
|
||||
SDValue Truncated =
|
||||
DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
|
||||
MVT XLenVT = Subtarget.getXLenVT();
|
||||
SDValue Truncated;
|
||||
|
||||
switch (Op.getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected opcode");
|
||||
case ISD::FCEIL:
|
||||
Truncated =
|
||||
DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
|
||||
DAG.getTargetConstant(RISCVFPRndMode::RUP, DL, XLenVT), VL);
|
||||
break;
|
||||
case ISD::FFLOOR:
|
||||
Truncated =
|
||||
DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
|
||||
DAG.getTargetConstant(RISCVFPRndMode::RDN, DL, XLenVT), VL);
|
||||
break;
|
||||
case ISD::FTRUNC:
|
||||
Truncated = DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
|
||||
break;
|
||||
}
|
||||
|
||||
Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
|
||||
Mask, VL);
|
||||
|
||||
if (Op.getOpcode() == ISD::FCEIL) {
|
||||
// If the truncated value is the greater than or equal to the original
|
||||
// value, we've computed the ceil. Otherwise, we went the wrong way and
|
||||
// need to increase by 1.
|
||||
// FIXME: This should use a masked operation. Handle here or in isel?
|
||||
SDValue SplatVal =
|
||||
DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
|
||||
SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
|
||||
DAG.getUNDEF(ContainerVT), SplatVal, VL);
|
||||
SDValue NeedAdjust = DAG.getNode(
|
||||
RISCVISD::SETCC_VL, DL, SetccVT,
|
||||
{Truncated, Src, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
|
||||
Truncated = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Truncated,
|
||||
Splat, Truncated, NeedAdjust, VL);
|
||||
} else if (Op.getOpcode() == ISD::FFLOOR) {
|
||||
// If the truncated value is the less than or equal to the original value,
|
||||
// we've computed the floor. Otherwise, we went the wrong way and need to
|
||||
// decrease by 1.
|
||||
// FIXME: This should use a masked operation. Handle here or in isel?
|
||||
SDValue SplatVal =
|
||||
DAG.getConstantFP(1.0, DL, ContainerVT.getVectorElementType());
|
||||
SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
|
||||
DAG.getUNDEF(ContainerVT), SplatVal, VL);
|
||||
SDValue NeedAdjust = DAG.getNode(
|
||||
RISCVISD::SETCC_VL, DL, SetccVT,
|
||||
{Src, Truncated, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
|
||||
Truncated = DAG.getNode(RISCVISD::FSUB_VL, DL, ContainerVT, Truncated,
|
||||
Splat, Truncated, NeedAdjust, VL);
|
||||
}
|
||||
|
||||
// Restore the original sign so that -0.0 is preserved.
|
||||
Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
|
||||
Src, Src, Mask, VL);
|
||||
|
@ -10664,6 +10651,41 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
|
|||
return TailMBB;
|
||||
}
|
||||
|
||||
// Custom inserter for the PseudoVFCVT_RM_*_MASK pseudos: expand the
// rounding-mode-carrying pseudo into fsrmi (swap FRM to the requested static
// rounding mode), the ordinary masked VFCVT pseudo given by \p Opcode, and an
// fsrm that restores the caller's FRM value.
static MachineBasicBlock *
emitVFCVT_RM_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) {
  DebugLoc DL = MI.getDebugLoc();

  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);

  // Update FRM and save the old value. Operand 4 is the $frm immediate of
  // VPseudoUnaryMaskTA_FRM (operands: $rd, $merge, $rs2, $vm, $frm, $vl,
  // $sew, $policy).
  BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
      .addImm(MI.getOperand(4).getImm());

  // Emit a VFCVT without the FRM operand: copy every operand except the
  // $frm immediate at index 4, preserving their order.
  assert(MI.getNumOperands() == 8);
  auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode))
                 .add(MI.getOperand(0))
                 .add(MI.getOperand(1))
                 .add(MI.getOperand(2))
                 .add(MI.getOperand(3))
                 .add(MI.getOperand(5))
                 .add(MI.getOperand(6))
                 .add(MI.getOperand(7));
  // Propagate the no-FP-exceptions flag so later passes keep treating the
  // conversion as exception-free.
  if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
    MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);

  // Restore FRM.
  BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFRM))
      .addReg(SavedFRM, RegState::Kill);

  // Erase the pseudoinstruction.
  MI.eraseFromParent();
  return BB;
}
|
||||
|
||||
MachineBasicBlock *
|
||||
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
|
@ -10695,6 +10717,18 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|||
return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
|
||||
case RISCV::PseudoQuietFLT_D:
|
||||
return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
|
||||
case RISCV::PseudoVFCVT_RM_X_F_V_M1_MASK:
|
||||
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
|
||||
case RISCV::PseudoVFCVT_RM_X_F_V_M2_MASK:
|
||||
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
|
||||
case RISCV::PseudoVFCVT_RM_X_F_V_M4_MASK:
|
||||
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
|
||||
case RISCV::PseudoVFCVT_RM_X_F_V_M8_MASK:
|
||||
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
|
||||
case RISCV::PseudoVFCVT_RM_X_F_V_MF2_MASK:
|
||||
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
|
||||
case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
|
||||
return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12242,6 +12276,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
NODE_NAME_CASE(MULHU_VL)
|
||||
NODE_NAME_CASE(FP_TO_SINT_VL)
|
||||
NODE_NAME_CASE(FP_TO_UINT_VL)
|
||||
NODE_NAME_CASE(VFCVT_X_F_VL)
|
||||
NODE_NAME_CASE(SINT_TO_FP_VL)
|
||||
NODE_NAME_CASE(UINT_TO_FP_VL)
|
||||
NODE_NAME_CASE(FP_EXTEND_VL)
|
||||
|
|
|
@ -254,6 +254,7 @@ enum NodeType : unsigned {
|
|||
FCOPYSIGN_VL, // Has a merge operand
|
||||
FP_TO_SINT_VL,
|
||||
FP_TO_UINT_VL,
|
||||
VFCVT_X_F_VL,
|
||||
SINT_TO_FP_VL,
|
||||
UINT_TO_FP_VL,
|
||||
FP_ROUND_VL,
|
||||
|
|
|
@ -1555,6 +1555,7 @@ class SwapSysRegImm<SysReg SR, list<Register> Regs>
|
|||
def ReadFRM : ReadSysReg<SysRegFRM, [FRM]>;
|
||||
def WriteFRM : WriteSysReg<SysRegFRM, [FRM]>;
|
||||
def WriteFRMImm : WriteSysRegImm<SysRegFRM, [FRM]>;
|
||||
def SwapFRMImm : SwapSysRegImm<SysRegFRM, [FRM]>;
|
||||
|
||||
let hasSideEffects = true in {
|
||||
def ReadFFLAGS : ReadSysReg<SysRegFFLAGS, [FFLAGS]>;
|
||||
|
|
|
@ -1031,6 +1031,22 @@ class VPseudoUnaryMaskTA<VReg RetClass, VReg OpClass, string Constraint = ""> :
|
|||
let UsesMaskPolicy = 1;
|
||||
}
|
||||
|
||||
// Like VPseudoUnaryMaskTA but with an extra ixlenimm:$frm operand carrying a
// static rounding mode. usesCustomInserter is set so EmitInstrWithCustomInserter
// (emitVFCVT_RM_MASK) can wrap the instruction with FRM swap/restore code.
class VPseudoUnaryMaskTA_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
    Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
           (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
                VMaskOp:$vm, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  // Tie the destination to the merge operand for tail/mask-undisturbed
  // semantics.
  let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
  let HasVLOp = 1;
  let HasSEWOp = 1;
  let HasMergeOp = 1;
  let HasVecPolicyOp = 1;
  let UsesMaskPolicy = 1;
  // Expanded by a custom inserter that changes FRM around the conversion.
  let usesCustomInserter = 1;
}
|
||||
|
||||
// mask unary operation without maskedoff
|
||||
class VPseudoMaskUnarySOutMask:
|
||||
Pseudo<(outs GPR:$rd),
|
||||
|
@ -2769,12 +2785,28 @@ multiclass VPseudoConversion<VReg RetClass,
|
|||
}
|
||||
}
|
||||
|
||||
// Conversion pseudo with a static rounding-mode operand. Only the masked
// form is defined; the lowering that uses it (lowerFTRUNC_FCEIL_FFLOOR)
// always provides a mask.
multiclass VPseudoConversionRM<VReg RetClass,
                               VReg Op1Class,
                               LMULInfo MInfo,
                               string Constraint = ""> {
  let VLMul = MInfo.value in {
    def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA_FRM<RetClass, Op1Class,
                                                          Constraint>;
  }
}
|
||||
|
||||
multiclass VPseudoVCVTI_V {
|
||||
foreach m = MxListF in
|
||||
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
|
||||
Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
|
||||
}
|
||||
|
||||
// Float-to-int conversion pseudos that take a static rounding-mode operand,
// one per floating-point LMUL. Scheduling info matches the plain
// VPseudoVCVTI_V variants.
multiclass VPseudoVCVTI_RM_V {
  foreach m = MxListF in
    defm _V : VPseudoConversionRM<m.vrclass, m.vrclass, m>,
              Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
}
|
||||
|
||||
multiclass VPseudoVCVTF_V {
|
||||
foreach m = MxListF in
|
||||
defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
|
||||
|
@ -4849,6 +4881,7 @@ defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
|
|||
}
|
||||
defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
|
||||
defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
|
||||
defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V;
|
||||
let Uses = [FRM] in {
|
||||
defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
|
||||
defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
|
||||
|
|
|
@ -146,6 +146,14 @@ def riscv_fp_to_uint_vl : SDNode<"RISCVISD::FP_TO_UINT_VL", SDT_RISCVFP2IOp_VL>;
|
|||
def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
|
||||
def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
|
||||
|
||||
// Profile for a VL-predicated float-to-int conversion carrying a rounding
// mode: integer result (0), FP source (1), i1 mask vector (2), rounding-mode
// immediate (3) and VL (4), both XLenVT.
def SDT_RISCVVecCvtX2FOp_VL : SDTypeProfile<1, 4, [
  SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
  SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>,
  SDTCisVT<4, XLenVT>
]>;

// vfcvt.x.f.v with an explicit static rounding-mode operand.
def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVVecCvtX2FOp_VL>;
|
||||
|
||||
def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
|
||||
SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
|
||||
SDTCisVec<1>,
|
||||
|
@ -645,6 +653,19 @@ multiclass VPatConvertFP2IVL_V<SDNode vop, string instruction_name> {
|
|||
}
|
||||
}
|
||||
|
||||
// Match a VL-predicated FP-to-int conversion node that carries a static
// rounding-mode immediate ($frm) onto the corresponding _MASK pseudo, for
// every float vector type. The frm timm is forwarded straight into the
// pseudo's operand list.
multiclass VPatConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
  foreach fvti = AllFloatVectors in {
    defvar ivti = GetIntVTypeInfo<fvti>.Vti;
    def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
                                (fvti.Mask V0), (XLenVT timm:$frm),
                                VLOpFrag)),
              (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
                  (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
                  (fvti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW,
                  TAIL_AGNOSTIC)>;
  }
}
|
||||
|
||||
multiclass VPatConvertI2FPVL_V<SDNode vop, string instruction_name> {
|
||||
foreach fvti = AllFloatVectors in {
|
||||
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
|
||||
|
@ -1471,6 +1492,7 @@ foreach fvti = AllFloatVectors in {
|
|||
GPR:$vl, fvti.Log2SEW)>;
|
||||
|
||||
// 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
|
||||
defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
|
||||
defm : VPatConvertFP2IVL_V<riscv_fp_to_sint_vl, "PseudoVFCVT_RTZ_X_F_V">;
|
||||
defm : VPatConvertFP2IVL_V<riscv_fp_to_uint_vl, "PseudoVFCVT_RTZ_XU_F_V">;
|
||||
defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
|
||||
|
|
|
@ -264,38 +264,38 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
|
|||
// instruction counts with the following adjustments made:
|
||||
// * One vsetvli is considered free.
|
||||
static const CostTblEntry VectorIntrinsicCostTable[]{
|
||||
{Intrinsic::floor, MVT::v2f32, 15},
|
||||
{Intrinsic::floor, MVT::v4f32, 15},
|
||||
{Intrinsic::floor, MVT::v8f32, 15},
|
||||
{Intrinsic::floor, MVT::v16f32, 15},
|
||||
{Intrinsic::floor, MVT::nxv2f32, 15},
|
||||
{Intrinsic::floor, MVT::nxv4f32, 15},
|
||||
{Intrinsic::floor, MVT::nxv8f32, 15},
|
||||
{Intrinsic::floor, MVT::nxv16f32, 15},
|
||||
{Intrinsic::floor, MVT::v2f64, 15},
|
||||
{Intrinsic::floor, MVT::v4f64, 15},
|
||||
{Intrinsic::floor, MVT::v8f64, 15},
|
||||
{Intrinsic::floor, MVT::v16f64, 15},
|
||||
{Intrinsic::floor, MVT::nxv1f64, 15},
|
||||
{Intrinsic::floor, MVT::nxv2f64, 15},
|
||||
{Intrinsic::floor, MVT::nxv4f64, 15},
|
||||
{Intrinsic::floor, MVT::nxv8f64, 15},
|
||||
{Intrinsic::ceil, MVT::v2f32, 15},
|
||||
{Intrinsic::ceil, MVT::v4f32, 15},
|
||||
{Intrinsic::ceil, MVT::v8f32, 15},
|
||||
{Intrinsic::ceil, MVT::v16f32, 15},
|
||||
{Intrinsic::ceil, MVT::nxv2f32, 15},
|
||||
{Intrinsic::ceil, MVT::nxv4f32, 15},
|
||||
{Intrinsic::ceil, MVT::nxv8f32, 15},
|
||||
{Intrinsic::ceil, MVT::nxv16f32, 15},
|
||||
{Intrinsic::ceil, MVT::v2f64, 15},
|
||||
{Intrinsic::ceil, MVT::v4f64, 15},
|
||||
{Intrinsic::ceil, MVT::v8f64, 15},
|
||||
{Intrinsic::ceil, MVT::v16f64, 15},
|
||||
{Intrinsic::ceil, MVT::nxv1f64, 15},
|
||||
{Intrinsic::ceil, MVT::nxv2f64, 15},
|
||||
{Intrinsic::ceil, MVT::nxv4f64, 15},
|
||||
{Intrinsic::ceil, MVT::nxv8f64, 15},
|
||||
{Intrinsic::floor, MVT::v2f32, 9},
|
||||
{Intrinsic::floor, MVT::v4f32, 9},
|
||||
{Intrinsic::floor, MVT::v8f32, 9},
|
||||
{Intrinsic::floor, MVT::v16f32, 9},
|
||||
{Intrinsic::floor, MVT::nxv2f32, 9},
|
||||
{Intrinsic::floor, MVT::nxv4f32, 9},
|
||||
{Intrinsic::floor, MVT::nxv8f32, 9},
|
||||
{Intrinsic::floor, MVT::nxv16f32, 9},
|
||||
{Intrinsic::floor, MVT::v2f64, 9},
|
||||
{Intrinsic::floor, MVT::v4f64, 9},
|
||||
{Intrinsic::floor, MVT::v8f64, 9},
|
||||
{Intrinsic::floor, MVT::v16f64, 9},
|
||||
{Intrinsic::floor, MVT::nxv1f64, 9},
|
||||
{Intrinsic::floor, MVT::nxv2f64, 9},
|
||||
{Intrinsic::floor, MVT::nxv4f64, 9},
|
||||
{Intrinsic::floor, MVT::nxv8f64, 9},
|
||||
{Intrinsic::ceil, MVT::v2f32, 9},
|
||||
{Intrinsic::ceil, MVT::v4f32, 9},
|
||||
{Intrinsic::ceil, MVT::v8f32, 9},
|
||||
{Intrinsic::ceil, MVT::v16f32, 9},
|
||||
{Intrinsic::ceil, MVT::nxv2f32, 9},
|
||||
{Intrinsic::ceil, MVT::nxv4f32, 9},
|
||||
{Intrinsic::ceil, MVT::nxv8f32, 9},
|
||||
{Intrinsic::ceil, MVT::nxv16f32, 9},
|
||||
{Intrinsic::ceil, MVT::v2f64, 9},
|
||||
{Intrinsic::ceil, MVT::v4f64, 9},
|
||||
{Intrinsic::ceil, MVT::v8f64, 9},
|
||||
{Intrinsic::ceil, MVT::v16f64, 9},
|
||||
{Intrinsic::ceil, MVT::nxv1f64, 9},
|
||||
{Intrinsic::ceil, MVT::nxv2f64, 9},
|
||||
{Intrinsic::ceil, MVT::nxv4f64, 9},
|
||||
{Intrinsic::ceil, MVT::nxv8f64, 9},
|
||||
{Intrinsic::trunc, MVT::v2f32, 7},
|
||||
{Intrinsic::trunc, MVT::v4f32, 7},
|
||||
{Intrinsic::trunc, MVT::v8f32, 7},
|
||||
|
|
|
@ -4,23 +4,23 @@
|
|||
define void @floor() {
|
||||
; CHECK-LABEL: 'floor'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.floor.f32(float undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %8 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %9 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call double @llvm.floor.f64(double undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %11 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %12 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %14 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %15 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %16 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %17 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %18 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
call float @llvm.floor.f32(float undef)
|
||||
|
@ -47,23 +47,23 @@ define void @floor() {
|
|||
define void @ceil() {
|
||||
; CHECK-LABEL: 'ceil'
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.ceil.f32(float undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %6 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %7 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %8 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %9 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call double @llvm.ceil.f64(double undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %11 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %12 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %14 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %15 = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %16 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %17 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %18 = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
|
||||
;
|
||||
call float @llvm.ceil.f32(float undef)
|
||||
|
|
|
@ -11,18 +11,12 @@ define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
|
|||
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x half> @llvm.ceil.nxv1f16(<vscale x 1 x half> %x)
|
||||
ret <vscale x 1 x half> %a
|
||||
|
@ -36,18 +30,12 @@ define <vscale x 2 x half> @ceil_nxv2f16(<vscale x 2 x half> %x) {
|
|||
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %x)
|
||||
ret <vscale x 2 x half> %a
|
||||
|
@ -61,18 +49,12 @@ define <vscale x 4 x half> @ceil_nxv4f16(<vscale x 4 x half> %x) {
|
|||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %x)
|
||||
ret <vscale x 4 x half> %a
|
||||
|
@ -85,19 +67,13 @@ define <vscale x 8 x half> @ceil_nxv8f16(<vscale x 8 x half> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v10, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v10, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %x)
|
||||
ret <vscale x 8 x half> %a
|
||||
|
@ -110,19 +86,13 @@ define <vscale x 16 x half> @ceil_nxv16f16(<vscale x 16 x half> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v12, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 16 x half> @llvm.ceil.nxv16f16(<vscale x 16 x half> %x)
|
||||
ret <vscale x 16 x half> %a
|
||||
|
@ -135,19 +105,13 @@ define <vscale x 32 x half> @ceil_nxv32f16(<vscale x 32 x half> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v24, v8
|
||||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v16, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 32 x half> @llvm.ceil.nxv32f16(<vscale x 32 x half> %x)
|
||||
ret <vscale x 32 x half> %a
|
||||
|
@ -161,18 +125,12 @@ define <vscale x 1 x float> @ceil_nxv1f32(<vscale x 1 x float> %x) {
|
|||
; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> %x)
|
||||
ret <vscale x 1 x float> %a
|
||||
|
@ -186,18 +144,12 @@ define <vscale x 2 x float> @ceil_nxv2f32(<vscale x 2 x float> %x) {
|
|||
; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %x)
|
||||
ret <vscale x 2 x float> %a
|
||||
|
@ -210,19 +162,13 @@ define <vscale x 4 x float> @ceil_nxv4f32(<vscale x 4 x float> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v10, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v10, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %x)
|
||||
ret <vscale x 4 x float> %a
|
||||
|
@ -235,19 +181,13 @@ define <vscale x 8 x float> @ceil_nxv8f32(<vscale x 8 x float> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v12, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> %x)
|
||||
ret <vscale x 8 x float> %a
|
||||
|
@ -260,19 +200,13 @@ define <vscale x 16 x float> @ceil_nxv16f32(<vscale x 16 x float> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v24, v8
|
||||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v16, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> %x)
|
||||
ret <vscale x 16 x float> %a
|
||||
|
@ -286,18 +220,12 @@ define <vscale x 1 x double> @ceil_nxv1f64(<vscale x 1 x double> %x) {
|
|||
; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> %x)
|
||||
ret <vscale x 1 x double> %a
|
||||
|
@ -310,19 +238,13 @@ define <vscale x 2 x double> @ceil_nxv2f64(<vscale x 2 x double> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v12, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfadd.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v10, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v10, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %x)
|
||||
ret <vscale x 2 x double> %a
|
||||
|
@ -335,19 +257,13 @@ define <vscale x 4 x double> @ceil_nxv4f64(<vscale x 4 x double> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v16, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfadd.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v12, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> %x)
|
||||
ret <vscale x 4 x double> %a
|
||||
|
@ -360,19 +276,13 @@ define <vscale x 8 x double> @ceil_nxv8f64(<vscale x 8 x double> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v24, v8
|
||||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v24, v8, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfadd.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v16, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> %x)
|
||||
ret <vscale x 8 x double> %a
|
||||
|
|
|
@ -11,18 +11,12 @@ define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
|
|||
; CHECK-NEXT: flh ft0, %lo(.LCPI0_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI0_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI0_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x half> @llvm.floor.nxv1f16(<vscale x 1 x half> %x)
|
||||
ret <vscale x 1 x half> %a
|
||||
|
@ -36,18 +30,12 @@ define <vscale x 2 x half> @floor_nxv2f16(<vscale x 2 x half> %x) {
|
|||
; CHECK-NEXT: flh ft0, %lo(.LCPI1_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI1_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI1_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x half> @llvm.floor.nxv2f16(<vscale x 2 x half> %x)
|
||||
ret <vscale x 2 x half> %a
|
||||
|
@ -61,18 +49,12 @@ define <vscale x 4 x half> @floor_nxv4f16(<vscale x 4 x half> %x) {
|
|||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI2_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI2_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x half> @llvm.floor.nxv4f16(<vscale x 4 x half> %x)
|
||||
ret <vscale x 4 x half> %a
|
||||
|
@ -85,19 +67,13 @@ define <vscale x 8 x half> @floor_nxv8f16(<vscale x 8 x half> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI3_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI3_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI3_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v10, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v10, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 8 x half> @llvm.floor.nxv8f16(<vscale x 8 x half> %x)
|
||||
ret <vscale x 8 x half> %a
|
||||
|
@ -110,19 +86,13 @@ define <vscale x 16 x half> @floor_nxv16f16(<vscale x 16 x half> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI4_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v12, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 16 x half> @llvm.floor.nxv16f16(<vscale x 16 x half> %x)
|
||||
ret <vscale x 16 x half> %a
|
||||
|
@ -135,19 +105,13 @@ define <vscale x 32 x half> @floor_nxv32f16(<vscale x 32 x half> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI5_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v24, v8
|
||||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI5_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI5_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v16, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 32 x half> @llvm.floor.nxv32f16(<vscale x 32 x half> %x)
|
||||
ret <vscale x 32 x half> %a
|
||||
|
@ -161,18 +125,12 @@ define <vscale x 1 x float> @floor_nxv1f32(<vscale x 1 x float> %x) {
|
|||
; CHECK-NEXT: flw ft0, %lo(.LCPI6_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI6_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI6_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> %x)
|
||||
ret <vscale x 1 x float> %a
|
||||
|
@ -186,18 +144,12 @@ define <vscale x 2 x float> @floor_nxv2f32(<vscale x 2 x float> %x) {
|
|||
; CHECK-NEXT: flw ft0, %lo(.LCPI7_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI7_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI7_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> %x)
|
||||
ret <vscale x 2 x float> %a
|
||||
|
@ -210,19 +162,13 @@ define <vscale x 4 x float> @floor_nxv4f32(<vscale x 4 x float> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI8_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI8_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI8_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI8_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v10, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v10, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> %x)
|
||||
ret <vscale x 4 x float> %a
|
||||
|
@ -235,19 +181,13 @@ define <vscale x 8 x float> @floor_nxv8f32(<vscale x 8 x float> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI9_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI9_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v12, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> %x)
|
||||
ret <vscale x 8 x float> %a
|
||||
|
@ -260,19 +200,13 @@ define <vscale x 16 x float> @floor_nxv16f32(<vscale x 16 x float> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI10_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v24, v8
|
||||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI10_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v16, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> %x)
|
||||
ret <vscale x 16 x float> %a
|
||||
|
@ -286,18 +220,12 @@ define <vscale x 1 x double> @floor_nxv1f64(<vscale x 1 x double> %x) {
|
|||
; CHECK-NEXT: fld ft0, %lo(.LCPI11_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v9, v9, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI11_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI11_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v10, v9
|
||||
; CHECK-NEXT: vmflt.vv v10, v8, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v10
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v11, v8, v0.t
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> %x)
|
||||
ret <vscale x 1 x double> %a
|
||||
|
@ -310,19 +238,13 @@ define <vscale x 2 x double> @floor_nxv2f64(<vscale x 2 x double> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI12_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v10, v12, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI12_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI12_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v11, v10
|
||||
; CHECK-NEXT: vmflt.vv v11, v8, v12, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v11
|
||||
; CHECK-NEXT: vfsub.vf v12, v12, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v10
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v10, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v10, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> %x)
|
||||
ret <vscale x 2 x double> %a
|
||||
|
@ -335,19 +257,13 @@ define <vscale x 4 x double> @floor_nxv4f64(<vscale x 4 x double> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI13_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI13_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v12, v16, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI13_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI13_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v13, v12
|
||||
; CHECK-NEXT: vmflt.vv v13, v8, v16, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v13
|
||||
; CHECK-NEXT: vfsub.vf v16, v16, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v12
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v12, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v12, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> %x)
|
||||
ret <vscale x 4 x double> %a
|
||||
|
@ -360,19 +276,13 @@ define <vscale x 8 x double> @floor_nxv8f64(<vscale x 8 x double> %x) {
|
|||
; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI14_0)(a0)
|
||||
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu
|
||||
; CHECK-NEXT: vfabs.v v24, v8
|
||||
; CHECK-NEXT: vmflt.vf v16, v24, ft0
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v24, v8, v0.t
|
||||
; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI14_1)(a0)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v17, v16
|
||||
; CHECK-NEXT: vmflt.vv v17, v8, v24, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v17
|
||||
; CHECK-NEXT: vfsub.vf v24, v24, ft0, v0.t
|
||||
; CHECK-NEXT: vmv1r.v v0, v16
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
|
||||
; CHECK-NEXT: vfabs.v v16, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v16, ft0
|
||||
; CHECK-NEXT: fsrmi a0, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a0
|
||||
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
|
||||
; CHECK-NEXT: ret
|
||||
%a = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> %x)
|
||||
ret <vscale x 8 x double> %a
|
||||
|
|
|
@ -2030,23 +2030,17 @@ define void @ceil_v8f16(<8 x half>* %x) {
|
|||
; CHECK-LABEL: ceil_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vle16.v v10, (a0)
|
||||
; CHECK-NEXT: vle16.v v8, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI94_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI94_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI94_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI94_1)(a1)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse16.v v10, (a0)
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a1, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a1
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: vse16.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
|
||||
|
@ -2059,23 +2053,17 @@ define void @ceil_v4f32(<4 x float>* %x) {
|
|||
; CHECK-LABEL: ceil_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vle32.v v10, (a0)
|
||||
; CHECK-NEXT: vle32.v v8, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI95_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI95_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI95_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI95_1)(a1)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse32.v v10, (a0)
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a1, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a1
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: vse32.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
|
||||
|
@ -2088,23 +2076,17 @@ define void @ceil_v2f64(<2 x double>* %x) {
|
|||
; CHECK-LABEL: ceil_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vle64.v v10, (a0)
|
||||
; CHECK-NEXT: vle64.v v8, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI96_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI96_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI96_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI96_1)(a1)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v11, v10, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfadd.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse64.v v10, (a0)
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a1, 3
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a1
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: vse64.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
|
||||
|
@ -2117,23 +2099,17 @@ define void @floor_v8f16(<8 x half>* %x) {
|
|||
; CHECK-LABEL: floor_v8f16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
|
||||
; CHECK-NEXT: vle16.v v10, (a0)
|
||||
; CHECK-NEXT: vle16.v v8, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI97_0)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI97_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI97_1)
|
||||
; CHECK-NEXT: flh ft0, %lo(.LCPI97_1)(a1)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse16.v v10, (a0)
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a1, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a1
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: vse16.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <8 x half>, <8 x half>* %x
|
||||
%b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
|
||||
|
@ -2146,23 +2122,17 @@ define void @floor_v4f32(<4 x float>* %x) {
|
|||
; CHECK-LABEL: floor_v4f32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
|
||||
; CHECK-NEXT: vle32.v v10, (a0)
|
||||
; CHECK-NEXT: vle32.v v8, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI98_0)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI98_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI98_1)
|
||||
; CHECK-NEXT: flw ft0, %lo(.LCPI98_1)(a1)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse32.v v10, (a0)
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a1, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a1
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: vse32.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <4 x float>, <4 x float>* %x
|
||||
%b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
|
||||
|
@ -2175,23 +2145,17 @@ define void @floor_v2f64(<2 x double>* %x) {
|
|||
; CHECK-LABEL: floor_v2f64:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
|
||||
; CHECK-NEXT: vle64.v v10, (a0)
|
||||
; CHECK-NEXT: vle64.v v8, (a0)
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI99_0)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI99_0)(a1)
|
||||
; CHECK-NEXT: vfabs.v v8, v10
|
||||
; CHECK-NEXT: vmflt.vf v8, v8, ft0
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v10, v0.t
|
||||
; CHECK-NEXT: lui a1, %hi(.LCPI99_1)
|
||||
; CHECK-NEXT: fld ft0, %lo(.LCPI99_1)(a1)
|
||||
; CHECK-NEXT: vfcvt.f.x.v v11, v9, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vv v9, v10, v11, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v9
|
||||
; CHECK-NEXT: vfsub.vf v11, v11, ft0, v0.t
|
||||
; CHECK-NEXT: vmv.v.v v0, v8
|
||||
; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t
|
||||
; CHECK-NEXT: vse64.v v10, (a0)
|
||||
; CHECK-NEXT: vfabs.v v9, v8
|
||||
; CHECK-NEXT: vmflt.vf v0, v9, ft0
|
||||
; CHECK-NEXT: fsrmi a1, 2
|
||||
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
|
||||
; CHECK-NEXT: fsrm a1
|
||||
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
|
||||
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
|
||||
; CHECK-NEXT: vse64.v v8, (a0)
|
||||
; CHECK-NEXT: ret
|
||||
%a = load <2 x double>, <2 x double>* %x
|
||||
%b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
|
||||
|
|
Loading…
Reference in New Issue