TargetTransformInfo: convert Optional to std::optional
Recommit: added missing "#include <cstdint>".
This commit is contained in:
parent
1127e479e8
commit
86fe4dfdb6
|
@ -552,22 +552,21 @@ public:
|
|||
/// intrinsics. This function will be called from the InstCombine pass every
|
||||
/// time a target-specific intrinsic is encountered.
|
||||
///
|
||||
/// \returns None to not do anything target specific or a value that will be
|
||||
/// returned from the InstCombiner. It is possible to return null and stop
|
||||
/// further processing of the intrinsic by returning nullptr.
|
||||
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) const;
|
||||
/// \returns std::nullopt to not do anything target specific or a value that
|
||||
/// will be returned from the InstCombiner. It is possible to return null and
|
||||
/// stop further processing of the intrinsic by returning nullptr.
|
||||
std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
|
||||
IntrinsicInst & II) const;
|
||||
/// Can be used to implement target-specific instruction combining.
|
||||
/// \see instCombineIntrinsic
|
||||
Optional<Value *>
|
||||
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
|
||||
APInt DemandedMask, KnownBits &Known,
|
||||
bool &KnownBitsComputed) const;
|
||||
std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
|
||||
InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
|
||||
KnownBits & Known, bool &KnownBitsComputed) const;
|
||||
/// Can be used to implement target-specific instruction combining.
|
||||
/// \see instCombineIntrinsic
|
||||
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
|
||||
APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
SimplifyAndSetOp) const;
|
||||
/// @}
|
||||
|
@ -971,7 +970,7 @@ public:
|
|||
unsigned getMinVectorRegisterBitWidth() const;
|
||||
|
||||
/// \return The maximum value of vscale if the target specifies an
|
||||
/// architectural maximum vector length, and None otherwise.
|
||||
/// architectural maximum vector length, and std::nullopt otherwise.
|
||||
std::optional<unsigned> getMaxVScale() const;
|
||||
|
||||
/// \return the value of vscale to tune the cost model for.
|
||||
|
@ -1028,10 +1027,10 @@ public:
|
|||
};
|
||||
|
||||
/// \return The size of the cache level in bytes, if available.
|
||||
Optional<unsigned> getCacheSize(CacheLevel Level) const;
|
||||
std::optional<unsigned> getCacheSize(CacheLevel Level) const;
|
||||
|
||||
/// \return The associativity of the cache level, if available.
|
||||
Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
|
||||
std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
|
||||
|
||||
/// \return How much before a load we should place the prefetch
|
||||
/// instruction. This is currently measured in number of
|
||||
|
@ -1264,8 +1263,8 @@ public:
|
|||
|
||||
/// A helper function to determine the type of reduction algorithm used
|
||||
/// for a given \p Opcode and set of FastMathFlags \p FMF.
|
||||
static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
|
||||
return FMF != None && !(*FMF).allowReassoc();
|
||||
static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
|
||||
return FMF && !(*FMF).allowReassoc();
|
||||
}
|
||||
|
||||
/// Calculate the cost of vector reduction intrinsics.
|
||||
|
@ -1293,7 +1292,7 @@ public:
|
|||
/// allowed.
|
||||
///
|
||||
InstructionCost getArithmeticReductionCost(
|
||||
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
|
||||
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
|
||||
|
||||
InstructionCost getMinMaxReductionCost(
|
||||
|
@ -1315,7 +1314,7 @@ public:
|
|||
/// ResTy vecreduce.opcode(ext(Ty A)).
|
||||
InstructionCost getExtendedReductionCost(
|
||||
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
|
||||
|
||||
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
|
||||
|
@ -1369,11 +1368,10 @@ public:
|
|||
Type *ExpectedType) const;
|
||||
|
||||
/// \returns The type to use in a loop expansion of a memcpy call.
|
||||
Type *
|
||||
getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
|
||||
unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicElementSize = None) const;
|
||||
Type *getMemcpyLoopLoweringType(
|
||||
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
|
||||
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
|
||||
std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
|
||||
|
||||
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
|
||||
/// \param RemainingBytes The number of bytes to copy.
|
||||
|
@ -1385,7 +1383,7 @@ public:
|
|||
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
|
||||
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicCpySize = None) const;
|
||||
std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
|
||||
|
||||
/// \returns True if the two functions have compatible attributes for inlining
|
||||
/// purposes.
|
||||
|
@ -1610,15 +1608,14 @@ public:
|
|||
DominatorTree *DT, LoopVectorizationLegality *LVL,
|
||||
InterleavedAccessInfo *IAI) = 0;
|
||||
virtual PredicationStyle emitGetActiveLaneMask() = 0;
|
||||
virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) = 0;
|
||||
virtual Optional<Value *>
|
||||
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
|
||||
APInt DemandedMask, KnownBits &Known,
|
||||
bool &KnownBitsComputed) = 0;
|
||||
virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
virtual std::optional<Instruction *> instCombineIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II) = 0;
|
||||
virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
|
||||
KnownBits & Known, bool &KnownBitsComputed) = 0;
|
||||
virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
|
||||
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
SimplifyAndSetOp) = 0;
|
||||
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
|
||||
|
@ -1727,8 +1724,9 @@ public:
|
|||
virtual bool shouldConsiderAddressTypePromotion(
|
||||
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
|
||||
virtual unsigned getCacheLineSize() const = 0;
|
||||
virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
|
||||
virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
|
||||
virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
|
||||
virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
|
||||
const = 0;
|
||||
|
||||
/// \return How much before a load we should place the prefetch
|
||||
/// instruction. This is currently measured in number of
|
||||
|
@ -1818,14 +1816,14 @@ public:
|
|||
bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
|
||||
virtual InstructionCost
|
||||
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind) = 0;
|
||||
virtual InstructionCost
|
||||
getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
|
||||
TTI::TargetCostKind CostKind) = 0;
|
||||
virtual InstructionCost getExtendedReductionCost(
|
||||
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
|
||||
virtual InstructionCost getMulAccReductionCost(
|
||||
bool IsUnsigned, Type *ResTy, VectorType *Ty,
|
||||
|
@ -1846,17 +1844,16 @@ public:
|
|||
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
|
||||
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
|
||||
Type *ExpectedType) = 0;
|
||||
virtual Type *
|
||||
getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
|
||||
unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicElementSize) const = 0;
|
||||
virtual Type *getMemcpyLoopLoweringType(
|
||||
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
|
||||
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
|
||||
std::optional<uint32_t> AtomicElementSize) const = 0;
|
||||
|
||||
virtual void getMemcpyLoopResidualLoweringType(
|
||||
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
|
||||
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicCpySize) const = 0;
|
||||
std::optional<uint32_t> AtomicCpySize) const = 0;
|
||||
virtual bool areInlineCompatible(const Function *Caller,
|
||||
const Function *Callee) const = 0;
|
||||
virtual bool areTypesABICompatible(const Function *Caller,
|
||||
|
@ -2008,18 +2005,18 @@ public:
|
|||
PredicationStyle emitGetActiveLaneMask() override {
|
||||
return Impl.emitGetActiveLaneMask();
|
||||
}
|
||||
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) override {
|
||||
std::optional<Instruction *>
|
||||
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
|
||||
return Impl.instCombineIntrinsic(IC, II);
|
||||
}
|
||||
Optional<Value *>
|
||||
std::optional<Value *>
|
||||
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
|
||||
APInt DemandedMask, KnownBits &Known,
|
||||
bool &KnownBitsComputed) override {
|
||||
return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
|
||||
KnownBitsComputed);
|
||||
}
|
||||
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
|
@ -2266,10 +2263,11 @@ public:
|
|||
I, AllowPromotionWithoutCommonHeader);
|
||||
}
|
||||
unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
|
||||
Optional<unsigned> getCacheSize(CacheLevel Level) const override {
|
||||
std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
|
||||
return Impl.getCacheSize(Level);
|
||||
}
|
||||
Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
|
||||
std::optional<unsigned>
|
||||
getCacheAssociativity(CacheLevel Level) const override {
|
||||
return Impl.getCacheAssociativity(Level);
|
||||
}
|
||||
|
||||
|
@ -2407,7 +2405,7 @@ public:
|
|||
}
|
||||
InstructionCost
|
||||
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind) override {
|
||||
return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
|
||||
}
|
||||
|
@ -2418,7 +2416,7 @@ public:
|
|||
}
|
||||
InstructionCost getExtendedReductionCost(
|
||||
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
|
||||
return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
|
||||
CostKind);
|
||||
|
@ -2461,7 +2459,7 @@ public:
|
|||
Type *getMemcpyLoopLoweringType(
|
||||
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
|
||||
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicElementSize) const override {
|
||||
std::optional<uint32_t> AtomicElementSize) const override {
|
||||
return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
|
||||
DestAddrSpace, SrcAlign, DestAlign,
|
||||
AtomicElementSize);
|
||||
|
@ -2470,7 +2468,7 @@ public:
|
|||
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
|
||||
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicCpySize) const override {
|
||||
std::optional<uint32_t> AtomicCpySize) const override {
|
||||
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
|
||||
SrcAddrSpace, DestAddrSpace,
|
||||
SrcAlign, DestAlign, AtomicCpySize);
|
||||
|
|
|
@ -175,24 +175,24 @@ public:
|
|||
return PredicationStyle::None;
|
||||
}
|
||||
|
||||
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) const {
|
||||
return None;
|
||||
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) const {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Optional<Value *>
|
||||
std::optional<Value *>
|
||||
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
|
||||
APInt DemandedMask, KnownBits &Known,
|
||||
bool &KnownBitsComputed) const {
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
SimplifyAndSetOp) const {
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
void getUnrollingPreferences(Loop *, ScalarEvolution &,
|
||||
|
@ -453,25 +453,24 @@ public:
|
|||
}
|
||||
|
||||
unsigned getCacheLineSize() const { return 0; }
|
||||
|
||||
llvm::Optional<unsigned>
|
||||
std::optional<unsigned>
|
||||
getCacheSize(TargetTransformInfo::CacheLevel Level) const {
|
||||
switch (Level) {
|
||||
case TargetTransformInfo::CacheLevel::L1D:
|
||||
[[fallthrough]];
|
||||
case TargetTransformInfo::CacheLevel::L2D:
|
||||
return llvm::None;
|
||||
return std::nullopt;
|
||||
}
|
||||
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
|
||||
}
|
||||
|
||||
llvm::Optional<unsigned>
|
||||
std::optional<unsigned>
|
||||
getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
|
||||
switch (Level) {
|
||||
case TargetTransformInfo::CacheLevel::L1D:
|
||||
[[fallthrough]];
|
||||
case TargetTransformInfo::CacheLevel::L2D:
|
||||
return llvm::None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
|
||||
|
@ -694,7 +693,7 @@ public:
|
|||
}
|
||||
|
||||
InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind) const {
|
||||
return 1;
|
||||
}
|
||||
|
@ -706,7 +705,7 @@ public:
|
|||
|
||||
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
|
||||
Type *ResTy, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind) const {
|
||||
return 1;
|
||||
}
|
||||
|
@ -739,10 +738,11 @@ public:
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
|
||||
unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicElementSize) const {
|
||||
Type *
|
||||
getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
|
||||
unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
std::optional<uint32_t> AtomicElementSize) const {
|
||||
return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
|
||||
: Type::getInt8Ty(Context);
|
||||
}
|
||||
|
@ -751,7 +751,7 @@ public:
|
|||
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
|
||||
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicCpySize) const {
|
||||
std::optional<uint32_t> AtomicCpySize) const {
|
||||
unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
|
||||
Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
|
||||
for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
|
||||
|
|
|
@ -621,21 +621,20 @@ public:
|
|||
return BaseT::emitGetActiveLaneMask();
|
||||
}
|
||||
|
||||
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
return BaseT::instCombineIntrinsic(IC, II);
|
||||
}
|
||||
|
||||
Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II,
|
||||
APInt DemandedMask,
|
||||
KnownBits &Known,
|
||||
bool &KnownBitsComputed) {
|
||||
std::optional<Value *>
|
||||
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
|
||||
APInt DemandedMask, KnownBits &Known,
|
||||
bool &KnownBitsComputed) {
|
||||
return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
|
||||
KnownBitsComputed);
|
||||
}
|
||||
|
||||
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
|
@ -645,15 +644,15 @@ public:
|
|||
SimplifyAndSetOp);
|
||||
}
|
||||
|
||||
virtual Optional<unsigned>
|
||||
virtual std::optional<unsigned>
|
||||
getCacheSize(TargetTransformInfo::CacheLevel Level) const {
|
||||
return Optional<unsigned>(
|
||||
getST()->getCacheSize(static_cast<unsigned>(Level)));
|
||||
return std::optional<unsigned>(
|
||||
getST()->getCacheSize(static_cast<unsigned>(Level)));
|
||||
}
|
||||
|
||||
virtual Optional<unsigned>
|
||||
virtual std::optional<unsigned>
|
||||
getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
|
||||
Optional<unsigned> TargetResult =
|
||||
std::optional<unsigned> TargetResult =
|
||||
getST()->getCacheAssociativity(static_cast<unsigned>(Level));
|
||||
|
||||
if (TargetResult)
|
||||
|
@ -2283,7 +2282,7 @@ public:
|
|||
}
|
||||
|
||||
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
if (TTI::requiresOrderedReduction(FMF))
|
||||
return getOrderedReductionCost(Opcode, Ty, CostKind);
|
||||
|
@ -2357,7 +2356,7 @@ public:
|
|||
|
||||
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
|
||||
Type *ResTy, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
// Without any native support, this is equivalent to the cost of
|
||||
// vecreduce.opcode(ext(Ty A)).
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "llvm/MC/SubtargetFeature.h"
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
|
@ -236,13 +237,13 @@ public:
|
|||
/// Level is zero-based, so a value of zero means the first level of
|
||||
/// cache.
|
||||
///
|
||||
virtual Optional<unsigned> getCacheSize(unsigned Level) const;
|
||||
virtual std::optional<unsigned> getCacheSize(unsigned Level) const;
|
||||
|
||||
/// Return the cache associatvity for the given level of cache.
|
||||
/// Level is zero-based, so a value of zero means the first level of
|
||||
/// cache.
|
||||
///
|
||||
virtual Optional<unsigned> getCacheAssociativity(unsigned Level) const;
|
||||
virtual std::optional<unsigned> getCacheAssociativity(unsigned Level) const;
|
||||
|
||||
/// Return the target cache line size in bytes at a given level.
|
||||
///
|
||||
|
|
|
@ -378,12 +378,12 @@ public:
|
|||
LoopInfo *getLoopInfo() const { return LI; }
|
||||
|
||||
// Call target specific combiners
|
||||
Optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II);
|
||||
Optional<Value *>
|
||||
std::optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II);
|
||||
std::optional<Value *>
|
||||
targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask,
|
||||
KnownBits &Known,
|
||||
bool &KnownBitsComputed);
|
||||
Optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic(
|
||||
std::optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic(
|
||||
IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
#ifndef LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H
|
||||
#define LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H
|
||||
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
|
@ -31,21 +32,19 @@ struct Align;
|
|||
|
||||
/// Emit a loop implementing the semantics of llvm.memcpy where the size is not
|
||||
/// a compile-time constant. Loop will be insterted at \p InsertBefore.
|
||||
void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
|
||||
Value *DstAddr, Value *CopyLen, Align SrcAlign,
|
||||
Align DestAlign, bool SrcIsVolatile,
|
||||
bool DstIsVolatile, bool CanOverlap,
|
||||
const TargetTransformInfo &TTI,
|
||||
Optional<unsigned> AtomicSize = None);
|
||||
void createMemCpyLoopUnknownSize(
|
||||
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
|
||||
Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile,
|
||||
bool CanOverlap, const TargetTransformInfo &TTI,
|
||||
std::optional<unsigned> AtomicSize = std::nullopt);
|
||||
|
||||
/// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
|
||||
/// compile time constant. Loop is inserted at \p InsertBefore.
|
||||
void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
|
||||
Value *DstAddr, ConstantInt *CopyLen,
|
||||
Align SrcAlign, Align DestAlign,
|
||||
bool SrcIsVolatile, bool DstIsVolatile,
|
||||
bool CanOverlap, const TargetTransformInfo &TTI,
|
||||
Optional<uint32_t> AtomicCpySize = None);
|
||||
void createMemCpyLoopKnownSize(
|
||||
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
|
||||
ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile,
|
||||
bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
|
||||
std::optional<uint32_t> AtomicCpySize = std::nullopt);
|
||||
|
||||
/// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
|
||||
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
@ -308,20 +309,20 @@ PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const {
|
|||
return TTIImpl->emitGetActiveLaneMask();
|
||||
}
|
||||
|
||||
Optional<Instruction *>
|
||||
std::optional<Instruction *>
|
||||
TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) const {
|
||||
return TTIImpl->instCombineIntrinsic(IC, II);
|
||||
}
|
||||
|
||||
Optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
|
||||
std::optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
|
||||
bool &KnownBitsComputed) const {
|
||||
return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
|
||||
KnownBitsComputed);
|
||||
}
|
||||
|
||||
Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
|
||||
std::optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
|
||||
APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
|
@ -689,12 +690,12 @@ unsigned TargetTransformInfo::getCacheLineSize() const {
|
|||
: TTIImpl->getCacheLineSize();
|
||||
}
|
||||
|
||||
llvm::Optional<unsigned>
|
||||
std::optional<unsigned>
|
||||
TargetTransformInfo::getCacheSize(CacheLevel Level) const {
|
||||
return TTIImpl->getCacheSize(Level);
|
||||
}
|
||||
|
||||
llvm::Optional<unsigned>
|
||||
std::optional<unsigned>
|
||||
TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const {
|
||||
return TTIImpl->getCacheAssociativity(Level);
|
||||
}
|
||||
|
@ -1000,7 +1001,7 @@ InstructionCost TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
|
|||
}
|
||||
|
||||
InstructionCost TargetTransformInfo::getArithmeticReductionCost(
|
||||
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
|
||||
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind) const {
|
||||
InstructionCost Cost =
|
||||
TTIImpl->getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
|
||||
|
@ -1019,7 +1020,7 @@ InstructionCost TargetTransformInfo::getMinMaxReductionCost(
|
|||
|
||||
InstructionCost TargetTransformInfo::getExtendedReductionCost(
|
||||
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
|
||||
Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
|
||||
std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
|
||||
return TTIImpl->getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
|
||||
CostKind);
|
||||
}
|
||||
|
@ -1052,7 +1053,7 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
|
|||
Type *TargetTransformInfo::getMemcpyLoopLoweringType(
|
||||
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
|
||||
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicElementSize) const {
|
||||
std::optional<uint32_t> AtomicElementSize) const {
|
||||
return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
|
||||
DestAddrSpace, SrcAlign, DestAlign,
|
||||
AtomicElementSize);
|
||||
|
@ -1062,7 +1063,7 @@ void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
|
|||
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
|
||||
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
|
||||
unsigned SrcAlign, unsigned DestAlign,
|
||||
Optional<uint32_t> AtomicCpySize) const {
|
||||
std::optional<uint32_t> AtomicCpySize) const {
|
||||
TTIImpl->getMemcpyLoopResidualLoweringType(
|
||||
OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
|
||||
DestAlign, AtomicCpySize);
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
@ -335,11 +336,11 @@ void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const {
|
|||
ForwardingPaths);
|
||||
}
|
||||
|
||||
Optional<unsigned> MCSubtargetInfo::getCacheSize(unsigned Level) const {
|
||||
return None;
|
||||
std::optional<unsigned> MCSubtargetInfo::getCacheSize(unsigned Level) const {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Optional<unsigned>
|
||||
std::optional<unsigned>
|
||||
MCSubtargetInfo::getCacheAssociativity(unsigned Level) const {
|
||||
return None;
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "llvm/Transforms/InstCombine/InstCombiner.h"
|
||||
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
using namespace llvm;
|
||||
using namespace llvm::PatternMatch;
|
||||
|
||||
|
@ -521,8 +522,8 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
|||
|
||||
/// The function will remove redundant reinterprets casting in the presence
|
||||
/// of the control flow
|
||||
static Optional<Instruction *> processPhiNode(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
SmallVector<Instruction *, 32> Worklist;
|
||||
auto RequiredType = II.getType();
|
||||
|
||||
|
@ -531,7 +532,7 @@ static Optional<Instruction *> processPhiNode(InstCombiner &IC,
|
|||
|
||||
// Don't create a new Phi unless we can remove the old one.
|
||||
if (!PN->hasOneUse())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
for (Value *IncValPhi : PN->incoming_values()) {
|
||||
auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
|
||||
|
@ -539,7 +540,7 @@ static Optional<Instruction *> processPhiNode(InstCombiner &IC,
|
|||
Reinterpret->getIntrinsicID() !=
|
||||
Intrinsic::aarch64_sve_convert_to_svbool ||
|
||||
RequiredType != Reinterpret->getArgOperand(0)->getType())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Create the new Phi
|
||||
|
@ -568,11 +569,11 @@ static Optional<Instruction *> processPhiNode(InstCombiner &IC,
|
|||
// and` into a `<vscale x 4 x i1> and`. This is profitable because
|
||||
// to_svbool must zero the new lanes during widening, whereas
|
||||
// from_svbool is free.
|
||||
static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *>
|
||||
tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) {
|
||||
auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
|
||||
if (!BinOp)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
auto IntrinsicID = BinOp->getIntrinsicID();
|
||||
switch (IntrinsicID) {
|
||||
|
@ -585,7 +586,7 @@ static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
|
|||
case Intrinsic::aarch64_sve_orr_z:
|
||||
break;
|
||||
default:
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto BinOpPred = BinOp->getOperand(0);
|
||||
|
@ -595,12 +596,12 @@ static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
|
|||
auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
|
||||
if (!PredIntr ||
|
||||
PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
auto PredOp = PredIntr->getOperand(0);
|
||||
auto PredOpTy = cast<VectorType>(PredOp->getType());
|
||||
if (PredOpTy != II.getType())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
|
@ -620,8 +621,8 @@ static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, NarrowedBinOp);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *>
|
||||
instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) {
|
||||
// If the reinterpret instruction operand is a PHI Node
|
||||
if (isa<PHINode>(II.getArgOperand(0)))
|
||||
return processPhiNode(IC, II);
|
||||
|
@ -663,32 +664,32 @@ static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
|
|||
// If no viable replacement in the conversion chain was found, there is
|
||||
// nothing to do.
|
||||
if (!EarliestReplacement)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
return IC.replaceInstUsesWith(II, EarliestReplacement);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVESel(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
IRBuilder<> Builder(&II);
|
||||
auto Select = Builder.CreateSelect(II.getOperand(0), II.getOperand(1),
|
||||
II.getOperand(2));
|
||||
return IC.replaceInstUsesWith(II, Select);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
|
||||
if (!Pg)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
const auto PTruePattern =
|
||||
cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
|
||||
if (PTruePattern != AArch64SVEPredPattern::vl1)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// The intrinsic is inserting into lane zero so use an insert instead.
|
||||
auto *IdxTy = Type::getInt64Ty(II.getContext());
|
||||
|
@ -700,8 +701,8 @@ static Optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, Insert);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
// Replace DupX with a regular IR splat.
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
|
@ -712,8 +713,8 @@ static Optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, Splat);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
LLVMContext &Ctx = II.getContext();
|
||||
IRBuilder<> Builder(Ctx);
|
||||
Builder.SetInsertPoint(&II);
|
||||
|
@ -721,49 +722,49 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
|
|||
// Check that the predicate is all active
|
||||
auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
|
||||
if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
const auto PTruePattern =
|
||||
cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
|
||||
if (PTruePattern != AArch64SVEPredPattern::all)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// Check that we have a compare of zero..
|
||||
auto *SplatValue =
|
||||
dyn_cast_or_null<ConstantInt>(getSplatValue(II.getArgOperand(2)));
|
||||
if (!SplatValue || !SplatValue->isZero())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// ..against a dupq
|
||||
auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
|
||||
if (!DupQLane ||
|
||||
DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// Where the dupq is a lane 0 replicate of a vector insert
|
||||
if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
|
||||
if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// Where the vector insert is a fixed constant vector insert into undef at
|
||||
// index zero
|
||||
if (!isa<UndefValue>(VecIns->getArgOperand(0)))
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
|
||||
if (!ConstVec)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
|
||||
auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
|
||||
if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
unsigned NumElts = VecTy->getNumElements();
|
||||
unsigned PredicateBits = 0;
|
||||
|
@ -772,7 +773,7 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
|
|||
for (unsigned I = 0; I < NumElts; ++I) {
|
||||
auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
|
||||
if (!Arg)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
if (!Arg->isZero())
|
||||
PredicateBits |= 1 << (I * (16 / NumElts));
|
||||
}
|
||||
|
@ -797,7 +798,7 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
|
|||
// Ensure all relevant bits are set
|
||||
for (unsigned I = 0; I < 16; I += PredSize)
|
||||
if ((PredicateBits & (1 << I)) == 0)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
auto *PTruePat =
|
||||
ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
|
||||
|
@ -813,8 +814,8 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, ConvertFromSVBool);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
Value *Pg = II.getArgOperand(0);
|
||||
|
@ -855,10 +856,10 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
|
|||
|
||||
auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
|
||||
if (!IntrPG)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
const auto PTruePattern =
|
||||
cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
|
||||
|
@ -866,7 +867,7 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
|
|||
// Can the intrinsic's predicate be converted to a known constant index?
|
||||
unsigned MinNumElts = getNumElementsFromSVEPredPattern(PTruePattern);
|
||||
if (!MinNumElts)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
unsigned Idx = MinNumElts - 1;
|
||||
// Increment the index if extracting the element after the last active
|
||||
|
@ -879,7 +880,7 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
|
|||
// maintain what the user asked for until an alternative is proven faster.
|
||||
auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
|
||||
if (Idx >= PgVTy->getMinNumElements())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// The intrinsic is extracting a fixed lane so use an extract instead.
|
||||
auto *IdxTy = Type::getInt64Ty(II.getContext());
|
||||
|
@ -889,8 +890,8 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, Extract);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
// The SIMD&FP variant of CLAST[AB] is significantly faster than the scalar
|
||||
// integer variant across a variety of micro-architectures. Replace scalar
|
||||
// integer CLAST[AB] intrinsic with optimal SIMD&FP variant. A simple
|
||||
|
@ -906,12 +907,12 @@ static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
|
|||
Type *Ty = II.getType();
|
||||
|
||||
if (!Ty->isIntegerTy())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
Type *FPTy;
|
||||
switch (cast<IntegerType>(Ty)->getBitWidth()) {
|
||||
default:
|
||||
return None;
|
||||
return std::nullopt;
|
||||
case 16:
|
||||
FPTy = Builder.getHalfTy();
|
||||
break;
|
||||
|
@ -933,8 +934,8 @@ static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, FPIItoInt);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
LLVMContext &Ctx = II.getContext();
|
||||
IRBuilder<> Builder(Ctx);
|
||||
Builder.SetInsertPoint(&II);
|
||||
|
@ -950,7 +951,7 @@ static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, RDFFR);
|
||||
}
|
||||
|
||||
static Optional<Instruction *>
|
||||
static std::optional<Instruction *>
|
||||
instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
|
||||
const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();
|
||||
|
||||
|
@ -968,13 +969,13 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
|
|||
unsigned MinNumElts = getNumElementsFromSVEPredPattern(Pattern);
|
||||
|
||||
return MinNumElts && NumElts >= MinNumElts
|
||||
? Optional<Instruction *>(IC.replaceInstUsesWith(
|
||||
? std::optional<Instruction *>(IC.replaceInstUsesWith(
|
||||
II, ConstantInt::get(II.getType(), MinNumElts)))
|
||||
: None;
|
||||
: std::nullopt;
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
Value *PgVal = II.getArgOperand(0);
|
||||
Value *OpVal = II.getArgOperand(1);
|
||||
|
||||
|
@ -1000,7 +1001,7 @@ static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
|
|||
IntrinsicInst *Op = dyn_cast<IntrinsicInst>(OpVal);
|
||||
|
||||
if (!Pg || !Op)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
Intrinsic::ID OpIID = Op->getIntrinsicID();
|
||||
|
||||
|
@ -1041,11 +1042,11 @@ static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, PTest);
|
||||
}
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *>
|
||||
instCombineSVEVectorFMLA(InstCombiner &IC, IntrinsicInst &II) {
|
||||
// fold (fadd p a (fmul p b c)) -> (fma p a b c)
|
||||
Value *P = II.getOperand(0);
|
||||
Value *A = II.getOperand(1);
|
||||
|
@ -1053,18 +1054,18 @@ static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
|
|||
Value *B, *C;
|
||||
if (!match(FMul, m_Intrinsic<Intrinsic::aarch64_sve_fmul>(
|
||||
m_Specific(P), m_Value(B), m_Value(C))))
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
if (!FMul->hasOneUse())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
llvm::FastMathFlags FAddFlags = II.getFastMathFlags();
|
||||
// Stop the combine when the flags on the inputs differ in case dropping flags
|
||||
// would lead to us missing out on more beneficial optimizations.
|
||||
if (FAddFlags != cast<CallInst>(FMul)->getFastMathFlags())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
if (!FAddFlags.allowContract())
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
|
@ -1090,7 +1091,7 @@ static bool isAllActivePredicate(Value *Pred) {
|
|||
m_ConstantInt<AArch64SVEPredPattern::all>()));
|
||||
}
|
||||
|
||||
static Optional<Instruction *>
|
||||
static std::optional<Instruction *>
|
||||
instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
|
@ -1113,7 +1114,7 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
|
|||
return IC.replaceInstUsesWith(II, MaskedLoad);
|
||||
}
|
||||
|
||||
static Optional<Instruction *>
|
||||
static std::optional<Instruction *>
|
||||
instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
|
@ -1149,14 +1150,14 @@ static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
|
|||
}
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *>
|
||||
instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
|
||||
auto *OpPredicate = II.getOperand(0);
|
||||
auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
|
||||
if (BinOpCode == Instruction::BinaryOpsEnd ||
|
||||
!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
|
||||
m_ConstantInt<AArch64SVEPredPattern::all>())))
|
||||
return None;
|
||||
return std::nullopt;
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
Builder.setFastMathFlags(II.getFastMathFlags());
|
||||
|
@ -1165,15 +1166,15 @@ static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, BinOp);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *>
|
||||
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
|
||||
if (auto FMLA = instCombineSVEVectorFMLA(IC, II))
|
||||
return FMLA;
|
||||
return instCombineSVEVectorBinOp(IC, II);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
auto *OpPredicate = II.getOperand(0);
|
||||
auto *OpMultiplicand = II.getOperand(1);
|
||||
auto *OpMultiplier = II.getOperand(2);
|
||||
|
@ -1219,8 +1220,8 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
|
|||
return instCombineSVEVectorBinOp(IC, II);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
Value *UnpackArg = II.getArgOperand(0);
|
||||
|
@ -1239,10 +1240,10 @@ static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, NewVal);
|
||||
}
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
auto *OpVal = II.getOperand(0);
|
||||
auto *OpIndices = II.getOperand(1);
|
||||
VectorType *VTy = cast<VectorType>(II.getType());
|
||||
|
@ -1252,7 +1253,7 @@ static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
|
|||
auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
|
||||
if (!SplatValue ||
|
||||
SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// Convert sve_tbl(OpVal sve_dup_x(SplatValue)) to
|
||||
// splat_vector(extractelement(OpVal, SplatValue)) for further optimization.
|
||||
|
@ -1266,8 +1267,8 @@ static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, VectorSplat);
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
// zip1(uzp1(A, B), uzp2(A, B)) --> A
|
||||
// zip2(uzp1(A, B), uzp2(A, B)) --> B
|
||||
Value *A, *B;
|
||||
|
@ -1278,11 +1279,11 @@ static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(
|
||||
II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *>
|
||||
instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
|
||||
Value *Mask = II.getOperand(0);
|
||||
Value *BasePtr = II.getOperand(1);
|
||||
Value *Index = II.getOperand(2);
|
||||
|
@ -1302,8 +1303,8 @@ static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
|
|||
BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
|
||||
|
||||
Type *VecPtrTy = PointerType::getUnqual(Ty);
|
||||
Value *Ptr = Builder.CreateGEP(
|
||||
cast<VectorType>(Ty)->getElementType(), BasePtr, IndexBase);
|
||||
Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
|
||||
BasePtr, IndexBase);
|
||||
Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
|
||||
CallInst *MaskedLoad =
|
||||
Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
|
||||
|
@ -1311,11 +1312,11 @@ static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, MaskedLoad);
|
||||
}
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *>
|
||||
instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
|
||||
Value *Val = II.getOperand(0);
|
||||
Value *Mask = II.getOperand(1);
|
||||
Value *BasePtr = II.getOperand(2);
|
||||
|
@ -1334,8 +1335,8 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
|
|||
Align Alignment =
|
||||
BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
|
||||
|
||||
Value *Ptr = Builder.CreateGEP(
|
||||
cast<VectorType>(Ty)->getElementType(), BasePtr, IndexBase);
|
||||
Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
|
||||
BasePtr, IndexBase);
|
||||
Type *VecPtrTy = PointerType::getUnqual(Ty);
|
||||
Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
|
||||
|
||||
|
@ -1344,11 +1345,11 @@ static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
|
|||
return IC.eraseInstFromFunction(II);
|
||||
}
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
IRBuilder<> Builder(II.getContext());
|
||||
Builder.SetInsertPoint(&II);
|
||||
Type *Int32Ty = Builder.getInt32Ty();
|
||||
|
@ -1359,7 +1360,7 @@ static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
|
|||
Value *SplatValue = getSplatValue(DivVec);
|
||||
ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
|
||||
if (!SplatConstantInt)
|
||||
return None;
|
||||
return std::nullopt;
|
||||
APInt Divisor = SplatConstantInt->getValue();
|
||||
|
||||
if (Divisor.isPowerOf2()) {
|
||||
|
@ -1378,21 +1379,21 @@ static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, NEG);
|
||||
}
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
Value *A = II.getArgOperand(0);
|
||||
Value *B = II.getArgOperand(1);
|
||||
if (A == B)
|
||||
return IC.replaceInstUsesWith(II, A);
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
static std::optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
|
||||
IntrinsicInst &II) {
|
||||
IRBuilder<> Builder(&II);
|
||||
Value *Pred = II.getOperand(0);
|
||||
Value *Vec = II.getOperand(1);
|
||||
|
@ -1405,21 +1406,20 @@ static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
|
|||
!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
|
||||
m_Value(MergedValue), m_Value(AbsPred), m_Value())))
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// Transform is valid if any of the following are true:
|
||||
// * The ABS merge value is an undef or non-negative
|
||||
// * The ABS predicate is all active
|
||||
// * The ABS predicate and the SRSHL predicates are the same
|
||||
if (!isa<UndefValue>(MergedValue) &&
|
||||
!match(MergedValue, m_NonNegative()) &&
|
||||
if (!isa<UndefValue>(MergedValue) && !match(MergedValue, m_NonNegative()) &&
|
||||
AbsPred != Pred && !isAllActivePredicate(AbsPred))
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
// Only valid when the shift amount is non-negative, otherwise the rounding
|
||||
// behaviour of SRSHL cannot be ignored.
|
||||
if (!match(Shift, m_NonNegative()))
|
||||
return None;
|
||||
return std::nullopt;
|
||||
|
||||
auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
|
||||
{Pred, Vec, Shift});
|
||||
|
@ -1427,7 +1427,7 @@ static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
|
|||
return IC.replaceInstUsesWith(II, LSL);
|
||||
}
|
||||
|
||||
Optional<Instruction *>
|
||||
std::optional<Instruction *>
|
||||
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
||||
IntrinsicInst &II) const {
|
||||
Intrinsic::ID IID = II.getIntrinsicID();
|
||||
|
@ -1499,10 +1499,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
|
|||
return instCombineSVESrshl(IC, II);
|
||||
}
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
|
||||
std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
|
||||
InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
|
||||
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
|
||||
std::function<void(Instruction *, unsigned, APInt, APInt &)>
|
||||
|
@ -1525,7 +1525,7 @@ Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
|
|||
break;
|
||||
}
|
||||
|
||||
return None;
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
TypeSize
|
||||
|
@ -2814,7 +2814,7 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(
|
|||
|
||||
InstructionCost
|
||||
AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
|
||||
Optional<FastMathFlags> FMF,
|
||||
std::optional<FastMathFlags> FMF,
|
||||
TTI::TargetCostKind CostKind) {
|
||||
if (TTI::requiresOrderedReduction(FMF)) {
|
||||
if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
|
||||
|
|
|
@ -112,10 +112,10 @@ public:
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);

Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;

Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -370,7 +370,7 @@ public:
}

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);

InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
@ -20,6 +20,7 @@
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;

@ -114,14 +115,14 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
/// modified arguments (based on OldIntr) and replaces InstToReplace with
/// this newly created intrinsic call.
static Optional<Instruction *> modifyIntrinsicCall(
static std::optional<Instruction *> modifyIntrinsicCall(
IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
InstCombiner &IC,
std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
Func) {
SmallVector<Type *, 4> ArgTys;
if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
return None;
return std::nullopt;

SmallVector<Value *, 8> Args(OldIntr.args());

@ -149,7 +150,7 @@ static Optional<Instruction *> modifyIntrinsicCall(
return RetValue;
}

static Optional<Instruction *>
static std::optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
IntrinsicInst &II, InstCombiner &IC) {

@ -252,7 +253,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
// Try to use A16 or G16
if (!ST->hasA16() && !ST->hasG16())
return None;
return std::nullopt;

// Address is interpreted as float if the instruction has a sampler or as
// unsigned int if there is no sampler.

@ -295,7 +296,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
ImageDimIntr->CoordStart))
return None;
return std::nullopt;

Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
: Type::getInt16Ty(II.getContext());

@ -348,7 +349,7 @@ bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
return false;
}

Optional<Instruction *>
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
switch (IID) {

@ -1059,7 +1060,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
}
}
}
return None;
return std::nullopt;
}

/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.

@ -1204,7 +1205,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
return Shuffle;
}

Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -1228,5 +1229,5 @@ Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
break;
}
}
return None;
return std::nullopt;
}
@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include <optional>

using namespace llvm;

@ -401,7 +402,7 @@ bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
Type *GCNTTIImpl::getMemcpyLoopLoweringType(
LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
Optional<uint32_t> AtomicElementSize) const {
std::optional<uint32_t> AtomicElementSize) const {

if (AtomicElementSize)
return Type::getIntNTy(Context, *AtomicElementSize * 8);

@ -433,7 +434,7 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Optional<uint32_t> AtomicCpySize) const {
std::optional<uint32_t> AtomicCpySize) const {
assert(RemainingBytes < 16);

if (AtomicCpySize)

@ -756,7 +757,7 @@ InstructionCost GCNTTIImpl::getCFInstrCost(unsigned Opcode,
InstructionCost
GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
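The getMemcpyLoopLoweringType and getMemcpyLoopResidualLoweringType changes above are typical of the signature updates: an engaged optional byte count selects the atomic lowering, and std::nullopt keeps the default path. A hedged sketch of that parameter idiom (the helper name and fallback width are invented for illustration):

#include <cstdint>
#include <optional>

// Hypothetical helper: pick an access width in bits. An engaged
// AtomicElementSize (in bytes) forces that width, mirroring the
// "if (AtomicElementSize) return Type::getIntNTy(Context, *AtomicElementSize * 8);" hunk.
unsigned chooseAccessWidth(std::optional<uint32_t> AtomicElementSize) {
  if (AtomicElementSize)
    return *AtomicElementSize * 8;
  return 128; // assumed non-atomic default, purely illustrative
}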
@ -19,6 +19,7 @@
#include "AMDGPU.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include <optional>

namespace llvm {

@ -132,16 +133,16 @@ public:
unsigned AddrSpace) const;
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Optional<uint32_t> AtomicElementSize) const;
Type *getMemcpyLoopLoweringType(
LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicElementSize) const;

void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
Optional<uint32_t> AtomicCpySize) const;
std::optional<uint32_t> AtomicCpySize) const;
unsigned getMaxInterleaveFactor(unsigned VF);

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

@ -188,9 +189,9 @@ public:
bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
InstCombiner &IC) const;
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -213,7 +214,7 @@ public:
int getInlinerVectorBonusPercent() { return 0; }

InstructionCost getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
@ -37,6 +37,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

using namespace llvm;

@ -117,7 +118,7 @@ ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
return TTI::AMK_None;
}

Optional<Instruction *>
std::optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
using namespace PatternMatch;
Intrinsic::ID IID = II.getIntrinsicID();

@ -243,13 +244,13 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.eraseInstFromFunction(*User);
}
}
return None;
return std::nullopt;
}
}
return None;
return std::nullopt;
}

Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -271,7 +272,7 @@ Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
// The other lanes will be defined from the inserted elements.
UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
: APInt::getHighBitsSet(2, 1));
return None;
return std::nullopt;
};

switch (II.getIntrinsicID()) {

@ -288,7 +289,7 @@ Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
break;
}

return None;
return std::nullopt;
}

InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,

@ -1653,7 +1654,7 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(
InstructionCost
ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);

@ -1678,7 +1679,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
InstructionCost ARMTTIImpl::getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
@ -26,6 +26,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include <optional>

namespace llvm {

@ -118,9 +119,9 @@ public:
return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
}

Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -274,11 +275,11 @@ public:
const Instruction *I = nullptr);

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
Type *ResTy, VectorType *ValTy,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
VectorType *ValTy,
@ -414,12 +414,12 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
}

Optional<Instruction *>
std::optional<Instruction *>
NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
return I;
}
return None;
return std::nullopt;
}

InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
@ -21,6 +21,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

@ -53,8 +54,8 @@ public:
AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
}

Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;

// Loads and stores can be vectorized if the alignment is at least as big as
// the load/store we want to vectorize.
@ -21,6 +21,7 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>

using namespace llvm;

@ -60,7 +61,7 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}

Optional<Instruction *>
std::optional<Instruction *>
PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
switch (IID) {

@ -160,7 +161,7 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
}
break;
}
return None;
return std::nullopt;
}

InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
@ -20,6 +20,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <optional>

namespace llvm {

@ -41,8 +42,8 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}

Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
IntrinsicInst & II) const;

/// \name Scalar TTI Implementations
/// @{
@ -784,7 +784,7 @@ RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
InstructionCost
RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

@ -815,7 +815,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
InstructionCost RISCVTTIImpl::getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
FMF, CostKind);
@ -137,12 +137,12 @@ public:
TTI::TargetCostKind CostKind);

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);

InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
Type *ResTy, VectorType *ValTy,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);

InstructionCost
@ -18,6 +18,7 @@
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;

@ -924,7 +925,7 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
return Builder.CreateShuffleVector(V1, makeArrayRef(Indexes, Size));
}

Optional<Instruction *>
std::optional<Instruction *>
X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
unsigned DemandedWidth) {

@ -1730,10 +1731,10 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
default:
break;
}
return None;
return std::nullopt;
}

Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) const {
switch (II.getIntrinsicID()) {

@ -1770,10 +1771,10 @@ Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
break;
}
}
return None;
return std::nullopt;
}

Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -2025,5 +2026,5 @@ Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
UndefElts.setHighBits(VWidth / 2);
break;
}
return None;
return std::nullopt;
}
@ -56,6 +56,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include <optional>

using namespace llvm;

@ -75,7 +76,7 @@ struct CostKindCosts {
unsigned CodeSizeCost = ~0U;
unsigned SizeAndLatencyCost = ~0U;

llvm::Optional<unsigned>
std::optional<unsigned>
operator[](TargetTransformInfo::TargetCostKind Kind) const {
unsigned Cost = ~0U;
switch (Kind) {

@ -93,7 +94,7 @@ struct CostKindCosts {
break;
}
if (Cost == ~0U)
return None;
return std::nullopt;
return Cost;
}
};

@ -108,7 +109,7 @@ X86TTIImpl::getPopcntSupport(unsigned TyWidth) {
return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software;
}

llvm::Optional<unsigned> X86TTIImpl::getCacheSize(
std::optional<unsigned> X86TTIImpl::getCacheSize(
TargetTransformInfo::CacheLevel Level) const {
switch (Level) {
case TargetTransformInfo::CacheLevel::L1D:

@ -138,7 +139,7 @@ llvm::Optional<unsigned> X86TTIImpl::getCacheSize(
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}

llvm::Optional<unsigned> X86TTIImpl::getCacheAssociativity(
std::optional<unsigned> X86TTIImpl::getCacheAssociativity(
TargetTransformInfo::CacheLevel Level) const {
// - Penryn
// - Nehalem

@ -4909,7 +4910,7 @@ InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
InstructionCost
X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
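The CostKindCosts::operator[] hunk above also shows the producer side of the idiom: a ~0U sentinel is translated into an empty optional at the API boundary instead of leaking out as a bogus cost. A small sketch of the same idea, using a hypothetical table type rather than the real struct:

#include <optional>

// Hypothetical cost table: ~0U means "not recorded"; the accessor hides the
// sentinel behind std::optional so callers must check before using the value.
struct SimpleCostTable {
  unsigned Costs[4] = {~0U, ~0U, ~0U, ~0U};

  std::optional<unsigned> get(unsigned Kind) const {
    unsigned Cost = Costs[Kind];
    if (Cost == ~0U)
      return std::nullopt; // was: return None;
    return Cost;
  }
};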
@ -19,6 +19,7 @@
#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include <optional>

namespace llvm {

@ -113,9 +114,9 @@ public:
/// \name Cache TTI Implementation
/// @{
llvm::Optional<unsigned> getCacheSize(
std::optional<unsigned> getCacheSize(
TargetTransformInfo::CacheLevel Level) const override;
llvm::Optional<unsigned> getCacheAssociativity(
std::optional<unsigned> getCacheAssociativity(
TargetTransformInfo::CacheLevel Level) const override;
/// @}

@ -171,13 +172,13 @@ public:
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
const SCEV *Ptr);

Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
Optional<Value *>
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) const;
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -189,7 +190,7 @@ public:
TTI::TargetCostKind CostKind);

InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
Optional<FastMathFlags> FMF,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);

InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
@ -72,6 +72,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>
#include <vector>

@ -2827,7 +2828,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
default: {
// Handle target specific intrinsics
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
if (V)
return V.value();
break;
@ -971,7 +971,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
default: {
// Handle target specific intrinsics
Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
*II, DemandedMask, Known, KnownBitsComputed);
if (V)
return V.value();

@ -1696,7 +1696,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
default: {
// Handle target specific intrinsics
Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
*II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
simplifyAndSetOp);
if (V)
@ -170,16 +170,16 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024),
static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
cl::Hidden, cl::init(true));

Optional<Instruction *>
std::optional<Instruction *>
InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
// Handle target specific intrinsics
if (II.getCalledFunction()->isTargetIntrinsic()) {
return TTI.instCombineIntrinsic(*this, II);
}
return None;
return std::nullopt;
}

Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
bool &KnownBitsComputed) {
// Handle target specific intrinsics

@ -187,10 +187,10 @@ Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
KnownBitsComputed);
}
return None;
return std::nullopt;
}

Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>

@ -201,7 +201,7 @@ Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
*this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
return None;
return std::nullopt;
}

Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
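On the consumer side (visitCallInst, SimplifyDemandedUseBits, SimplifyDemandedVectorElts and the InstCombiner hooks above) the call sites keep their shape; only the declared type of V changes. A sketch of that consumption pattern, with hypothetical hook and dispatch names standing in for the real ones:

#include <optional>

struct Instruction; // stand-in for llvm::Instruction in this sketch

// Hypothetical target hook: std::nullopt means "not handled here".
std::optional<Instruction *> targetHook(Instruction *Result, bool Handled) {
  if (!Handled)
    return std::nullopt;
  return Result;
}

Instruction *dispatch(Instruction *Result, bool Handled) {
  std::optional<Instruction *> V = targetHook(Result, Handled);
  if (V)
    return V.value(); // same shape as "if (V) return V.value();" in the hunks
  return nullptr;     // fall through to generic handling
}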
@ -13,16 +13,15 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>

using namespace llvm;

void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, ConstantInt *CopyLen,
Align SrcAlign, Align DstAlign,
bool SrcIsVolatile, bool DstIsVolatile,
bool CanOverlap,
const TargetTransformInfo &TTI,
Optional<uint32_t> AtomicElementSize) {
void llvm::createMemCpyLoopKnownSize(
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
std::optional<uint32_t> AtomicElementSize) {
// No need to expand zero length copies.
if (CopyLen->isZero())
return;

@ -173,13 +172,11 @@ void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
"Bytes copied should match size in the call!");
}

void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
Value *SrcAddr, Value *DstAddr,
Value *CopyLen, Align SrcAlign,
Align DstAlign, bool SrcIsVolatile,
bool DstIsVolatile, bool CanOverlap,
const TargetTransformInfo &TTI,
Optional<uint32_t> AtomicElementSize) {
void llvm::createMemCpyLoopUnknownSize(
Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
bool CanOverlap, const TargetTransformInfo &TTI,
std::optional<uint32_t> AtomicElementSize) {
BasicBlock *PreLoopBB = InsertBefore->getParent();
BasicBlock *PostLoopBB =
PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");