Revert "TargetTransformInfo: convert Optional to std::optional"

This reverts commit b83711248c. Some buildbots are failing.
2022-12-02 11:33:24 -08:00 · 2022-12-02 11:33:24 -08:00 · 4e12d1836a
parent c414bbefe4
commit 4e12d1836a
28 changed files with 316 additions and 323 deletions
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@ -552,21 +552,22 @@ public:
  /// intrinsics. This function will be called from the InstCombine pass every
  /// time a target-specific intrinsic is encountered.
  ///
-  /// \returns std::nullopt to not do anything target specific or a value that
-  /// will be returned from the InstCombiner. It is possible to return null and
-  /// stop further processing of the intrinsic by returning nullptr.
-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
-                                                    IntrinsicInst & II) const;
+  /// \returns None to not do anything target specific or a value that will be
+  /// returned from the InstCombiner. It is possible to return null and stop
+  /// further processing of the intrinsic by returning nullptr.
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
-  std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
-      InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
-      KnownBits & Known, bool &KnownBitsComputed) const;
+  Optional<Value *>
+  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+                                   APInt DemandedMask, KnownBits &Known,
+                                   bool &KnownBitsComputed) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
-      InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
-      APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
  /// @}
@ -970,7 +971,7 @@ public:
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return The maximum value of vscale if the target specifies an
-  ///  architectural maximum vector length, and std::nullopt otherwise.
+  ///  architectural maximum vector length, and None otherwise.
  std::optional<unsigned> getMaxVScale() const;

  /// \return the value of vscale to tune the cost model for.
@ -1027,10 +1028,10 @@ public:
  };

  /// \return The size of the cache level in bytes, if available.
-  std::optional<unsigned> getCacheSize(CacheLevel Level) const;
+  Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
-  std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
+  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

  /// \return How much before a load we should place the prefetch
  /// instruction.  This is currently measured in number of
@ -1263,8 +1264,8 @@ public:

  /// A helper function to determine the type of reduction algorithm used
  /// for a given \p Opcode and set of FastMathFlags \p FMF.
-  static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
-    return FMF && !(*FMF).allowReassoc();
+  static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
+    return FMF != None && !(*FMF).allowReassoc();
  }

  /// Calculate the cost of vector reduction intrinsics.
@ -1292,7 +1293,7 @@ public:
  ///   allowed.
  ///
  InstructionCost getArithmeticReductionCost(
-      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
+      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  InstructionCost getMinMaxReductionCost(
@ -1314,7 +1315,7 @@ public:
  /// ResTy vecreduce.opcode(ext(Ty A)).
  InstructionCost getExtendedReductionCost(
      unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
-      std::optional<FastMathFlags> FMF,
+      Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
@ -1368,10 +1369,11 @@ public:
                                           Type *ExpectedType) const;

  /// \returns The type to use in a loop expansion of a memcpy call.
-  Type *getMemcpyLoopLoweringType(
-      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
-      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
+  Type *
+  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
+                            unsigned SrcAlign, unsigned DestAlign,
+                            Optional<uint32_t> AtomicElementSize = None) const;

  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  /// \param RemainingBytes The number of bytes to copy.
@ -1383,7 +1385,7 @@ public:
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
+      Optional<uint32_t> AtomicCpySize = None) const;

  /// \returns True if the two functions have compatible attributes for inlining
  /// purposes.
@ -1608,14 +1610,15 @@ public:
                              DominatorTree *DT, LoopVectorizationLegality *LVL,
                              InterleavedAccessInfo *IAI) = 0;
  virtual PredicationStyle emitGetActiveLaneMask() = 0;
-  virtual std::optional<Instruction *> instCombineIntrinsic(
-      InstCombiner &IC, IntrinsicInst &II) = 0;
-  virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
-      InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
-      KnownBits & Known, bool &KnownBitsComputed) = 0;
-  virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
-      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
-      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
+  virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                                       IntrinsicInst &II) = 0;
+  virtual Optional<Value *>
+  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+                                   APInt DemandedMask, KnownBits &Known,
+                                   bool &KnownBitsComputed) = 0;
+  virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
@ -1724,9 +1727,8 @@ public:
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() const = 0;
-  virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
-  virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
-      const = 0;
+  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
+  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;

  /// \return How much before a load we should place the prefetch
  /// instruction.  This is currently measured in number of
@ -1816,14 +1818,14 @@ public:
      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
  virtual InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                             std::optional<FastMathFlags> FMF,
+                             Optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) = 0;
  virtual InstructionCost
  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
                         TTI::TargetCostKind CostKind) = 0;
  virtual InstructionCost getExtendedReductionCost(
      unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
-      std::optional<FastMathFlags> FMF,
+      Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
  virtual InstructionCost getMulAccReductionCost(
      bool IsUnsigned, Type *ResTy, VectorType *Ty,
@ -1844,16 +1846,17 @@ public:
  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
-  virtual Type *getMemcpyLoopLoweringType(
-      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
-      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicElementSize) const = 0;
+  virtual Type *
+  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
+                            unsigned SrcAlign, unsigned DestAlign,
+                            Optional<uint32_t> AtomicElementSize) const = 0;

  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicCpySize) const = 0;
+      Optional<uint32_t> AtomicCpySize) const = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual bool areTypesABICompatible(const Function *Caller,
@ -2005,18 +2008,18 @@ public:
  PredicationStyle emitGetActiveLaneMask() override {
    return Impl.emitGetActiveLaneMask();
  }
-  std::optional<Instruction *>
-  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
-  std::optional<Value *>
+  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                 KnownBitsComputed);
  }
-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -2263,11 +2266,10 @@ public:
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
-  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
+  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
-  std::optional<unsigned>
-  getCacheAssociativity(CacheLevel Level) const override {
+  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }

@ -2405,7 +2407,7 @@ public:
  }
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                             std::optional<FastMathFlags> FMF,
+                             Optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
@ -2416,7 +2418,7 @@ public:
  }
  InstructionCost getExtendedReductionCost(
      unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
-      std::optional<FastMathFlags> FMF,
+      Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
                                         CostKind);
@ -2459,7 +2461,7 @@ public:
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicElementSize) const override {
+      Optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
                                          AtomicElementSize);
@ -2468,7 +2470,7 @@ public:
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicCpySize) const override {
+      Optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@ -175,24 +175,24 @@ public:
    return PredicationStyle::None;
  }

-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
-                                                    IntrinsicInst &II) const {
-    return std::nullopt;
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const {
+    return None;
  }

-  std::optional<Value *>
+  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
-    return std::nullopt;
+    return None;
  }

-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
-    return std::nullopt;
+    return None;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
@ -453,24 +453,25 @@ public:
  }

  unsigned getCacheLineSize() const { return 0; }
-  std::optional<unsigned>
+
+  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
-      return std::nullopt;
+      return llvm::None;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

-  std::optional<unsigned>
+  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
-      return std::nullopt;
+      return llvm::None;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
@ -693,7 +694,7 @@ public:
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
-                                             std::optional<FastMathFlags> FMF,
+                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }
@ -705,7 +706,7 @@ public:

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
-                                           std::optional<FastMathFlags> FMF,
+                                           Optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }
@ -738,11 +739,10 @@ public:
    return nullptr;
  }

-  Type *
-  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
-                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
-                            unsigned SrcAlign, unsigned DestAlign,
-                            std::optional<uint32_t> AtomicElementSize) const {
+  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
+                                  unsigned SrcAlign, unsigned DestAlign,
+                                  Optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }
@ -751,7 +751,7 @@ public:
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicCpySize) const {
+      Optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@ -621,20 +621,21 @@ public:
    return BaseT::emitGetActiveLaneMask();
  }

-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

-  std::optional<Value *>
-  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
-                                   APInt DemandedMask, KnownBits &Known,
-                                   bool &KnownBitsComputed) {
+  Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
+                                                     IntrinsicInst &II,
+                                                     APInt DemandedMask,
+                                                     KnownBits &Known,
+                                                     bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -644,15 +645,15 @@ public:
        SimplifyAndSetOp);
  }

-  virtual std::optional<unsigned>
+  virtual Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
-    return std::optional<unsigned>(
-        getST()->getCacheSize(static_cast<unsigned>(Level)));
+    return Optional<unsigned>(
+      getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

-  virtual std::optional<unsigned>
+  virtual Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
-    std::optional<unsigned> TargetResult =
+    Optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
@ -2282,7 +2283,7 @@ public:
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                             std::optional<FastMathFlags> FMF,
+                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind) {
    if (TTI::requiresOrderedReduction(FMF))
      return getOrderedReductionCost(Opcode, Ty, CostKind);
@ -2356,7 +2357,7 @@ public:

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
-                                           std::optional<FastMathFlags> FMF,
+                                           Optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind) {
    // Without any native support, this is equivalent to the cost of
    // vecreduce.opcode(ext(Ty A)).
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@ -23,7 +23,6 @@
 #include "llvm/MC/SubtargetFeature.h"
 #include <cassert>
 #include <cstdint>
-#include <optional>
 #include <string>

 namespace llvm {
@ -237,13 +236,13 @@ public:
  /// Level is zero-based, so a value of zero means the first level of
  /// cache.
  ///
-  virtual std::optional<unsigned> getCacheSize(unsigned Level) const;
+  virtual Optional<unsigned> getCacheSize(unsigned Level) const;

  /// Return the cache associativity for the given level of cache.
  /// Level is zero-based, so a value of zero means the first level of
  /// cache.
  ///
-  virtual std::optional<unsigned> getCacheAssociativity(unsigned Level) const;
+  virtual Optional<unsigned> getCacheAssociativity(unsigned Level) const;

  /// Return the target cache line size in bytes at a given level.
  ///
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@ -378,12 +378,12 @@ public:
  LoopInfo *getLoopInfo() const { return LI; }

  // Call target specific combiners
-  std::optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II);
-  std::optional<Value *>
+  Optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II);
+  Optional<Value *>
  targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask,
                                         KnownBits &Known,
                                         bool &KnownBitsComputed);
-  std::optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic(
+  Optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic(
      IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@ -14,7 +14,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H
 #define LLVM_TRANSFORMS_UTILS_LOWERMEMINTRINSICS_H

-#include <optional>
+#include "llvm/ADT/Optional.h"

 namespace llvm {

@ -31,19 +31,21 @@ struct Align;

 /// Emit a loop implementing the semantics of llvm.memcpy where the size is not
 /// a compile-time constant. Loop will be inserted at \p InsertBefore.
-void createMemCpyLoopUnknownSize(
-    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
-    Align SrcAlign, Align DestAlign, bool SrcIsVolatile, bool DstIsVolatile,
-    bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<unsigned> AtomicSize = std::nullopt);
+void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr,
+                                 Value *DstAddr, Value *CopyLen, Align SrcAlign,
+                                 Align DestAlign, bool SrcIsVolatile,
+                                 bool DstIsVolatile, bool CanOverlap,
+                                 const TargetTransformInfo &TTI,
+                                 Optional<unsigned> AtomicSize = None);

 /// Emit a loop implementing the semantics of an llvm.memcpy whose size is a
 /// compile time constant. Loop is inserted at \p InsertBefore.
-void createMemCpyLoopKnownSize(
-    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
-    ConstantInt *CopyLen, Align SrcAlign, Align DestAlign, bool SrcIsVolatile,
-    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicCpySize = std::nullopt);
+void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+                               Value *DstAddr, ConstantInt *CopyLen,
+                               Align SrcAlign, Align DestAlign,
+                               bool SrcIsVolatile, bool DstIsVolatile,
+                               bool CanOverlap, const TargetTransformInfo &TTI,
+                               Optional<uint32_t> AtomicCpySize = None);

 /// Expand \p MemCpy as a loop. \p MemCpy is not deleted.
 void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@ -20,7 +20,6 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
-#include <optional>
 #include <utility>

 using namespace llvm;
@ -309,20 +308,20 @@ PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const {
  return TTIImpl->emitGetActiveLaneMask();
 }

-std::optional<Instruction *>
+Optional<Instruction *>
 TargetTransformInfo::instCombineIntrinsic(InstCombiner &IC,
                                          IntrinsicInst &II) const {
  return TTIImpl->instCombineIntrinsic(IC, II);
 }

-std::optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
+Optional<Value *> TargetTransformInfo::simplifyDemandedUseBitsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) const {
  return TTIImpl->simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
 }

-std::optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
+Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -690,12 +689,12 @@ unsigned TargetTransformInfo::getCacheLineSize() const {
                                               : TTIImpl->getCacheLineSize();
 }

-std::optional<unsigned>
+llvm::Optional<unsigned>
 TargetTransformInfo::getCacheSize(CacheLevel Level) const {
  return TTIImpl->getCacheSize(Level);
 }

-std::optional<unsigned>
+llvm::Optional<unsigned>
 TargetTransformInfo::getCacheAssociativity(CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
 }
@ -1001,7 +1000,7 @@ InstructionCost TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
 }

 InstructionCost TargetTransformInfo::getArithmeticReductionCost(
-    unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
+    unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
    TTI::TargetCostKind CostKind) const {
  InstructionCost Cost =
      TTIImpl->getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
@ -1020,7 +1019,7 @@ InstructionCost TargetTransformInfo::getMinMaxReductionCost(

 InstructionCost TargetTransformInfo::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
-    std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
+    Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
  return TTIImpl->getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
                                           CostKind);
 }
@ -1053,7 +1052,7 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
 Type *TargetTransformInfo::getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
-    std::optional<uint32_t> AtomicElementSize) const {
+    Optional<uint32_t> AtomicElementSize) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                            DestAddrSpace, SrcAlign, DestAlign,
                                            AtomicElementSize);
@ -1063,7 +1062,7 @@ void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    unsigned SrcAlign, unsigned DestAlign,
-    std::optional<uint32_t> AtomicCpySize) const {
+    Optional<uint32_t> AtomicCpySize) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(
      OpsOut, Context, RemainingBytes, SrcAddrSpace, DestAddrSpace, SrcAlign,
      DestAlign, AtomicCpySize);
--- a/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/llvm/lib/MC/MCSubtargetInfo.cpp
@ -17,7 +17,6 @@
 #include <algorithm>
 #include <cassert>
 #include <cstring>
-#include <optional>

 using namespace llvm;

@ -336,11 +335,11 @@ void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const {
                                  ForwardingPaths);
 }

-std::optional<unsigned> MCSubtargetInfo::getCacheSize(unsigned Level) const {
-  return std::nullopt;
+Optional<unsigned> MCSubtargetInfo::getCacheSize(unsigned Level) const {
+  return None;
 }

-std::optional<unsigned>
+Optional<unsigned>
 MCSubtargetInfo::getCacheAssociativity(unsigned Level) const {
  return None;
 }
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@ -24,7 +24,6 @@
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include <algorithm>
-#include <optional>
 using namespace llvm;
 using namespace llvm::PatternMatch;

@ -522,8 +521,8 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

 /// The function will remove redundant reinterprets casting in the presence
 /// of the control flow
-static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
-                                                   IntrinsicInst &II) {
+static Optional<Instruction *> processPhiNode(InstCombiner &IC,
+                                              IntrinsicInst &II) {
  SmallVector<Instruction *, 32> Worklist;
  auto RequiredType = II.getType();

@ -532,7 +531,7 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,

  // Don't create a new Phi unless we can remove the old one.
  if (!PN->hasOneUse())
-    return std::nullopt;
+    return None;

  for (Value *IncValPhi : PN->incoming_values()) {
    auto *Reinterpret = dyn_cast<IntrinsicInst>(IncValPhi);
@ -540,7 +539,7 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
        Reinterpret->getIntrinsicID() !=
            Intrinsic::aarch64_sve_convert_to_svbool ||
        RequiredType != Reinterpret->getArgOperand(0)->getType())
-      return std::nullopt;
+      return None;
  }

  // Create the new Phi
@ -569,11 +568,11 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
 // and` into a `<vscale x 4 x i1> and`. This is profitable because
 // to_svbool must zero the new lanes during widening, whereas
 // from_svbool is free.
-static std::optional<Instruction *>
-tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) {
+static Optional<Instruction *> tryCombineFromSVBoolBinOp(InstCombiner &IC,
+                                                         IntrinsicInst &II) {
  auto BinOp = dyn_cast<IntrinsicInst>(II.getOperand(0));
  if (!BinOp)
-    return std::nullopt;
+    return None;

  auto IntrinsicID = BinOp->getIntrinsicID();
  switch (IntrinsicID) {
@ -586,7 +585,7 @@ tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) {
  case Intrinsic::aarch64_sve_orr_z:
    break;
  default:
-    return std::nullopt;
+    return None;
  }

  auto BinOpPred = BinOp->getOperand(0);
@ -596,12 +595,12 @@ tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) {
  auto PredIntr = dyn_cast<IntrinsicInst>(BinOpPred);
  if (!PredIntr ||
      PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
-    return std::nullopt;
+    return None;

  auto PredOp = PredIntr->getOperand(0);
  auto PredOpTy = cast<VectorType>(PredOp->getType());
  if (PredOpTy != II.getType())
-    return std::nullopt;
+    return None;

  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
@ -621,8 +620,8 @@ tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) {
  return IC.replaceInstUsesWith(II, NarrowedBinOp);
 }

-static std::optional<Instruction *>
-instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) {
+static Optional<Instruction *> instCombineConvertFromSVBool(InstCombiner &IC,
+                                                            IntrinsicInst &II) {
  // If the reinterpret instruction operand is a PHI Node
  if (isa<PHINode>(II.getArgOperand(0)))
    return processPhiNode(IC, II);
@ -664,32 +663,32 @@ instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) {
  // If no viable replacement in the conversion chain was found, there is
  // nothing to do.
  if (!EarliestReplacement)
-    return std::nullopt;
+    return None;

  return IC.replaceInstUsesWith(II, EarliestReplacement);
 }

-static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
-                                                      IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVESel(InstCombiner &IC,
+                                                 IntrinsicInst &II) {
  IRBuilder<> Builder(&II);
  auto Select = Builder.CreateSelect(II.getOperand(0), II.getOperand(1),
                                     II.getOperand(2));
  return IC.replaceInstUsesWith(II, Select);
 }

-static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
-                                                      IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
+                                                 IntrinsicInst &II) {
  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
  if (!Pg)
-    return std::nullopt;
+    return None;

  if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
-    return std::nullopt;
+    return None;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::vl1)
-    return std::nullopt;
+    return None;

  // The intrinsic is inserting into lane zero so use an insert instead.
  auto *IdxTy = Type::getInt64Ty(II.getContext());
@ -701,8 +700,8 @@ static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, Insert);
 }

-static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
-                                                       IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
+                                                  IntrinsicInst &II) {
  // Replace DupX with a regular IR splat.
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
@ -713,8 +712,8 @@ static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, Splat);
 }

-static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
-                                                        IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
  LLVMContext &Ctx = II.getContext();
  IRBuilder<> Builder(Ctx);
  Builder.SetInsertPoint(&II);
@ -722,49 +721,49 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
  // Check that the predicate is all active
  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
-    return std::nullopt;
+    return None;

  const auto PTruePattern =
      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
  if (PTruePattern != AArch64SVEPredPattern::all)
-    return std::nullopt;
+    return None;

  // Check that we have a compare of zero..
  auto *SplatValue =
      dyn_cast_or_null<ConstantInt>(getSplatValue(II.getArgOperand(2)));
  if (!SplatValue || !SplatValue->isZero())
-    return std::nullopt;
+    return None;

  // ..against a dupq
  auto *DupQLane = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
  if (!DupQLane ||
      DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
-    return std::nullopt;
+    return None;

  // Where the dupq is a lane 0 replicate of a vector insert
  if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
-    return std::nullopt;
+    return None;

  auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
  if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
-    return std::nullopt;
+    return None;

  // Where the vector insert is a fixed constant vector insert into undef at
  // index zero
  if (!isa<UndefValue>(VecIns->getArgOperand(0)))
-    return std::nullopt;
+    return None;

  if (!cast<ConstantInt>(VecIns->getArgOperand(2))->isZero())
-    return std::nullopt;
+    return None;

  auto *ConstVec = dyn_cast<Constant>(VecIns->getArgOperand(1));
  if (!ConstVec)
-    return std::nullopt;
+    return None;

  auto *VecTy = dyn_cast<FixedVectorType>(ConstVec->getType());
  auto *OutTy = dyn_cast<ScalableVectorType>(II.getType());
  if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
-    return std::nullopt;
+    return None;

  unsigned NumElts = VecTy->getNumElements();
  unsigned PredicateBits = 0;
@ -773,7 +772,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
  for (unsigned I = 0; I < NumElts; ++I) {
    auto *Arg = dyn_cast<ConstantInt>(ConstVec->getAggregateElement(I));
    if (!Arg)
-      return std::nullopt;
+      return None;
    if (!Arg->isZero())
      PredicateBits |= 1 << (I * (16 / NumElts));
  }
@ -798,7 +797,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
  // Ensure all relevant bits are set
  for (unsigned I = 0; I < 16; I += PredSize)
    if ((PredicateBits & (1 << I)) == 0)
-      return std::nullopt;
+      return None;

  auto *PTruePat =
      ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
@ -814,8 +813,8 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, ConvertFromSVBool);
 }

-static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
-                                                       IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
+                                                  IntrinsicInst &II) {
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
  Value *Pg = II.getArgOperand(0);
@ -856,10 +855,10 @@ static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,

  auto *IntrPG = dyn_cast<IntrinsicInst>(Pg);
  if (!IntrPG)
-    return std::nullopt;
+    return None;

  if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
-    return std::nullopt;
+    return None;

  const auto PTruePattern =
      cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
@ -867,7 +866,7 @@ static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
  // Can the intrinsic's predicate be converted to a known constant index?
  unsigned MinNumElts = getNumElementsFromSVEPredPattern(PTruePattern);
  if (!MinNumElts)
-    return std::nullopt;
+    return None;

  unsigned Idx = MinNumElts - 1;
  // Increment the index if extracting the element after the last active
@ -880,7 +879,7 @@ static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
  // maintain what the user asked for until an alternative is proven faster.
  auto *PgVTy = cast<ScalableVectorType>(Pg->getType());
  if (Idx >= PgVTy->getMinNumElements())
-    return std::nullopt;
+    return None;

  // The intrinsic is extracting a fixed lane so use an extract instead.
  auto *IdxTy = Type::getInt64Ty(II.getContext());
@ -890,8 +889,8 @@ static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, Extract);
 }

-static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
-                                                           IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
+                                                      IntrinsicInst &II) {
  // The SIMD&FP variant of CLAST[AB] is significantly faster than the scalar
  // integer variant across a variety of micro-architectures. Replace scalar
  // integer CLAST[AB] intrinsic with optimal SIMD&FP variant. A simple
@ -907,12 +906,12 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
  Type *Ty = II.getType();

  if (!Ty->isIntegerTy())
-    return std::nullopt;
+    return None;

  Type *FPTy;
  switch (cast<IntegerType>(Ty)->getBitWidth()) {
  default:
-    return std::nullopt;
+    return None;
  case 16:
    FPTy = Builder.getHalfTy();
    break;
@ -934,8 +933,8 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, FPIItoInt);
 }

-static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
-                                                     IntrinsicInst &II) {
+static Optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
+                                                IntrinsicInst &II) {
  LLVMContext &Ctx = II.getContext();
  IRBuilder<> Builder(Ctx);
  Builder.SetInsertPoint(&II);
@ -951,7 +950,7 @@ static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, RDFFR);
 }

-static std::optional<Instruction *>
+static Optional<Instruction *>
 instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
  const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();

@ -969,13 +968,13 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
  unsigned MinNumElts = getNumElementsFromSVEPredPattern(Pattern);

  return MinNumElts && NumElts >= MinNumElts
-             ? std::optional<Instruction *>(IC.replaceInstUsesWith(
+             ? Optional<Instruction *>(IC.replaceInstUsesWith(
                   II, ConstantInt::get(II.getType(), MinNumElts)))
-             : std::nullopt;
+             : None;
 }

-static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
-                                                        IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
  Value *PgVal = II.getArgOperand(0);
  Value *OpVal = II.getArgOperand(1);

@ -1001,7 +1000,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
  IntrinsicInst *Op = dyn_cast<IntrinsicInst>(OpVal);

  if (!Pg || !Op)
-    return std::nullopt;
+    return None;

  Intrinsic::ID OpIID = Op->getIntrinsicID();

@ -1042,11 +1041,11 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
    return IC.replaceInstUsesWith(II, PTest);
  }

-  return std::nullopt;
+  return None;
 }

-static std::optional<Instruction *>
-instCombineSVEVectorFMLA(InstCombiner &IC, IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
+                                                        IntrinsicInst &II) {
  // fold (fadd p a (fmul p b c)) -> (fma p a b c)
  Value *P = II.getOperand(0);
  Value *A = II.getOperand(1);
@ -1054,18 +1053,18 @@ instCombineSVEVectorFMLA(InstCombiner &IC, IntrinsicInst &II) {
  Value *B, *C;
  if (!match(FMul, m_Intrinsic<Intrinsic::aarch64_sve_fmul>(
                       m_Specific(P), m_Value(B), m_Value(C))))
-    return std::nullopt;
+    return None;

  if (!FMul->hasOneUse())
-    return std::nullopt;
+    return None;

  llvm::FastMathFlags FAddFlags = II.getFastMathFlags();
  // Stop the combine when the flags on the inputs differ in case dropping flags
  // would lead to us missing out on more beneficial optimizations.
  if (FAddFlags != cast<CallInst>(FMul)->getFastMathFlags())
-    return std::nullopt;
+    return None;
  if (!FAddFlags.allowContract())
-    return std::nullopt;
+    return None;

  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
@ -1091,7 +1090,7 @@ static bool isAllActivePredicate(Value *Pred) {
                         m_ConstantInt<AArch64SVEPredPattern::all>()));
 }

-static std::optional<Instruction *>
+static Optional<Instruction *>
 instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
@ -1114,7 +1113,7 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
  return IC.replaceInstUsesWith(II, MaskedLoad);
 }

-static std::optional<Instruction *>
+static Optional<Instruction *>
 instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
@ -1150,14 +1149,14 @@ static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
  }
 }

-static std::optional<Instruction *>
-instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
+                                                         IntrinsicInst &II) {
  auto *OpPredicate = II.getOperand(0);
  auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
  if (BinOpCode == Instruction::BinaryOpsEnd ||
      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                              m_ConstantInt<AArch64SVEPredPattern::all>())))
-    return std::nullopt;
+    return None;
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
  Builder.setFastMathFlags(II.getFastMathFlags());
@ -1166,15 +1165,15 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
  return IC.replaceInstUsesWith(II, BinOp);
 }

-static std::optional<Instruction *>
-instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC,
+                                                        IntrinsicInst &II) {
  if (auto FMLA = instCombineSVEVectorFMLA(IC, II))
    return FMLA;
  return instCombineSVEVectorBinOp(IC, II);
 }

-static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
-                                                            IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
+                                                       IntrinsicInst &II) {
  auto *OpPredicate = II.getOperand(0);
  auto *OpMultiplicand = II.getOperand(1);
  auto *OpMultiplier = II.getOperand(2);
@ -1220,8 +1219,8 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
  return instCombineSVEVectorBinOp(IC, II);
 }

-static std::optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
-                                                         IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
+                                                    IntrinsicInst &II) {
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
  Value *UnpackArg = II.getArgOperand(0);
@ -1240,10 +1239,10 @@ static std::optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
    return IC.replaceInstUsesWith(II, NewVal);
  }

-  return std::nullopt;
+  return None;
 }
-static std::optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
-                                                      IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
+                                                 IntrinsicInst &II) {
  auto *OpVal = II.getOperand(0);
  auto *OpIndices = II.getOperand(1);
  VectorType *VTy = cast<VectorType>(II.getType());
@ -1253,7 +1252,7 @@ static std::optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
  auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
  if (!SplatValue ||
      SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
-    return std::nullopt;
+    return None;

  // Convert sve_tbl(OpVal sve_dup_x(SplatValue)) to
  // splat_vector(extractelement(OpVal, SplatValue)) for further optimization.
@ -1267,8 +1266,8 @@ static std::optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, VectorSplat);
 }

-static std::optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
-                                                      IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
+                                                 IntrinsicInst &II) {
  // zip1(uzp1(A, B), uzp2(A, B)) --> A
  // zip2(uzp1(A, B), uzp2(A, B)) --> B
  Value *A, *B;
@ -1279,11 +1278,11 @@ static std::optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
    return IC.replaceInstUsesWith(
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

-  return std::nullopt;
+  return None;
 }

-static std::optional<Instruction *>
-instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
+static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
+                                                         IntrinsicInst &II) {
  Value *Mask = II.getOperand(0);
  Value *BasePtr = II.getOperand(1);
  Value *Index = II.getOperand(2);
@ -1303,8 +1302,8 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
        BasePtr->getPointerAlignment(II.getModule()->getDataLayout());

    Type *VecPtrTy = PointerType::getUnqual(Ty);
-    Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
-                                   BasePtr, IndexBase);
+    Value *Ptr = Builder.CreateGEP(
+        cast<VectorType>(Ty)->getElementType(), BasePtr, IndexBase);
    Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
    CallInst *MaskedLoad =
        Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
@ -1312,11 +1311,11 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
    return IC.replaceInstUsesWith(II, MaskedLoad);
  }

-  return std::nullopt;
+  return None;
 }

-static std::optional<Instruction *>
-instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
+static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
+                                                          IntrinsicInst &II) {
  Value *Val = II.getOperand(0);
  Value *Mask = II.getOperand(1);
  Value *BasePtr = II.getOperand(2);
@ -1335,8 +1334,8 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
    Align Alignment =
        BasePtr->getPointerAlignment(II.getModule()->getDataLayout());

-    Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
-                                   BasePtr, IndexBase);
+    Value *Ptr = Builder.CreateGEP(
+        cast<VectorType>(Ty)->getElementType(), BasePtr, IndexBase);
    Type *VecPtrTy = PointerType::getUnqual(Ty);
    Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);

@ -1345,11 +1344,11 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
    return IC.eraseInstFromFunction(II);
  }

-  return std::nullopt;
+  return None;
 }

-static std::optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
-                                                       IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
+                                                  IntrinsicInst &II) {
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);
  Type *Int32Ty = Builder.getInt32Ty();
@ -1360,7 +1359,7 @@ static std::optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
  Value *SplatValue = getSplatValue(DivVec);
  ConstantInt *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
  if (!SplatConstantInt)
-    return std::nullopt;
+    return None;
  APInt Divisor = SplatConstantInt->getValue();

  if (Divisor.isPowerOf2()) {
@ -1379,21 +1378,21 @@ static std::optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
    return IC.replaceInstUsesWith(II, NEG);
  }

-  return std::nullopt;
+  return None;
 }

-static std::optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
-                                                        IntrinsicInst &II) {
+static Optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
  Value *A = II.getArgOperand(0);
  Value *B = II.getArgOperand(1);
  if (A == B)
    return IC.replaceInstUsesWith(II, A);

-  return std::nullopt;
+  return None;
 }

-static std::optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
-                                                        IntrinsicInst &II) {
+static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
+                                                   IntrinsicInst &II) {
  IRBuilder<> Builder(&II);
  Value *Pred = II.getOperand(0);
  Value *Vec = II.getOperand(1);
@ -1406,20 +1405,21 @@ static std::optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
                      m_Value(MergedValue), m_Value(AbsPred), m_Value())))

-    return std::nullopt;
+    return None;

  // Transform is valid if any of the following are true:
  // * The ABS merge value is an undef or non-negative
  // * The ABS predicate is all active
  // * The ABS predicate and the SRSHL predicates are the same
-  if (!isa<UndefValue>(MergedValue) && !match(MergedValue, m_NonNegative()) &&
+  if (!isa<UndefValue>(MergedValue) &&
+      !match(MergedValue, m_NonNegative()) &&
      AbsPred != Pred && !isAllActivePredicate(AbsPred))
-    return std::nullopt;
+    return None;

  // Only valid when the shift amount is non-negative, otherwise the rounding
  // behaviour of SRSHL cannot be ignored.
  if (!match(Shift, m_NonNegative()))
-    return std::nullopt;
+    return None;

  auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
                                     {Pred, Vec, Shift});
@ -1427,7 +1427,7 @@ static std::optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, LSL);
 }

-std::optional<Instruction *>
+Optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                     IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
@ -1499,10 +1499,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
    return instCombineSVESrshl(IC, II);
  }

-  return std::nullopt;
+  return None;
 }

-std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
+Optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -1525,7 +1525,7 @@ std::optional<Value *> AArch64TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    break;
  }

-  return std::nullopt;
+  return None;
 }

 TypeSize
@ -2814,7 +2814,7 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE(

 InstructionCost
 AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
-                                           std::optional<FastMathFlags> FMF,
+                                           Optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind) {
  if (TTI::requiresOrderedReduction(FMF)) {
    if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@ -112,10 +112,10 @@ public:
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
-                                                    IntrinsicInst &II) const;
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;

-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -370,7 +370,7 @@ public:
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                             std::optional<FastMathFlags> FMF,
+                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@ -20,7 +20,6 @@
 #include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
-#include <optional>

 using namespace llvm;

@ -115,14 +114,14 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
 /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
 /// modified arguments (based on OldIntr) and replaces InstToReplace with
 /// this newly created intrinsic call.
-static std::optional<Instruction *> modifyIntrinsicCall(
+static Optional<Instruction *> modifyIntrinsicCall(
    IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
    InstCombiner &IC,
    std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
        Func) {
  SmallVector<Type *, 4> ArgTys;
  if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
-    return std::nullopt;
+    return None;

  SmallVector<Value *, 8> Args(OldIntr.args());

@ -150,7 +149,7 @@ static std::optional<Instruction *> modifyIntrinsicCall(
  return RetValue;
 }

-static std::optional<Instruction *>
+static Optional<Instruction *>
 simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
@ -253,7 +252,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,

  // Try to use A16 or G16
  if (!ST->hasA16() && !ST->hasG16())
-    return std::nullopt;
+    return None;

  // Address is interpreted as float if the instruction has a sampler or as
  // unsigned int if there is no sampler.
@ -296,7 +295,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,

  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
                                               ImageDimIntr->CoordStart))
-    return std::nullopt;
+    return None;

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());
@ -349,7 +348,7 @@ bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
  return false;
 }

-std::optional<Instruction *>
+Optional<Instruction *>
 GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
@ -1060,7 +1059,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    }
  }
  }
-  return std::nullopt;
+  return None;
 }

 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
@ -1205,7 +1204,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
  return Shuffle;
 }

-std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
+Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -1229,5 +1228,5 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    break;
  }
  }
-  return std::nullopt;
+  return None;
 }
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@ -23,7 +23,6 @@
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/KnownBits.h"
-#include <optional>

 using namespace llvm;

@ -402,7 +401,7 @@ bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
 Type *GCNTTIImpl::getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
-    std::optional<uint32_t> AtomicElementSize) const {
+    Optional<uint32_t> AtomicElementSize) const {

  if (AtomicElementSize)
    return Type::getIntNTy(Context, *AtomicElementSize * 8);
@ -434,7 +433,7 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
    unsigned SrcAlign, unsigned DestAlign,
-    std::optional<uint32_t> AtomicCpySize) const {
+    Optional<uint32_t> AtomicCpySize) const {
  assert(RemainingBytes < 16);

  if (AtomicCpySize)
@ -757,7 +756,7 @@ InstructionCost GCNTTIImpl::getCFInstrCost(unsigned Opcode,

 InstructionCost
 GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                       std::optional<FastMathFlags> FMF,
+                                       Optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) {
  if (TTI::requiresOrderedReduction(FMF))
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@ -19,7 +19,6 @@

 #include "AMDGPU.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
-#include <optional>

 namespace llvm {

@ -133,16 +132,16 @@ public:
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
-  Type *getMemcpyLoopLoweringType(
-      LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
-      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicElementSize) const;
+  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
+                                  unsigned SrcAlign, unsigned DestAlign,
+                                  Optional<uint32_t> AtomicElementSize) const;

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
-      std::optional<uint32_t> AtomicCpySize) const;
+      Optional<uint32_t> AtomicCpySize) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
@ -189,9 +188,9 @@ public:

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
-                                                    IntrinsicInst &II) const;
-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -214,7 +213,7 @@ public:
  int getInlinerVectorBonusPercent() { return 0; }

  InstructionCost getArithmeticReductionCost(
-      unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
+      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@ -37,7 +37,6 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
-#include <optional>
 #include <utility>

 using namespace llvm;
@ -118,7 +117,7 @@ ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
  return TTI::AMK_None;
 }

-std::optional<Instruction *>
+Optional<Instruction *>
 ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  using namespace PatternMatch;
  Intrinsic::ID IID = II.getIntrinsicID();
@ -244,13 +243,13 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
        return IC.eraseInstFromFunction(*User);
      }
    }
-    return std::nullopt;
+    return None;
  }
  }
-  return std::nullopt;
+  return None;
 }

-std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
+Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -272,7 +271,7 @@ std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    // The other lanes will be defined from the inserted elements.
    UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
                                                 : APInt::getHighBitsSet(2, 1));
-    return std::nullopt;
+    return None;
  };

  switch (II.getIntrinsicID()) {
@ -289,7 +288,7 @@ std::optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    break;
  }

-  return std::nullopt;
+  return None;
 }

 InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
@ -1654,7 +1653,7 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(

 InstructionCost
 ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
-                                       std::optional<FastMathFlags> FMF,
+                                       Optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) {
  if (TTI::requiresOrderedReduction(FMF))
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
@ -1679,7 +1678,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,

 InstructionCost ARMTTIImpl::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
-    std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
+    Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
  EVT ValVT = TLI->getValueType(DL, ValTy);
  EVT ResVT = TLI->getValueType(DL, ResTy);

--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@ -26,7 +26,6 @@
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/SubtargetFeature.h"
-#include <optional>

 namespace llvm {

@ -119,9 +118,9 @@ public:
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
-                                                    IntrinsicInst &II) const;
-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -275,11 +274,11 @@ public:
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
-                                             std::optional<FastMathFlags> FMF,
+                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *ValTy,
-                                           std::optional<FastMathFlags> FMF,
+                                           Optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *ValTy,
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@ -414,12 +414,12 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
 }

-std::optional<Instruction *>
+Optional<Instruction *>
 NVPTXTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  if (Instruction *I = simplifyNvvmIntrinsic(&II, IC)) {
    return I;
  }
-  return std::nullopt;
+  return None;
 }

 InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@ -21,7 +21,6 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/TargetLowering.h"
-#include <optional>

 namespace llvm {

@ -54,8 +53,8 @@ public:
           AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
  }

-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
-                                                    IntrinsicInst &II) const;
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;

  // Loads and stores can be vectorized if the alignment is at least as big as
  // the load/store we want to vectorize.
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@ -21,7 +21,6 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include <optional>

 using namespace llvm;

@ -61,7 +60,7 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
  return TTI::PSK_Software;
 }

-std::optional<Instruction *>
+Optional<Instruction *>
 PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
@ -161,7 +160,7 @@ PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    }
    break;
  }
-  return std::nullopt;
+  return None;
 }

 InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@ -20,7 +20,6 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
 #include "llvm/CodeGen/TargetLowering.h"
-#include <optional>

 namespace llvm {

@ -42,8 +41,8 @@ public:
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
-                                                    IntrinsicInst & II) const;
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;

  /// \name Scalar TTI Implementations
  /// @{
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@ -784,7 +784,7 @@ RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,

 InstructionCost
 RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                         std::optional<FastMathFlags> FMF,
+                                         Optional<FastMathFlags> FMF,
                                         TTI::TargetCostKind CostKind) {
  if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
@ -815,7 +815,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,

 InstructionCost RISCVTTIImpl::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
-    std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
+    Optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
  if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
    return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
                                           FMF, CostKind);
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@ -137,12 +137,12 @@ public:
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                             std::optional<FastMathFlags> FMF,
+                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *ValTy,
-                                           std::optional<FastMathFlags> FMF,
+                                           Optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind);

  InstructionCost
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@ -18,7 +18,6 @@
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
-#include <optional>

 using namespace llvm;

@ -925,7 +924,7 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
  return Builder.CreateShuffleVector(V1, makeArrayRef(Indexes, Size));
 }

-std::optional<Instruction *>
+Optional<Instruction *>
 X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
@ -1731,10 +1730,10 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  default:
    break;
  }
-  return std::nullopt;
+  return None;
 }

-std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
+Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) const {
  switch (II.getIntrinsicID()) {
@ -1771,10 +1770,10 @@ std::optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
    break;
  }
  }
-  return std::nullopt;
+  return None;
 }

-std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
+Optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -2026,5 +2025,5 @@ std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    UndefElts.setHighBits(VWidth / 2);
    break;
  }
-  return std::nullopt;
+  return None;
 }
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@ -56,7 +56,6 @@
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/Debug.h"
-#include <optional>

 using namespace llvm;

@ -76,7 +75,7 @@ struct CostKindCosts {
  unsigned CodeSizeCost = ~0U;
  unsigned SizeAndLatencyCost = ~0U;

-  std::optional<unsigned>
+  llvm::Optional<unsigned>
  operator[](TargetTransformInfo::TargetCostKind Kind) const {
    unsigned Cost = ~0U;
    switch (Kind) {
@ -94,7 +93,7 @@ struct CostKindCosts {
      break;
    }
    if (Cost == ~0U)
-      return std::nullopt;
+      return None;
    return Cost;
  }
 };
@ -109,7 +108,7 @@ X86TTIImpl::getPopcntSupport(unsigned TyWidth) {
  return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software;
 }

-std::optional<unsigned> X86TTIImpl::getCacheSize(
+llvm::Optional<unsigned> X86TTIImpl::getCacheSize(
  TargetTransformInfo::CacheLevel Level) const {
  switch (Level) {
  case TargetTransformInfo::CacheLevel::L1D:
@ -139,7 +138,7 @@ std::optional<unsigned> X86TTIImpl::getCacheSize(
  llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
 }

-std::optional<unsigned> X86TTIImpl::getCacheAssociativity(
+llvm::Optional<unsigned> X86TTIImpl::getCacheAssociativity(
  TargetTransformInfo::CacheLevel Level) const {
  //   - Penryn
  //   - Nehalem
@ -4910,7 +4909,7 @@ InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,

 InstructionCost
 X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
-                                       std::optional<FastMathFlags> FMF,
+                                       Optional<FastMathFlags> FMF,
                                       TTI::TargetCostKind CostKind) {
  if (TTI::requiresOrderedReduction(FMF))
    return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@ -19,7 +19,6 @@
 #include "X86TargetMachine.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
-#include <optional>

 namespace llvm {

@ -114,9 +113,9 @@ public:

  /// \name Cache TTI Implementation
  /// @{
-  std::optional<unsigned> getCacheSize(
+  llvm::Optional<unsigned> getCacheSize(
    TargetTransformInfo::CacheLevel Level) const override;
-  std::optional<unsigned> getCacheAssociativity(
+  llvm::Optional<unsigned> getCacheAssociativity(
    TargetTransformInfo::CacheLevel Level) const override;
  /// @}

@ -172,13 +171,13 @@ public:
  InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                            const SCEV *Ptr);

-  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
-                                                    IntrinsicInst &II) const;
-  std::optional<Value *>
+  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+                                               IntrinsicInst &II) const;
+  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
-  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -190,7 +189,7 @@ public:
                                        TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                             std::optional<FastMathFlags> FMF,
+                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@ -72,7 +72,6 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
-#include <optional>
 #include <utility>
 #include <vector>

@ -2828,7 +2827,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
  }
  default: {
    // Handle target specific intrinsics
-    std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
+    Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
    if (V)
      return V.value();
    break;
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@ -971,7 +971,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
      }
      default: {
        // Handle target specific intrinsics
-        std::optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
+        Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
            *II, DemandedMask, Known, KnownBitsComputed);
        if (V)
          return V.value();
@ -1696,7 +1696,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
    }
    default: {
      // Handle target specific intrinsics
-      std::optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
+      Optional<Value *> V = targetSimplifyDemandedVectorEltsIntrinsic(
          *II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
          simplifyAndSetOp);
      if (V)
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@ -170,16 +170,16 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024),
 static cl::opt<unsigned> ShouldLowerDbgDeclare("instcombine-lower-dbg-declare",
                                               cl::Hidden, cl::init(true));

-std::optional<Instruction *>
+Optional<Instruction *>
 InstCombiner::targetInstCombineIntrinsic(IntrinsicInst &II) {
  // Handle target specific intrinsics
  if (II.getCalledFunction()->isTargetIntrinsic()) {
    return TTI.instCombineIntrinsic(*this, II);
  }
-  return std::nullopt;
+  return None;
 }

-std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
+Optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
    IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) {
  // Handle target specific intrinsics
@ -187,10 +187,10 @@ std::optional<Value *> InstCombiner::targetSimplifyDemandedUseBitsIntrinsic(
    return TTI.simplifyDemandedUseBitsIntrinsic(*this, II, DemandedMask, Known,
                                                KnownBitsComputed);
  }
-  return std::nullopt;
+  return None;
 }

-std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
+Optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
    IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2,
    APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
@ -201,7 +201,7 @@ std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
        *this, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
-  return std::nullopt;
+  return None;
 }

 Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@ -13,15 +13,16 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <optional>

 using namespace llvm;

-void llvm::createMemCpyLoopKnownSize(
-    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr,
-    ConstantInt *CopyLen, Align SrcAlign, Align DstAlign, bool SrcIsVolatile,
-    bool DstIsVolatile, bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicElementSize) {
+void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr,
+                                     Value *DstAddr, ConstantInt *CopyLen,
+                                     Align SrcAlign, Align DstAlign,
+                                     bool SrcIsVolatile, bool DstIsVolatile,
+                                     bool CanOverlap,
+                                     const TargetTransformInfo &TTI,
+                                     Optional<uint32_t> AtomicElementSize) {
  // No need to expand zero length copies.
  if (CopyLen->isZero())
    return;
@ -172,11 +173,13 @@ void llvm::createMemCpyLoopKnownSize(
         "Bytes copied should match size in the call!");
 }

-void llvm::createMemCpyLoopUnknownSize(
-    Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, Value *CopyLen,
-    Align SrcAlign, Align DstAlign, bool SrcIsVolatile, bool DstIsVolatile,
-    bool CanOverlap, const TargetTransformInfo &TTI,
-    std::optional<uint32_t> AtomicElementSize) {
+void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
+                                       Value *SrcAddr, Value *DstAddr,
+                                       Value *CopyLen, Align SrcAlign,
+                                       Align DstAlign, bool SrcIsVolatile,
+                                       bool DstIsVolatile, bool CanOverlap,
+                                       const TargetTransformInfo &TTI,
+                                       Optional<uint32_t> AtomicElementSize) {
  BasicBlock *PreLoopBB = InsertBefore->getParent();
  BasicBlock *PostLoopBB =
      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");