[AMDGPU] Allow finer grain control of an unaligned access speed
A target can report whether a misaligned access is 'fast' as defined by the target, or not. In reality there can be different levels of 'fast' and 'slow'. This patch changes the boolean 'Fast' argument of the allowsMisalignedMemoryAccesses family of functions to an unsigned representing its speed. A target can still define the value as it wants, and the direct translation of the current code uses 0 and 1 for the current false and true, so the change is an NFC. A subsequent patch will start using an actual speed value in the load/store vectorizer to check that a vectorized access is not just fast, but also not slower than before. Differential Revision: https://reviews.llvm.org/D124217
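Below is a minimal sketch, not part of this commit, of how a caller could use the new unsigned out-parameter to compare the relative speed of two misaligned accesses; the helper name vectorizedAccessNotSlower and the two value types are made up for the illustration:

#include "llvm/CodeGen/TargetLowering.h"

// Sketch under the patch's new signature: the last argument is an unsigned
// relative speed instead of a bool. 0/1 reproduce the old false/true, and
// larger values let a target rank accesses against each other.
static bool vectorizedAccessNotSlower(const llvm::TargetLoweringBase &TLI,
                                      llvm::EVT ScalarVT, llvm::EVT VecVT,
                                      unsigned AddrSpace,
                                      llvm::Align Alignment) {
  unsigned ScalarSpeed = 0, VectorSpeed = 0;
  bool ScalarOK = TLI.allowsMisalignedMemoryAccesses(
      ScalarVT, AddrSpace, Alignment, llvm::MachineMemOperand::MONone,
      &ScalarSpeed);
  bool VectorOK = TLI.allowsMisalignedMemoryAccesses(
      VecVT, AddrSpace, Alignment, llvm::MachineMemOperand::MONone,
      &VectorSpeed);
  // The planned load/store vectorizer use: require the wide access to be
  // allowed and no slower than the scalar access it replaces.
  return ScalarOK && VectorOK && VectorSpeed >= ScalarSpeed;
}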
parent 6faf5d7245
commit bcaf31ec3f
@@ -826,7 +826,7 @@ public:
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace = 0,
Align Alignment = Align(1),
- bool *Fast = nullptr) const;
+ unsigned *Fast = nullptr) const;

/// Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
@@ -1691,7 +1691,7 @@ public:
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
- bool *Fast) = 0;
+ unsigned *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
@@ -2182,7 +2182,7 @@ public:
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
- bool *Fast) override {
+ unsigned *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
@@ -366,7 +366,7 @@ public:

bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
- bool *Fast) const {
+ unsigned *Fast) const {
return false;
}

@@ -256,7 +256,7 @@ public:
/// @{
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
- bool *Fast) const {
+ unsigned *Fast) const {
EVT E = EVT::getIntegerVT(Context, BitWidth);
return getTLI()->allowsMisalignedMemoryAccesses(
E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
@@ -585,7 +585,7 @@ public:
getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
return false;

- bool Fast = false;
+ unsigned Fast = 0;
return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
MMO, &Fast) && Fast;
}
@@ -1715,15 +1715,16 @@ public:
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
- /// whether the unaligned memory access is "fast" in the last argument by
- /// reference. This is used, for example, in situations where an array
- /// copy/move/set is converted to a sequence of store operations. Its use
- /// helps to ensure that such replacements don't generate code that causes an
- /// alignment error (trap) on the target machine.
+ /// a relative speed of the unaligned memory access in the last argument by
+ /// reference. The higher the speed number the faster the operation comparing
+ /// to a number returned by another such call. This is used, for example, in
+ /// situations where an array copy/move/set is converted to a sequence of
+ /// store operations. Its use helps to ensure that such replacements don't
+ /// generate code that causes an alignment error (trap) on the target machine.
virtual bool allowsMisalignedMemoryAccesses(
EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool * /*Fast*/ = nullptr) const {
+ unsigned * /*Fast*/ = nullptr) const {
return false;
}

@@ -1731,51 +1732,51 @@ public:
virtual bool allowsMisalignedMemoryAccesses(
LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool * /*Fast*/ = nullptr) const {
+ unsigned * /*Fast*/ = nullptr) const {
return false;
}

/// This function returns true if the memory access is aligned or if the
/// target allows this specific unaligned memory access. If the access is
- /// allowed, the optional final parameter returns if the access is also fast
- /// (as defined by the target).
+ /// allowed, the optional final parameter returns a relative speed of the
+ /// access (as defined by the target).
bool allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *Fast = nullptr) const;
+ unsigned *Fast = nullptr) const;

/// Return true if the memory access of this type is aligned or if the target
/// allows this specific unaligned access for the given MachineMemOperand.
- /// If the access is allowed, the optional final parameter returns if the
- /// access is also fast (as defined by the target).
+ /// If the access is allowed, the optional final parameter returns a relative
+ /// speed of the access (as defined by the target).
bool allowsMemoryAccessForAlignment(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
- bool *Fast = nullptr) const;
+ unsigned *Fast = nullptr) const;

/// Return true if the target supports a memory access of this type for the
/// given address space and alignment. If the access is allowed, the optional
- /// final parameter returns if the access is also fast (as defined by the
- /// target).
+ /// final parameter returns the relative speed of the access (as defined by
+ /// the target).
virtual bool
allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *Fast = nullptr) const;
+ unsigned *Fast = nullptr) const;

/// Return true if the target supports a memory access of this type for the
/// given MachineMemOperand. If the access is allowed, the optional
- /// final parameter returns if the access is also fast (as defined by the
+ /// final parameter returns the relative access speed (as defined by the
/// target).
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
- bool *Fast = nullptr) const;
+ unsigned *Fast = nullptr) const;

/// LLT handling variant.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
- bool *Fast = nullptr) const;
+ unsigned *Fast = nullptr) const;

/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.
@@ -558,11 +558,12 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

- bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
- unsigned BitWidth,
- unsigned AddressSpace,
- Align Alignment,
- bool *Fast) const {
+ bool
+ TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth,
+ unsigned AddressSpace,
+ Align Alignment,
+ unsigned *Fast) const {
return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
AddressSpace, Alignment, Fast);
}
@@ -3527,7 +3527,7 @@ bool CombinerHelper::matchLoadOrCombine(
// Load must be allowed and fast on the target.
LLVMContext &C = MF.getFunction().getContext();
auto &DL = MF.getDataLayout();
- bool Fast = false;
+ unsigned Fast = 0;
if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
!Fast)
return false;
@@ -3732,7 +3732,7 @@ bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
const auto &DL = LastStore.getMF()->getDataLayout();
auto &C = LastStore.getMF()->getFunction().getContext();
// Check that a store of the wide type is both allowed and fast on the target
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed = getTargetLowering().allowsMemoryAccess(
C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
if (!Allowed || !Fast)
@@ -7394,7 +7394,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,

// If the new LLT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
+ unsigned Fast;
// Need to get a VT equivalent for allowMisalignedMemoryAccesses().
MVT VT = getMVTForLLT(Ty);
if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
@@ -8201,7 +8201,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {

// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
@@ -8447,7 +8447,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();

// Check that a load of the wide type is both allowed and fast on the target
- bool Fast = false;
+ unsigned Fast = 0;
bool Allowed =
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
*FirstLoad->getMemOperand(), &Fast);
@@ -9954,7 +9954,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
uint64_t PtrOff =
IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
- bool Fast = false;
+ unsigned Fast = 0;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
@@ -13859,7 +13859,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();

- bool LD1Fast = false;
+ unsigned LD1Fast = 0;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
@@ -17575,7 +17575,7 @@ struct LoadedSlice {

// Check if it will be merged with the load.
// 1. Check the alignment / fast memory access constraint.
- bool IsFast = false;
+ unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
Origin->getAddressSpace(), getAlign(),
Origin->getMemOperand()->getFlags(), &IsFast) ||
@@ -18078,7 +18078,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

- bool IsFast = false;
+ unsigned IsFast = 0;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
LD->getAddressSpace(), NewAlign,
@@ -18137,7 +18137,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();

- bool FastLD = false, FastST = false;
+ unsigned FastLD = 0, FastST = 0;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
@@ -18749,7 +18749,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast = false;
+ unsigned IsFast = 0;

// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
@@ -18859,7 +18859,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- bool IsFast = false;
+ unsigned IsFast = 0;

// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
@@ -19012,8 +19012,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;

- bool IsFastSt = false;
- bool IsFastLd = false;
+ unsigned IsFastSt = 0;
+ unsigned IsFastLd = 0;
// Don't try vector types if we need a rotate. We may still fail the
// legality checks for the integer type, but we can't handle the rotate
// case with vectors.
@@ -20098,7 +20098,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}

- bool IsFast = false;
+ unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
OriginalLoad->getAddressSpace(), Alignment,
OriginalLoad->getMemOperand()->getFlags(),
@@ -261,7 +261,7 @@ bool TargetLowering::findOptimalMemOpLowering(

// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
+ unsigned Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
@@ -1716,7 +1716,7 @@ uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,

bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
- Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+ Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
// would be to implement this check directly (make this a virtual function).
@@ -1726,7 +1726,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) {
// Assume that an access that meets the ABI-specified alignment is fast.
if (Fast != nullptr)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -1736,7 +1736,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(

bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT,
- const MachineMemOperand &MMO, bool *Fast) const {
+ const MachineMemOperand &MMO, unsigned *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
MMO.getAlign(), MMO.getFlags(), Fast);
}
@@ -1745,7 +1745,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
Flags, Fast);
}
@@ -1753,7 +1753,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
- bool *Fast) const {
+ unsigned *Fast) const {
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);
}
@@ -1761,7 +1761,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
- bool *Fast) const {
+ unsigned *Fast) const {
EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);
@@ -2057,7 +2057,7 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,

bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;

@@ -2082,7 +2082,7 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;

@@ -14329,7 +14329,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
- bool Fast;
+ unsigned Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
@@ -14359,7 +14359,7 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
- bool Fast;
+ unsigned Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
@@ -537,12 +537,12 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *Fast = nullptr) const override;
+ unsigned *Fast = nullptr) const override;
/// LLT variant.
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast = nullptr) const override;
+ unsigned *Fast = nullptr) const override;

/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
@@ -683,7 +683,7 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
return false;

- bool Fast = false;
+ unsigned Fast = 0;
return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
CastTy, MMO, &Fast) &&
Fast;
@@ -2903,7 +2903,7 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
unsigned Size = VT.getStoreSize();
Align Alignment = LN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
- bool IsFast;
+ unsigned IsFast;
unsigned AS = LN->getAddressSpace();

// Expand unaligned loads earlier than legalization. Due to visitation order
@@ -2956,7 +2956,7 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
Align Alignment = SN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
- bool IsFast;
+ unsigned IsFast;
unsigned AS = SN->getAddressSpace();

// Expand unaligned stores earlier than legalization. Due to visitation
@@ -406,7 +406,7 @@ static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,

// Do not widen if it would introduce a slow unaligned load.
const SITargetLowering *TLI = ST.getTargetLowering();
- bool Fast = false;
+ unsigned Fast = 0;
return TLI->allowsMisalignedMemoryAccessesImpl(
RoundedSize, AddrSpace, Align(AlignInBits / 8),
MachineMemOperand::MOLoad, &Fast) &&
@@ -1521,9 +1521,9 @@ bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,

bool R600TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
- bool *IsFast) const {
+ unsigned *IsFast) const {
if (IsFast)
- *IsFast = false;
+ *IsFast = 0;

if (!VT.isSimple() || VT == MVT::Other)
return false;
@@ -1533,7 +1533,7 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses(

// TODO: This is a rough estimate.
if (IsFast)
- *IsFast = true;
+ *IsFast = 1;

return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
}
@@ -52,7 +52,7 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *IsFast = nullptr) const override;
+ unsigned *IsFast = nullptr) const override;

bool canCombineTruncStore(EVT ValVT, EVT MemVT,
bool LegalOperations) const override {
@@ -1386,9 +1386,9 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,

bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
- MachineMemOperand::Flags Flags, bool *IsFast) const {
+ MachineMemOperand::Flags Flags, unsigned *IsFast) const {
if (IsFast)
- *IsFast = false;
+ *IsFast = 0;

if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
@@ -1427,7 +1427,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// ds_write2_b32 depending on the alignment. In either case with either
// alignment there is no faster way of doing this.
if (IsFast)
- *IsFast = true;
+ *IsFast = 1;
return true;
}

@@ -1467,7 +1467,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// will be more of them, so overall we will pay less penalty issuing a
// single instruction.
if (IsFast)
- *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
+ *IsFast= Alignment >= RequiredAlignment || Alignment < Align(4);
return true;
}

@@ -1530,14 +1530,14 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
if (IsFast)
- *IsFast = true;
+ *IsFast = 1;

return Size >= 32 && Alignment >= Align(4);
}

bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
- bool *IsFast) const {
+ unsigned *IsFast) const {
bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);

@@ -1550,7 +1550,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
// which would be equally misaligned.
// This is only used by the common passes, selection always calls the
// allowsMisalignedMemoryAccessesImpl version.
- *IsFast = true;
+ *IsFast= 1;
}

return Allow;
@@ -8755,7 +8755,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- bool Fast = false;
+ unsigned Fast = 0;
auto Flags = Load->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
Load->getAlign(), Flags, &Fast) &&
@@ -9254,7 +9254,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- bool Fast = false;
+ unsigned Fast = 0;
auto Flags = Store->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
Store->getAlign(), Flags, &Fast) &&
@@ -291,14 +291,14 @@ public:
bool allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *IsFast = nullptr) const;
+ unsigned *IsFast = nullptr) const;

bool allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *IsFast = nullptr) const override {
+ unsigned *IsFast = nullptr) const override {
if (IsFast)
- *IsFast = false;
+ *IsFast = 0;
return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);
}
@@ -306,7 +306,7 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *IsFast = nullptr) const override;
+ unsigned *IsFast = nullptr) const override;

EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
@@ -18817,7 +18817,7 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
Align Alignment,
MachineMemOperand::Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
// Depends what it gets converted into if the type is weird.
if (!VT.isSimple())
return false;
@@ -18841,7 +18841,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
// A big-endian target may also explicitly support unaligned accesses
if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}
}
@@ -18853,7 +18853,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
Ty == MVT::v2i1)) {
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -18879,7 +18879,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
Ty == MVT::v2f64) {
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -18892,7 +18892,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
- bool Fast;
+ unsigned Fast;
if (Op.size() >= 16 &&
(Op.isAligned(Align(16)) ||
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
@@ -445,7 +445,7 @@ class VectorType;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;

EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
@@ -3674,7 +3674,7 @@ EVT HexagonTargetLowering::getOptimalMemOpType(

bool HexagonTargetLowering::allowsMemoryAccess(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
- Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+ Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMemoryAccess(SVT, Flags, Fast);
@@ -3684,12 +3684,12 @@ bool HexagonTargetLowering::allowsMemoryAccess(

bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
if (Fast)
- *Fast = false;
+ *Fast = 0;
return false;
}

@@ -326,12 +326,12 @@ public:
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;

bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;

/// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
@@ -442,10 +442,10 @@ private:
SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;

bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags,
- bool *Fast) const;
+ unsigned *Fast) const;
bool allowsHvxMisalignedMemoryAccesses(MVT VecTy,
MachineMemOperand::Flags Flags,
- bool *Fast) const;
+ unsigned *Fast) const;
void AdjustHvxInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const;

bool isHvxSingleTy(MVT Ty) const;
@@ -572,7 +572,7 @@ HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
}

bool HexagonTargetLowering::allowsHvxMemoryAccess(
- MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
+ MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
// Bool vectors are excluded by default, but make it explicit to
// emphasize that bool vectors cannot be loaded or stored.
// Also, disallow double vector stores (to prevent unnecessary
@@ -582,17 +582,17 @@ bool HexagonTargetLowering::allowsHvxMemoryAccess(
if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
return false;
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
- MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
+ MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
if (!Subtarget.isHVXVectorType(VecTy))
return false;
// XXX Should this be false? vmemu are a bit slower than vmem.
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -156,7 +156,7 @@ llvm::createMips16TargetLowering(const MipsTargetMachine &TM,
}

bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
+ EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
return false;
}

@@ -24,7 +24,7 @@ namespace llvm {
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;

MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -415,7 +415,7 @@ SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
+ EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

if (Subtarget.systemSupportsUnalignedAccess()) {
@@ -424,7 +424,7 @@ bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
// a hybrid of the two but it's expected that most implementations will
// handle the majority of cases in hardware.
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -432,7 +432,7 @@ bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
case MVT::i64:
case MVT::i32:
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
default:
return false;
@@ -43,7 +43,7 @@ class TargetRegisterClass;
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *Fast = nullptr) const override;
+ unsigned *Fast = nullptr) const override;

SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

@@ -16711,7 +16711,7 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
MachineMemOperand::Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
if (DisablePPCUnaligned)
return false;

@@ -16742,7 +16742,7 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
return false;

if (Fast)
- *Fast = true;
+ *Fast = 1;

return true;
}
@@ -1076,7 +1076,7 @@ namespace llvm {
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *Fast = nullptr) const override;
+ unsigned *Fast = nullptr) const override;

/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
@@ -13266,10 +13266,10 @@ bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
if (!VT.isVector()) {
if (Fast)
- *Fast = false;
+ *Fast = 0;
return Subtarget.enableUnalignedScalarMem();
}

@@ -13277,7 +13277,7 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT ElemVT = VT.getVectorElementType();
if (Alignment >= ElemVT.getStoreSize()) {
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -543,7 +543,7 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
- bool *Fast = nullptr) const override;
+ unsigned *Fast = nullptr) const override;

bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts,
@@ -861,12 +861,12 @@ bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
- EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
+ EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
// Unaligned accesses should never be slower than the expanded version.
// We check specifically for aligned accesses in the few cases where
// they are required.
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -455,7 +455,7 @@ public:
Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;
bool
findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,
@@ -887,10 +887,10 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
Align A,
MachineMemOperand::Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
if (Fast) {
// It's fast anytime on VE
- *Fast = true;
+ *Fast = 1;
}
return true;
}
@@ -223,7 +223,7 @@ public:
/// specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align A,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;

/// Inline Assembly {

@@ -781,7 +781,7 @@ bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,

bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
- MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
+ MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
// WebAssembly supports unaligned accesses, though it should be declared
// with the p2align attribute on loads and stores which do so, and there
// may be a performance impact. We tell LLVM they're "fast" because
@@ -789,7 +789,7 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
// of constants, etc.), WebAssembly implementations will either want the
// unaligned access or they'll split anyway.
if (Fast)
- *Fast = true;
+ *Fast = 1;
return true;
}

@@ -72,7 +72,7 @@ private:
Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
@@ -2730,12 +2730,12 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {

bool X86TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
- bool *Fast) const {
+ unsigned *Fast) const {
if (Fast) {
switch (VT.getSizeInBits()) {
default:
// 8-byte and under are always assumed to be fast.
- *Fast = true;
+ *Fast = 1;
break;
case 128:
*Fast = !Subtarget.isUnalignedMem16Slow();
@@ -49628,7 +49628,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
// into two 16-byte operations. Also split non-temporal aligned loads on
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
- bool Fast;
+ unsigned Fast;
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
@@ -50086,7 +50086,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,

// If we are saving a 32-byte vector and 32-byte stores are slow, such as on
// Sandy Bridge, perform two 16-byte stores.
- bool Fast;
+ unsigned Fast;
if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand(), &Fast) &&
@@ -54822,7 +54822,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// Fold subvector loads into one.
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
- bool Fast;
+ unsigned Fast;
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*FirstLd->getMemOperand(), &Fast) &&
@@ -1005,7 +1005,7 @@ namespace llvm {
/// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
- bool *Fast) const override;
+ unsigned *Fast) const override;

/// Provide custom lowering hooks for some operations.
///
@@ -775,7 +775,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
return false;

unsigned AS = LI1->getPointerAddressSpace();
- bool Fast = false;
+ unsigned Fast = 0;
Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
AS, LI1->getAlign(), &Fast);
if (!Allowed || !Fast)
@@ -1320,7 +1320,7 @@ bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
if (Alignment.value() % SzInBytes == 0)
return false;

- bool Fast = false;
+ unsigned Fast = 0;
bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
SzInBytes * 8, AddressSpace,
Alignment, &Fast);