[AMDGPU] Allow finer grain control of an unaligned access speed

A target can report whether a misaligned access is 'fast', as defined
by the target, or not. In reality there can be different levels of
'fast' and 'slow'. This patch changes the boolean 'Fast' argument of
the allowsMisalignedMemoryAccesses family of functions to an unsigned
representing the speed of the access.

A target can still define the value however it wants, and the direct
translation of the current code uses 0 and 1 for the previous false
and true. This makes the change an NFC.

A subsequent patch will start using the actual speed value in the
load/store vectorizer to check that a vectorized access will be not
just fast, but also no slower than before.

Differential Revision: https://reviews.llvm.org/D124217
Author: Stanislav Mekhanoshin
Date:   2022-04-21 16:23:11 -07:00
Parent: 6faf5d7245
Commit: bcaf31ec3f
41 changed files with 128 additions and 126 deletions
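
Before the per-file diff, a condensed caller-side sketch of the direct translation used throughout this patch (it mirrors the DAG-combine call sites changed below, but is illustrative rather than an excerpt):

// A call site that previously used a bool now uses an unsigned. Any
// nonzero speed is truthy, so the existing "allowed and fast" check keeps
// its behavior.
unsigned Fast = 0; // was: bool Fast = false;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                           *St->getMemOperand(), &Fast) &&
    Fast) {
  // The access is both allowed and reported as fast (speed != 0).
}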


@ -826,7 +826,7 @@ public:
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace = 0,
Align Alignment = Align(1),
bool *Fast = nullptr) const;
unsigned *Fast = nullptr) const;
/// Return hardware support for population count.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
@ -1691,7 +1691,7 @@ public:
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
bool *Fast) = 0;
unsigned *Fast) = 0;
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0;
@ -2182,7 +2182,7 @@ public:
}
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
bool *Fast) override {
unsigned *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}


@ -366,7 +366,7 @@ public:
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
bool *Fast) const {
unsigned *Fast) const {
return false;
}


@ -256,7 +256,7 @@ public:
/// @{
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
unsigned AddressSpace, Align Alignment,
bool *Fast) const {
unsigned *Fast) const {
EVT E = EVT::getIntegerVT(Context, BitWidth);
return getTLI()->allowsMisalignedMemoryAccesses(
E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);


@ -585,7 +585,7 @@ public:
getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
return false;
bool Fast = false;
unsigned Fast = 0;
return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
MMO, &Fast) && Fast;
}
@ -1715,15 +1715,16 @@ public:
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
/// a relative speed of the unaligned memory access in the last argument by
/// reference. The higher the speed number the faster the operation comparing
/// to a number returned by another such call. This is used, for example, in
/// situations where an array copy/move/set is converted to a sequence of
/// store operations. Its use helps to ensure that such replacements don't
/// generate code that causes an alignment error (trap) on the target machine.
virtual bool allowsMisalignedMemoryAccesses(
EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool * /*Fast*/ = nullptr) const {
unsigned * /*Fast*/ = nullptr) const {
return false;
}
@ -1731,51 +1732,51 @@ public:
virtual bool allowsMisalignedMemoryAccesses(
LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool * /*Fast*/ = nullptr) const {
unsigned * /*Fast*/ = nullptr) const {
return false;
}
/// This function returns true if the memory access is aligned or if the
/// target allows this specific unaligned memory access. If the access is
/// allowed, the optional final parameter returns if the access is also fast
/// (as defined by the target).
/// allowed, the optional final parameter returns a relative speed of the
/// access (as defined by the target).
bool allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const;
unsigned *Fast = nullptr) const;
/// Return true if the memory access of this type is aligned or if the target
/// allows this specific unaligned access for the given MachineMemOperand.
/// If the access is allowed, the optional final parameter returns if the
/// access is also fast (as defined by the target).
/// If the access is allowed, the optional final parameter returns a relative
/// speed of the access (as defined by the target).
bool allowsMemoryAccessForAlignment(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
bool *Fast = nullptr) const;
unsigned *Fast = nullptr) const;
/// Return true if the target supports a memory access of this type for the
/// given address space and alignment. If the access is allowed, the optional
/// final parameter returns if the access is also fast (as defined by the
/// target).
/// final parameter returns the relative speed of the access (as defined by
/// the target).
virtual bool
allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const;
unsigned *Fast = nullptr) const;
/// Return true if the target supports a memory access of this type for the
/// given MachineMemOperand. If the access is allowed, the optional
/// final parameter returns if the access is also fast (as defined by the
/// final parameter returns the relative access speed (as defined by the
/// target).
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
bool *Fast = nullptr) const;
unsigned *Fast = nullptr) const;
/// LLT handling variant.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
bool *Fast = nullptr) const;
unsigned *Fast = nullptr) const;
/// Returns the target specific optimal type for load and store operations as
/// a result of memset, memcpy, and memmove lowering.

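With the updated documentation above, a target override can report more than one level of 'fast'. A hedged sketch (the class name MyTargetLowering and the 2-versus-1 grading are hypothetical; only the signature matches the declarations in this header):

bool MyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment,
    MachineMemOperand::Flags Flags, unsigned *Fast) const {
  if (!VT.isSimple())
    return false;
  if (Fast) {
    // Report a relative speed instead of a plain yes/no. Only the ordering
    // matters: a dword-aligned access is graded faster than one the
    // hardware accepts but handles more slowly.
    *Fast = Alignment >= Align(4) ? 2 : 1;
  }
  return true;
}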

@ -558,11 +558,12 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
bool *Fast) const {
bool
TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
unsigned AddressSpace,
Align Alignment,
unsigned *Fast) const {
return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth,
AddressSpace, Alignment, Fast);
}


@ -3527,7 +3527,7 @@ bool CombinerHelper::matchLoadOrCombine(
// Load must be allowed and fast on the target.
LLVMContext &C = MF.getFunction().getContext();
auto &DL = MF.getDataLayout();
bool Fast = false;
unsigned Fast = 0;
if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
!Fast)
return false;
@ -3732,7 +3732,7 @@ bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
const auto &DL = LastStore.getMF()->getDataLayout();
auto &C = LastStore.getMF()->getFunction().getContext();
// Check that a store of the wide type is both allowed and fast on the target
bool Fast = false;
unsigned Fast = 0;
bool Allowed = getTargetLowering().allowsMemoryAccess(
C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
if (!Allowed || !Fast)


@ -7394,7 +7394,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
// If the new LLT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;
unsigned Fast;
// Need to get a VT equivalent for allowMisalignedMemoryAccesses().
MVT VT = getMVTForLLT(Ty);
if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&


@ -8201,7 +8201,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// Check that a store of the wide type is both allowed and fast on the target
const DataLayout &Layout = DAG.getDataLayout();
bool Fast = false;
unsigned Fast = 0;
bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
*FirstStore->getMemOperand(), &Fast);
if (!Allowed || !Fast)
@ -8447,7 +8447,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
unsigned Fast = 0;
bool Allowed =
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
*FirstLoad->getMemOperand(), &Fast);
@ -9954,7 +9954,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
uint64_t PtrOff =
IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
bool Fast = false;
unsigned Fast = 0;
if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
RHS->getAddressSpace(), NewAlign,
RHS->getMemOperand()->getFlags(), &Fast) &&
@ -13859,7 +13859,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
bool LD1Fast = false;
unsigned LD1Fast = 0;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
@ -17575,7 +17575,7 @@ struct LoadedSlice {
// Check if it will be merged with the load.
// 1. Check the alignment / fast memory access constraint.
bool IsFast = false;
unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
Origin->getAddressSpace(), getAlign(),
Origin->getMemOperand()->getFlags(), &IsFast) ||
@ -18078,7 +18078,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
bool IsFast = false;
unsigned IsFast = 0;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
LD->getAddressSpace(), NewAlign,
@ -18137,7 +18137,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
bool FastLD = false, FastST = false;
unsigned FastLD = 0, FastST = 0;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
@ -18749,7 +18749,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
bool IsFast = false;
unsigned IsFast = 0;
// Break early when size is too large to be legal.
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
@ -18859,7 +18859,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast = false;
unsigned IsFast = 0;
// Break early when size is too large to be legal.
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
@ -19012,8 +19012,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
break;
bool IsFastSt = false;
bool IsFastLd = false;
unsigned IsFastSt = 0;
unsigned IsFastLd = 0;
// Don't try vector types if we need a rotate. We may still fail the
// legality checks for the integer type, but we can't handle the rotate
// case with vectors.
@ -20098,7 +20098,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
bool IsFast = false;
unsigned IsFast = 0;
if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
OriginalLoad->getAddressSpace(), Alignment,
OriginalLoad->getMemOperand()->getFlags(),


@ -261,7 +261,7 @@ bool TargetLowering::findOptimalMemOpLowering(
// If the new VT cannot cover all of the remaining bits, then consider
// issuing a (or a pair of) unaligned and overlapping load / store.
bool Fast;
unsigned Fast;
if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
allowsMisalignedMemoryAccesses(
VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),


@ -1716,7 +1716,7 @@ uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
// would be to implement this check directly (make this a virtual function).
@ -1726,7 +1726,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) {
// Assume that an access that meets the ABI-specified alignment is fast.
if (Fast != nullptr)
*Fast = true;
*Fast = 1;
return true;
}
@ -1736,7 +1736,7 @@ bool TargetLoweringBase::allowsMemoryAccessForAlignment(
bool TargetLoweringBase::allowsMemoryAccessForAlignment(
LLVMContext &Context, const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO, bool *Fast) const {
const MachineMemOperand &MMO, unsigned *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
MMO.getAlign(), MMO.getFlags(), Fast);
}
@ -1745,7 +1745,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const {
unsigned *Fast) const {
return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
Flags, Fast);
}
@ -1753,7 +1753,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
bool *Fast) const {
unsigned *Fast) const {
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);
}
@ -1761,7 +1761,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
bool *Fast) const {
unsigned *Fast) const {
EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
MMO.getFlags(), Fast);


@ -2057,7 +2057,7 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
unsigned *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
@ -2082,7 +2082,7 @@ bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
unsigned *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
@ -14329,7 +14329,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
unsigned Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
@ -14359,7 +14359,7 @@ LLT AArch64TargetLowering::getOptimalMemOpLLT(
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
unsigned Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;


@ -537,12 +537,12 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
unsigned *Fast = nullptr) const override;
/// LLT variant.
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast = nullptr) const override;
unsigned *Fast = nullptr) const override;
/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;


@ -683,7 +683,7 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
return false;
bool Fast = false;
unsigned Fast = 0;
return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
CastTy, MMO, &Fast) &&
Fast;
@ -2903,7 +2903,7 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
unsigned Size = VT.getStoreSize();
Align Alignment = LN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
bool IsFast;
unsigned IsFast;
unsigned AS = LN->getAddressSpace();
// Expand unaligned loads earlier than legalization. Due to visitation order
@ -2956,7 +2956,7 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
Align Alignment = SN->getAlign();
if (Alignment < Size && isTypeLegal(VT)) {
bool IsFast;
unsigned IsFast;
unsigned AS = SN->getAddressSpace();
// Expand unaligned stores earlier than legalization. Due to visitation


@ -406,7 +406,7 @@ static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
// Do not widen if it would introduce a slow unaligned load.
const SITargetLowering *TLI = ST.getTargetLowering();
bool Fast = false;
unsigned Fast = 0;
return TLI->allowsMisalignedMemoryAccessesImpl(
RoundedSize, AddrSpace, Align(AlignInBits / 8),
MachineMemOperand::MOLoad, &Fast) &&


@ -1521,9 +1521,9 @@ bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
bool R600TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *IsFast) const {
unsigned *IsFast) const {
if (IsFast)
*IsFast = false;
*IsFast = 0;
if (!VT.isSimple() || VT == MVT::Other)
return false;
@ -1533,7 +1533,7 @@ bool R600TargetLowering::allowsMisalignedMemoryAccesses(
// TODO: This is a rough estimate.
if (IsFast)
*IsFast = true;
*IsFast = 1;
return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
}


@ -52,7 +52,7 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override;
unsigned *IsFast = nullptr) const override;
bool canCombineTruncStore(EVT ValVT, EVT MemVT,
bool LegalOperations) const override {


@ -1386,9 +1386,9 @@ bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags, bool *IsFast) const {
MachineMemOperand::Flags Flags, unsigned *IsFast) const {
if (IsFast)
*IsFast = false;
*IsFast = 0;
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
@ -1427,7 +1427,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// ds_write2_b32 depending on the alignment. In either case with either
// alignment there is no faster way of doing this.
if (IsFast)
*IsFast = true;
*IsFast = 1;
return true;
}
@ -1467,7 +1467,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// will be more of them, so overall we will pay less penalty issuing a
// single instruction.
if (IsFast)
*IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
*IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
return true;
}
@ -1530,14 +1530,14 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
if (IsFast)
*IsFast = true;
*IsFast = 1;
return Size >= 32 && Alignment >= Align(4);
}
bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *IsFast) const {
unsigned *IsFast) const {
bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);
@ -1550,7 +1550,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(
// which would be equally misaligned.
// This is only used by the common passes, selection always calls the
// allowsMisalignedMemoryAccessesImpl version.
*IsFast = true;
*IsFast = 1;
}
return Allow;
@ -8755,7 +8755,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;
unsigned Fast = 0;
auto Flags = Load->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
Load->getAlign(), Flags, &Fast) &&
@ -9254,7 +9254,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;
unsigned Fast = 0;
auto Flags = Store->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
Store->getAlign(), Flags, &Fast) &&

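Boolean expressions assigned through the pointer, as in the IsFast assignment above, stay well defined after the type change: a bool converts to 0 or 1, which is exactly the direct translation this NFC relies on. A standalone illustration (the values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  unsigned IsFast = 0; // was a bool before this patch
  const uint64_t Alignment = 8, RequiredAlignment = 16;
  // A boolean expression stored into an unsigned yields 0 or 1, so the
  // existing target code keeps its meaning under the new signature.
  IsFast = Alignment >= RequiredAlignment || Alignment < 4;
  assert(IsFast == 0);
  return 0;
}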

@ -291,14 +291,14 @@ public:
bool allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const;
unsigned *IsFast = nullptr) const;
bool allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override {
unsigned *IsFast = nullptr) const override {
if (IsFast)
*IsFast = false;
*IsFast = 0;
return allowsMisalignedMemoryAccessesImpl(Ty.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);
}
@ -306,7 +306,7 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *IsFast = nullptr) const override;
unsigned *IsFast = nullptr) const override;
EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;


@ -18817,7 +18817,7 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
Align Alignment,
MachineMemOperand::Flags,
bool *Fast) const {
unsigned *Fast) const {
// Depends what it gets converted into if the type is weird.
if (!VT.isSimple())
return false;
@ -18841,7 +18841,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
// A big-endian target may also explicitly support unaligned accesses
if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}
}
@ -18853,7 +18853,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
Ty == MVT::v2i1)) {
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}
@ -18879,7 +18879,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
Ty == MVT::v2f64) {
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}
@ -18892,7 +18892,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
bool Fast;
unsigned Fast;
if (Op.size() >= 16 &&
(Op.isAligned(Align(16)) ||
(allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),


@ -445,7 +445,7 @@ class VectorType;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;


@ -3674,7 +3674,7 @@ EVT HexagonTargetLowering::getOptimalMemOpType(
bool HexagonTargetLowering::allowsMemoryAccess(
LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMemoryAccess(SVT, Flags, Fast);
@ -3684,12 +3684,12 @@ bool HexagonTargetLowering::allowsMemoryAccess(
bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
unsigned *Fast) const {
MVT SVT = VT.getSimpleVT();
if (Subtarget.isHVXVectorType(SVT, true))
return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
if (Fast)
*Fast = false;
*Fast = 0;
return false;
}


@ -326,12 +326,12 @@ public:
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
/// Returns relocation base for the given PIC jumptable.
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
@ -442,10 +442,10 @@ private:
SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;
bool allowsHvxMemoryAccess(MVT VecTy, MachineMemOperand::Flags Flags,
bool *Fast) const;
unsigned *Fast) const;
bool allowsHvxMisalignedMemoryAccesses(MVT VecTy,
MachineMemOperand::Flags Flags,
bool *Fast) const;
unsigned *Fast) const;
void AdjustHvxInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const;
bool isHvxSingleTy(MVT Ty) const;


@ -572,7 +572,7 @@ HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
}
bool HexagonTargetLowering::allowsHvxMemoryAccess(
MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
// Bool vectors are excluded by default, but make it explicit to
// emphasize that bool vectors cannot be loaded or stored.
// Also, disallow double vector stores (to prevent unnecessary
@ -582,17 +582,17 @@ bool HexagonTargetLowering::allowsHvxMemoryAccess(
if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
return false;
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}
bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
if (!Subtarget.isHVXVectorType(VecTy))
return false;
// XXX Should this be false? vmemu are a bit slower than vmem.
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}


@ -156,7 +156,7 @@ llvm::createMips16TargetLowering(const MipsTargetMachine &TM,
}
bool Mips16TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
return false;
}


@ -24,7 +24,7 @@ namespace llvm {
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,


@ -415,7 +415,7 @@ SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
if (Subtarget.systemSupportsUnalignedAccess()) {
@ -424,7 +424,7 @@ bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
// a hybrid of the two but it's expected that most implementations will
// handle the majority of cases in hardware.
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}
@ -432,7 +432,7 @@ bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
case MVT::i64:
case MVT::i32:
if (Fast)
*Fast = true;
*Fast = 1;
return true;
default:
return false;


@ -43,7 +43,7 @@ class TargetRegisterClass;
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
unsigned *Fast = nullptr) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;


@ -16711,7 +16711,7 @@ bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
MachineMemOperand::Flags,
bool *Fast) const {
unsigned *Fast) const {
if (DisablePPCUnaligned)
return false;
@ -16742,7 +16742,7 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
return false;
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}


@ -1076,7 +1076,7 @@ namespace llvm {
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
unsigned *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be


@ -13266,10 +13266,10 @@ bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
unsigned *Fast) const {
if (!VT.isVector()) {
if (Fast)
*Fast = false;
*Fast = 0;
return Subtarget.enableUnalignedScalarMem();
}
@ -13277,7 +13277,7 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT ElemVT = VT.getVectorElementType();
if (Alignment >= ElemVT.getStoreSize()) {
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}


@ -543,7 +543,7 @@ public:
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
unsigned *Fast = nullptr) const override;
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts,


@ -861,12 +861,12 @@ bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
}
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
// Unaligned accesses should never be slower than the expanded version.
// We check specifically for aligned accesses in the few cases where
// they are required.
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}


@ -455,7 +455,7 @@ public:
Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
bool
findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
const MemOp &Op, unsigned DstAS, unsigned SrcAS,


@ -887,10 +887,10 @@ bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
Align A,
MachineMemOperand::Flags,
bool *Fast) const {
unsigned *Fast) const {
if (Fast) {
// It's fast anytime on VE
*Fast = true;
*Fast = 1;
}
return true;
}


@ -223,7 +223,7 @@ public:
/// specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align A,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
/// Inline Assembly {


@ -781,7 +781,7 @@ bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
MachineMemOperand::Flags /*Flags*/, bool *Fast) const {
MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
// WebAssembly supports unaligned accesses, though it should be declared
// with the p2align attribute on loads and stores which do so, and there
// may be a performance impact. We tell LLVM they're "fast" because
@ -789,7 +789,7 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
// of constants, etc.), WebAssembly implementations will either want the
// unaligned access or they'll split anyway.
if (Fast)
*Fast = true;
*Fast = 1;
return true;
}


@ -72,7 +72,7 @@ private:
Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;


@ -2730,12 +2730,12 @@ bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
bool X86TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
unsigned *Fast) const {
if (Fast) {
switch (VT.getSizeInBits()) {
default:
// 8-byte and under are always assumed to be fast.
*Fast = true;
*Fast = 1;
break;
case 128:
*Fast = !Subtarget.isUnalignedMem16Slow();
@ -49628,7 +49628,7 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
// into two 16-byte operations. Also split non-temporal aligned loads on
// pre-AVX2 targets as 32-byte loads will lower to regular temporal loads.
ISD::LoadExtType Ext = Ld->getExtensionType();
bool Fast;
unsigned Fast;
if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
Ext == ISD::NON_EXTLOAD &&
((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
@ -50086,7 +50086,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
// If we are saving a 32-byte vector and 32-byte stores are slow, such as on
// Sandy Bridge, perform two 16-byte stores.
bool Fast;
unsigned Fast;
if (VT.is256BitVector() && StVT == VT &&
TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*St->getMemOperand(), &Fast) &&
@ -54822,7 +54822,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// Fold subvector loads into one.
// If needed, look through bitcasts to get to the load.
if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
bool Fast;
unsigned Fast;
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
*FirstLd->getMemOperand(), &Fast) &&


@ -1005,7 +1005,7 @@ namespace llvm {
/// specified type. Returns whether it is "fast" in the last argument.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast) const override;
unsigned *Fast) const override;
/// Provide custom lowering hooks for some operations.
///


@ -775,7 +775,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
return false;
unsigned AS = LI1->getPointerAddressSpace();
bool Fast = false;
unsigned Fast = 0;
Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
AS, LI1->getAlign(), &Fast);
if (!Allowed || !Fast)


@ -1320,7 +1320,7 @@ bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
if (Alignment.value() % SzInBytes == 0)
return false;
bool Fast = false;
unsigned Fast = 0;
bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
SzInBytes * 8, AddressSpace,
Alignment, &Fast);
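
The check above still only requires the reported speed to be nonzero. A hedged sketch of the comparison the commit message anticipates for the follow-up vectorizer change (ElementSizeBits and the second query are illustrative; the actual follow-up may differ):

// Query the relative speed of the wide, possibly misaligned access and of
// the original element-sized access, then require the wide one to be no
// slower rather than merely "fast".
unsigned VectorSpeed = 0, ScalarSpeed = 0;
bool VectorAllowed = TTI.allowsMisalignedMemoryAccesses(
    F.getParent()->getContext(), SzInBytes * 8, AddressSpace, Alignment,
    &VectorSpeed);
TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
                                   ElementSizeBits, AddressSpace, Alignment,
                                   &ScalarSpeed);
bool Profitable = VectorAllowed && VectorSpeed >= ScalarSpeed;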