[RISCV] Add target feature to force-enable atomics
This adds a +forced-atomics target feature with the same semantics as +atomics-32 on ARM (D130480). For RISCV targets without the +a extension, this forces LLVM to assume that lock-free atomics (up to 32/64 bits for riscv32/64 respectively) are available. This means that atomic load/store are lowered to a simple load/store (and fence as necessary), as these are guaranteed to be atomic (as long as they're aligned). Atomic RMW/CAS are lowered to __sync (rather than __atomic) libcalls. Responsibility for providing the __sync libcalls lies with the user; for privileged single-core code, they can be implemented by disabling interrupts. Code built with +forced-atomics is not ABI-compatible with code built without it if atomic variables cross the ABI boundary.

For context, the difference between __sync and __atomic is that the former is required to be lock-free, while the latter may rely on a shared global lock provided by a runtime library. See https://llvm.org/docs/Atomics.html#libcalls-atomic for a detailed discussion of the topic.

This target feature will be used by Rust's riscv32i target family to support atomic load/store without atomic RMW/CAS.

Differential Revision: https://reviews.llvm.org/D130621
parent: 6da3f90195
commit: f5ed0cb217
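The commit message says that, for privileged single-core code, the required __sync routines can be implemented by disabling interrupts. The following is a minimal sketch of that idea, not part of the patch: it assumes bare-metal riscv32 running in M-mode on a single hart, and uses the standard __sync libcall names for two representative operations.

#include <cstdint>

// Mask machine-mode interrupts by clearing mstatus.MIE (bit 3) and return the
// previous mstatus value so it can be restored afterwards.
static inline uint32_t irq_save() {
  uint32_t mstatus;
  asm volatile("csrrci %0, mstatus, 8" : "=r"(mstatus) : : "memory");
  return mstatus;
}

static inline void irq_restore(uint32_t mstatus) {
  asm volatile("csrw mstatus, %0" : : "r"(mstatus) : "memory");
}

// With interrupts masked on a single hart, a plain read-modify-write sequence
// is effectively atomic, which is what the __sync contract requires here.
extern "C" uint32_t __sync_fetch_and_add_4(uint32_t *p, uint32_t v) {
  uint32_t flags = irq_save();
  uint32_t old = *p;
  *p = old + v;
  irq_restore(flags);
  return old;
}

extern "C" uint32_t __sync_val_compare_and_swap_4(uint32_t *p, uint32_t expected,
                                                  uint32_t desired) {
  uint32_t flags = irq_save();
  uint32_t old = *p;
  if (old == expected)
    *p = desired;
  irq_restore(flags);
  return old;
}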
@@ -581,6 +581,13 @@ case. The only common architecture without that property is SPARC -- SPARCV8 SMP
 systems were common, yet it doesn't support any sort of compare-and-swap
 operation.
 
+Some targets (like RISCV) support a ``+forced-atomics`` target feature, which
+enables the use of lock-free atomics even if LLVM is not aware of any specific
+OS support for them. In this case, the user is responsible for ensuring that
+necessary ``__sync_*`` implementations are available. Code using
+``+forced-atomics`` is ABI-incompatible with code not using the feature, if
+atomic variables cross the ABI boundary.
+
 In either of these cases, the Target in LLVM can claim support for atomics of an
 appropriate size, and then implement some subset of the operations via libcalls
 to a ``__sync_*`` function. Such functions *must* not use locks in their
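Read from the user's side, the paragraph added above implies the split sketched below. This is an illustrative C++ sketch, assuming a riscv32 target built with +forced-atomics, based on my reading of the docs text and commit message rather than verified compiler output: atomic load/store need no library support, while atomic RMW requires a user-provided __sync_* routine.

#include <atomic>
#include <cstdint>

std::atomic<uint32_t> counter{0};

// Atomic load/store up to the native width need no library support: they are
// expected to lower to ordinary loads/stores plus fences as needed.
uint32_t read_counter() { return counter.load(std::memory_order_acquire); }
void reset_counter() { counter.store(0, std::memory_order_release); }

// Atomic RMW cannot be done inline without the A extension, so under
// +forced-atomics it is expected to become a __sync_fetch_and_add_4 call
// that the user is responsible for providing.
void bump_counter() { counter.fetch_add(1, std::memory_order_relaxed); }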
@@ -481,6 +481,17 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
                                    "SiFive 7-Series processors",
                                    [TuneNoDefaultUnroll]>;
 
+// Assume that lock-free native-width atomics are available, even if the target
+// and operating system combination would not usually provide them. The user
+// is responsible for providing any necessary __sync implementations. Code
+// built with this feature is not ABI-compatible with code built without this
+// feature, if atomic variables are exposed across the ABI boundary.
+def FeatureForcedAtomics : SubtargetFeature<
+    "forced-atomics", "HasForcedAtomics", "true",
+    "Assume that lock-free native-width atomics are available">;
+def HasAtomicLdSt
+    : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+
 //===----------------------------------------------------------------------===//
 // Named operands for CSR instructions.
 //===----------------------------------------------------------------------===//
@@ -411,6 +411,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   if (Subtarget.hasStdExtA()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
     setMinCmpXchgSizeInBits(32);
+  } else if (Subtarget.hasForcedAtomics()) {
+    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
   } else {
     setMaxAtomicSizeInBitsSupported(0);
   }
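With +forced-atomics, the maximum supported atomic width becomes XLen, just as with the A extension. A hedged illustration of what that boundary means on riscv32 (XLen == 32), based on the commit message's "up to 32/64 bits" wording:

#include <atomic>
#include <cstdint>

// riscv32, +forced-atomics: XLen == 32.
std::atomic<uint32_t> narrow;  // <= XLen: plain load/store, __sync_* for RMW/CAS
std::atomic<uint64_t> wide;    // >  XLen: falls back to __atomic_* libcalls
                               // (and is therefore not assumed lock-free)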
@@ -929,6 +931,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }
 
+  if (Subtarget.hasForcedAtomics()) {
+    // Set atomic rmw/cas operations to expand to force __sync libcalls.
+    setOperationAction(
+        {ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
+         ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
+         ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
+         ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
+        XLenVT, Expand);
+  }
+
   // Function alignments.
   const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
   setMinFunctionAlignment(FunctionAlignment);
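Marking these ATOMIC_* nodes as Expand at XLenVT is what makes legalization emit __sync libcalls for RMW/CAS. As a rough guide only, a riscv32 user might need to provide entry points such as the following; these use the standard __sync libcall naming, the exact set depends on which operations a program actually uses, and the min/max/umin/umax names are an assumption following the same pattern as the node list above.

#include <cstdint>

extern "C" {
// Compare-and-swap and swap (ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP).
uint32_t __sync_val_compare_and_swap_4(uint32_t *p, uint32_t expected, uint32_t desired);
uint32_t __sync_lock_test_and_set_4(uint32_t *p, uint32_t value);

// Fetch-and-op forms (ISD::ATOMIC_LOAD_{ADD,SUB,AND,OR,XOR,NAND}).
uint32_t __sync_fetch_and_add_4(uint32_t *p, uint32_t value);
uint32_t __sync_fetch_and_sub_4(uint32_t *p, uint32_t value);
uint32_t __sync_fetch_and_and_4(uint32_t *p, uint32_t value);
uint32_t __sync_fetch_and_or_4(uint32_t *p, uint32_t value);
uint32_t __sync_fetch_and_xor_4(uint32_t *p, uint32_t value);
uint32_t __sync_fetch_and_nand_4(uint32_t *p, uint32_t value);

// Min/max forms (ISD::ATOMIC_LOAD_{MIN,MAX,UMIN,UMAX}) -- assumed names,
// following the same __sync_fetch_and_<op>_<size> pattern.
int32_t  __sync_fetch_and_min_4(int32_t *p, int32_t value);
int32_t  __sync_fetch_and_max_4(int32_t *p, int32_t value);
uint32_t __sync_fetch_and_umin_4(uint32_t *p, uint32_t value);
uint32_t __sync_fetch_and_umax_4(uint32_t *p, uint32_t value);
}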
@@ -12286,6 +12298,10 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;
 
+  // Don't expand forced atomics, we want to have __sync libcalls instead.
+  if (Subtarget.hasForcedAtomics())
+    return AtomicExpansionKind::None;
+
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;
@@ -12389,6 +12405,10 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
 TargetLowering::AtomicExpansionKind
 RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *CI) const {
+  // Don't expand forced atomics, we want to have __sync libcalls instead.
+  if (Subtarget.hasForcedAtomics())
+    return AtomicExpansionKind::None;
+
   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
   if (Size == 8 || Size == 16)
     return AtomicExpansionKind::MaskedIntrinsic;
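Both hooks above return AtomicExpansionKind::None when +forced-atomics is set, so AtomicExpandPass leaves the RMW/cmpxchg instructions alone and they reach the Expand action installed earlier. For a user, that means a compare-exchange like the one below is expected to end up as a __sync_val_compare_and_swap_4 call rather than an LR/SC loop. This is an illustrative sketch assuming riscv32 with +forced-atomics, not verified output.

#include <atomic>
#include <cstdint>

// Attempt to claim a flag; the strong CAS below is expected to lower to a
// call to the user-provided __sync_val_compare_and_swap_4 libcall.
bool try_claim(std::atomic<uint32_t> &flag) {
  uint32_t expected = 0;
  return flag.compare_exchange_strong(expected, 1,
                                      std::memory_order_acq_rel,
                                      std::memory_order_acquire);
}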
@@ -105,20 +105,25 @@ defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
 // Pseudo-instructions and codegen patterns
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtA] in {
-
-/// Atomic loads and stores
-
+// Atomic load/store are available under both +a and +force-atomics.
 // Fences will be inserted for atomic load/stores according to the logic in
 // RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
+let Predicates = [HasAtomicLdSt] in {
+  defm : LdPat<atomic_load_8,  LB>;
+  defm : LdPat<atomic_load_16, LH>;
+  defm : LdPat<atomic_load_32, LW>;
 
-defm : LdPat<atomic_load_8,  LB>;
-defm : LdPat<atomic_load_16, LH>;
-defm : LdPat<atomic_load_32, LW>;
+  defm : AtomicStPat<atomic_store_8,  SB, GPR>;
+  defm : AtomicStPat<atomic_store_16, SH, GPR>;
+  defm : AtomicStPat<atomic_store_32, SW, GPR>;
+}
 
-defm : AtomicStPat<atomic_store_8,  SB, GPR>;
-defm : AtomicStPat<atomic_store_16, SH, GPR>;
-defm : AtomicStPat<atomic_store_32, SW, GPR>;
+let Predicates = [HasAtomicLdSt, IsRV64] in {
+  defm : LdPat<atomic_load_64, LD, i64>;
+  defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
+}
+
+let Predicates = [HasStdExtA] in {
 
 /// AMOs
 
@@ -304,13 +309,6 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
 
 let Predicates = [HasStdExtA, IsRV64] in {
 
-/// 64-bit atomic loads and stores
-
-// Fences will be inserted for atomic load/stores according to the logic in
-// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
-defm : LdPat<atomic_load_64, LD, i64>;
-defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
-
 defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
 defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
 defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;
@@ -98,6 +98,7 @@ private:
   bool EnableSaveRestore = false;
   bool EnableUnalignedScalarMem = false;
   bool HasLUIADDIFusion = false;
+  bool HasForcedAtomics = false;
   unsigned XLen = 32;
   unsigned ZvlLen = 0;
   MVT XLenVT = MVT::i32;
@@ -194,6 +195,7 @@ public:
   bool enableSaveRestore() const { return EnableSaveRestore; }
   bool enableUnalignedScalarMem() const { return EnableUnalignedScalarMem; }
   bool hasLUIADDIFusion() const { return HasLUIADDIFusion; }
+  bool hasForcedAtomics() const { return HasForcedAtomics; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
   unsigned getFLen() const {
File diff suppressed because it is too large