llvm-project/llvm/lib/Support/Unix/Threading.inc

//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the Unix specific implementation of Threading functions.
//
//===----------------------------------------------------------------------===//
#include "Unix.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#if defined(__APPLE__)
#include <mach/mach_init.h>
#include <mach/mach_port.h>
#include <pthread/qos.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#endif
#include <pthread.h>
#if defined(__FreeBSD__) || defined(__OpenBSD__)
#include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <errno.h>
#include <sys/cpuset.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <unistd.h>
#endif
#if defined(__NetBSD__)
#include <lwp.h> // For _lwp_self()
#endif
#if defined(__OpenBSD__)
#include <unistd.h> // For getthrid()
#endif
#if defined(__linux__)
#include <sched.h> // For sched_getaffinity
#include <sys/syscall.h> // For syscall codes
#include <unistd.h> // For syscall()
#endif
namespace llvm {
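// Creates a new thread running ThreadFunc(Arg), optionally with the requested
// stack size, and returns its pthread handle. Any pthreads failure here is
// fatal (reported via ReportErrnumFatal). A minimal usage sketch, assuming a
// hypothetical worker function and payload:
//
//   pthread_t T = llvm_execute_on_thread_impl(ExampleWorker, &ExamplePayload,
//                                             /*StackSizeInBytes=*/None);
//   llvm_thread_join_impl(T);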
pthread_t
llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
llvm::Optional<unsigned> StackSizeInBytes) {
int errnum;
// Construct the attributes object.
pthread_attr_t Attr;
if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
ReportErrnumFatal("pthread_attr_init failed", errnum);
}
auto AttrGuard = llvm::make_scope_exit([&] {
if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
ReportErrnumFatal("pthread_attr_destroy failed", errnum);
}
});
// Set the requested stack size, if given.
if (StackSizeInBytes) {
if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
}
}
// Construct and execute the thread.
pthread_t Thread;
if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
ReportErrnumFatal("pthread_create failed", errnum);
return Thread;
}
void llvm_thread_detach_impl(pthread_t Thread) {
int errnum;
if ((errnum = ::pthread_detach(Thread)) != 0) {
ReportErrnumFatal("pthread_detach failed", errnum);
}
}
void llvm_thread_join_impl(pthread_t Thread) {
int errnum;
if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
ReportErrnumFatal("pthread_join failed", errnum);
}
}
pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; }
pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); }
} // namespace llvm
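// Returns an OS-level identifier for the calling thread (e.g. the Mach thread
// port on Darwin or the kernel TID on Linux); only the final fallback returns
// the pthread_t value itself.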
uint64_t llvm::get_threadid() {
#if defined(__APPLE__)
// Calling "mach_thread_self()" bumps the reference count on the thread
// port, so we need to deallocate it. mach_task_self() doesn't bump the ref
// count.
thread_port_t Self = mach_thread_self();
mach_port_deallocate(mach_task_self(), Self);
return Self;
#elif defined(__FreeBSD__)
return uint64_t(pthread_getthreadid_np());
#elif defined(__NetBSD__)
return uint64_t(_lwp_self());
#elif defined(__OpenBSD__)
return uint64_t(getthrid());
#elif defined(__ANDROID__)
return uint64_t(gettid());
#elif defined(__linux__)
return uint64_t(syscall(SYS_gettid));
#else
return uint64_t(pthread_self());
#endif
}
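// Maximum thread name length supported by the platform, including the
// terminating NUL (see the truncation logic in set_thread_name below). A
// return value of 0 means there is no known limit or thread naming is
// unsupported here.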
static constexpr uint32_t get_max_thread_name_length_impl() {
#if defined(__NetBSD__)
return PTHREAD_MAX_NAMELEN_NP;
#elif defined(__APPLE__)
return 64;
#elif defined(__linux__)
#if HAVE_PTHREAD_SETNAME_NP
return 16;
#else
return 0;
#endif
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
return 16;
#elif defined(__OpenBSD__)
return 32;
#else
return 0;
#endif
}
uint32_t llvm::get_max_thread_name_length() {
return get_max_thread_name_length_impl();
}
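// Names the *current* thread. The name is intended as a debugging aid and may
// be silently truncated to the platform limit. A hypothetical call-site
// sketch (Index is an assumed caller-side variable):
//
//   set_thread_name("llvm-worker-" + Twine(Index));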
void llvm::set_thread_name(const Twine &Name) {
// Make sure the input is null terminated.
SmallString<64> Storage;
StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
// Truncate from the beginning, not the end, if the specified name is too
// long. For one, this ensures that the resulting string is still null
// terminated, but additionally the end of a long thread name will usually
// be more unique than the beginning, since a common pattern is for similar
// threads to share a common prefix.
// Note that the name length includes the null terminator.
if (get_max_thread_name_length() > 0)
NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
(void)NameStr;
#if defined(__linux__)
#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
#if HAVE_PTHREAD_SETNAME_NP
::pthread_setname_np(::pthread_self(), NameStr.data());
#endif
#endif
#elif defined(__FreeBSD__) || defined(__OpenBSD__)
::pthread_set_name_np(::pthread_self(), NameStr.data());
#elif defined(__NetBSD__)
::pthread_setname_np(::pthread_self(), "%s",
const_cast<char *>(NameStr.data()));
#elif defined(__APPLE__)
::pthread_setname_np(NameStr.data());
#endif
}
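// Reads the current thread's name into Name; Name is left empty on platforms
// without a name getter or when the lookup fails.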
void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
Name.clear();
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
int pid = ::getpid();
uint64_t tid = get_threadid();
struct kinfo_proc *kp = nullptr, *nkp;
size_t len = 0;
int error;
int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
(int)pid};
while (1) {
error = sysctl(ctl, 4, kp, &len, nullptr, 0);
if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
// Add extra space in case threads are added before the next call.
len += sizeof(*kp) + len / 10;
nkp = (struct kinfo_proc *)::realloc(kp, len);
if (nkp == nullptr) {
free(kp);
return;
}
kp = nkp;
continue;
}
if (error != 0)
len = 0;
break;
}
for (size_t i = 0; i < len / sizeof(*kp); i++) {
if (kp[i].ki_tid == (lwpid_t)tid) {
Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
break;
}
}
free(kp);
return;
#elif defined(__NetBSD__)
constexpr uint32_t len = get_max_thread_name_length_impl();
char buf[len];
::pthread_getname_np(::pthread_self(), buf, len);
Name.append(buf, buf + strlen(buf));
#elif defined(__OpenBSD__)
constexpr uint32_t len = get_max_thread_name_length_impl();
char buf[len];
::pthread_get_name_np(::pthread_self(), buf, len);
Name.append(buf, buf + strlen(buf));
#elif defined(__linux__)
#if HAVE_PTHREAD_GETNAME_NP
constexpr uint32_t len = get_max_thread_name_length_impl();
char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
Name.append(Buffer, Buffer + strlen(Buffer));
#endif
#endif
}
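// Lowers (or restores) the scheduling priority of the calling thread. A
// hypothetical usage sketch for a long-running background task:
//
//   if (set_thread_priority(ThreadPriority::Background) ==
//       SetThreadPriorityResult::FAILURE)
//     ; // Not fatal: the thread simply keeps its current priority.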
SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
#if defined(__linux__) && defined(SCHED_IDLE)
// Some *really* old glibcs are missing SCHED_IDLE.
// http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
// http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
sched_param priority;
// For each of the above policies, param->sched_priority must be 0.
priority.sched_priority = 0;
// SCHED_IDLE is for running very low priority background jobs.
// SCHED_OTHER is the standard round-robin time-sharing policy.
return !pthread_setschedparam(
pthread_self(),
// FIXME: consider SCHED_BATCH for Low
Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
&priority)
? SetThreadPriorityResult::SUCCESS
: SetThreadPriorityResult::FAILURE;
#elif defined(__APPLE__)
// https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
//
// Background - Applies to work that isn't visible to the user and may take
// significant time to complete. Examples include indexing, backing up, or
// synchronizing data. This class emphasizes energy efficiency.
//
// Utility - Applies to work that takes anywhere from a few seconds to a few
// minutes to complete. Examples include downloading a document or importing
// data. This class offers a balance between responsiveness, performance, and
// energy efficiency.
const auto qosClass = [&]() {
switch (Priority) {
case ThreadPriority::Background:
return QOS_CLASS_BACKGROUND;
case ThreadPriority::Low:
return QOS_CLASS_UTILITY;
case ThreadPriority::Default:
return QOS_CLASS_DEFAULT;
}
}();
return !pthread_set_qos_class_self_np(qosClass, 0)
? SetThreadPriorityResult::SUCCESS
: SetThreadPriorityResult::FAILURE;
#endif
return SetThreadPriorityResult::FAILURE;
}
#include <thread>
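// Number of hardware threads available to this process: the CPU affinity mask
// is consulted first (so cpuset/taskset-style restrictions are respected), and
// std::thread::hardware_concurrency() is only a fallback, clamped to at
// least 1.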
static int computeHostNumHardwareThreads() {
#if defined(__FreeBSD__)
cpuset_t mask;
CPU_ZERO(&mask);
if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask),
&mask) == 0)
return CPU_COUNT(&mask);
#elif defined(__linux__)
cpu_set_t Set;
if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
return CPU_COUNT(&Set);
#endif
// Guard against std::thread::hardware_concurrency() returning 0.
if (unsigned Val = std::thread::hardware_concurrency())
return Val;
return 1;
}
void llvm::ThreadPoolStrategy::apply_thread_strategy(
unsigned ThreadPoolNum) const {}
llvm::BitVector llvm::get_thread_affinity_mask() {
// FIXME: Implement
llvm_unreachable("Not implemented!");
}
unsigned llvm::get_cpus() { return 1; }
#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
// using the number of unique physical/core id pairs. The following
// implementation reads the /proc/cpuinfo format on an x86_64 system.
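// An illustrative (hypothetical) excerpt of the fields consumed below:
//
//   processor   : 0
//   physical id : 0
//   siblings    : 8
//   core id     : 0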
static int computeHostNumPhysicalCores() {
// Enabled represents the number of physical id/core id pairs with at least
// one processor id enabled by the CPU affinity mask.
cpu_set_t Affinity, Enabled;
if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
return -1;
CPU_ZERO(&Enabled);
// Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
// mmapped because it appears to have 0 size.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
if (std::error_code EC = Text.getError()) {
llvm::errs() << "Can't read "
<< "/proc/cpuinfo: " << EC.message() << "\n";
return -1;
}
SmallVector<StringRef, 8> strs;
(*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
int CurProcessor = -1;
int CurPhysicalId = -1;
int CurSiblings = -1;
int CurCoreId = -1;
for (StringRef Line : strs) {
std::pair<StringRef, StringRef> Data = Line.split(':');
auto Name = Data.first.trim();
auto Val = Data.second.trim();
// These fields are available if the kernel is configured with CONFIG_SMP.
if (Name == "processor")
Val.getAsInteger(10, CurProcessor);
else if (Name == "physical id")
Val.getAsInteger(10, CurPhysicalId);
else if (Name == "siblings")
Val.getAsInteger(10, CurSiblings);
else if (Name == "core id") {
Val.getAsInteger(10, CurCoreId);
// The processor id corresponds to an index into cpu_set_t.
if (CPU_ISSET(CurProcessor, &Affinity))
CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
}
}
return CPU_COUNT(&Enabled);
}
#elif defined(__linux__) && defined(__s390x__)
static int computeHostNumPhysicalCores() {
return sysconf(_SC_NPROCESSORS_ONLN);
}
#elif defined(__linux__) && !defined(__ANDROID__)
static int computeHostNumPhysicalCores() {
cpu_set_t Affinity;
if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
return CPU_COUNT(&Affinity);
// The call to sched_getaffinity() may have failed because the Affinity
// mask is too small for the number of CPUs on the system (i.e. the
// system has more than 1024 CPUs). Allocate a mask large enough for
// twice as many CPUs.
cpu_set_t *DynAffinity;
DynAffinity = CPU_ALLOC(2048);
if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
int NumCPUs = CPU_COUNT(DynAffinity);
CPU_FREE(DynAffinity);
return NumCPUs;
}
return -1;
}
#elif defined(__APPLE__)
// Gets the number of *physical cores* on the machine.
static int computeHostNumPhysicalCores() {
uint32_t count;
size_t len = sizeof(count);
sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
if (count < 1) {
int nm[2];
nm[0] = CTL_HW;
nm[1] = HW_AVAILCPU;
sysctl(nm, 2, &count, &len, NULL, 0);
if (count < 1)
return -1;
}
return count;
}
#elif defined(__MVS__)
static int computeHostNumPhysicalCores() {
enum {
// Byte offset of the pointer to the Communications Vector Table (CVT) in
// the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
// will be zero-extended to uintptr_t.
FLCCVT = 16,
// Byte offset of the pointer to the Common System Data Area (CSD) in the
// CVT. The table entry is a 31-bit pointer and will be zero-extended to
// uintptr_t.
CVTCSD = 660,
// Byte offset to the number of live CPs in the LPAR, stored as a signed
// 32-bit value in the table.
CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
};
char *PSA = 0;
char *CVT = reinterpret_cast<char *>(
static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
char *CSD = reinterpret_cast<char *>(
static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
}
#else
// On other systems, return -1 to indicate unknown.
static int computeHostNumPhysicalCores() { return -1; }
#endif
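// Computed once and cached in a function-local static; returns -1 when the
// number of physical cores cannot be determined on this platform.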
int llvm::get_physical_cores() {
static int NumCores = computeHostNumPhysicalCores();
return NumCores;
}