[OpenMP][libomp] Add hidden helper affinity

Add new hidden helper affinity via the environment variable,
KMP_HIDDEN_HELPER_AFFINITY, which allows users to assign thread
affinity to hidden helper threads using the same syntax as
KMP_AFFINITY. OMP_PLACES/OMP_PROC_BIND have no interaction with
KMP_HIDDEN_HELPER_AFFINITY.

Differential Revision: https://reviews.llvm.org/D135113
This commit is contained in:
Jonathan Peyton 2022-10-03 15:14:40 -05:00
parent b03d67f7f5
commit 7a9643fd2a
9 changed files with 265 additions and 56 deletions

View File

@ -374,6 +374,24 @@ The ``offset`` specifier indicates the starting position for thread assignment.
across one socket, and ``granularity=socket`` the runtime will shift the
granularity down to group since that is the largest granularity allowed by the OS.
KMP_HIDDEN_HELPER_AFFINITY (Windows, Linux)
"""""""""""""""""""""""""""""
Enables run-time library to bind hidden helper threads to physical processing units.
This environment variable has the same syntax and semantics as ``KMP_AFFINIY`` but only
applies to the hidden helper team.
You must set this environment variable before the first parallel region, or
certain API calls including ``omp_get_max_threads()``, ``omp_get_num_procs()``
and any affinity API calls.
**Syntax:** Same as ``KMP_AFFINITY``
The following ``modifiers`` are ignored in ``KMP_HIDDEN_HELPER_AFFINITY`` and are only valid
for ``KMP_AFFINITY``:
* ``respect`` and ``norespect``
* ``reset`` and ``noreset``
KMP_ALL_THREADS
"""""""""""""""

View File

@ -860,6 +860,8 @@ typedef struct kmp_affinity_t {
extern enum affinity_top_method __kmp_affinity_top_method;
extern kmp_affinity_t __kmp_affinity;
extern kmp_affinity_t __kmp_hh_affinity;
extern kmp_affinity_t *__kmp_affinities[2];
extern void __kmp_affinity_bind_thread(int which);
@ -4257,6 +4259,9 @@ extern void __kmp_hidden_helper_main_thread_release();
#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \
((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
#define KMP_HIDDEN_HELPER_TEAM(team) \
(team->t.t_threads[0] == __kmp_hidden_helper_main_thread)

View File

@ -174,9 +174,10 @@ int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
const kmp_hw_thread_t *aa = (const kmp_hw_thread_t *)a;
const kmp_hw_thread_t *bb = (const kmp_hw_thread_t *)b;
int depth = __kmp_topology->get_depth();
KMP_DEBUG_ASSERT(__kmp_affinity.compact >= 0);
KMP_DEBUG_ASSERT(__kmp_affinity.compact <= depth);
for (i = 0; i < __kmp_affinity.compact; i++) {
int compact = __kmp_topology->compact;
KMP_DEBUG_ASSERT(compact >= 0);
KMP_DEBUG_ASSERT(compact <= depth);
for (i = 0; i < compact; i++) {
int j = depth - i - 1;
if (aa->sub_ids[j] < bb->sub_ids[j])
return -1;
@ -184,7 +185,7 @@ int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
return 1;
}
for (; i < depth; i++) {
int j = i - __kmp_affinity.compact;
int j = i - compact;
if (aa->sub_ids[j] < bb->sub_ids[j])
return -1;
if (aa->sub_ids[j] > bb->sub_ids[j])
@ -583,6 +584,7 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
retval->count = arr + 2 * (size_t)KMP_HW_LAST;
retval->num_core_efficiencies = 0;
retval->num_core_types = 0;
retval->compact = 0;
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
@ -4287,6 +4289,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
bool is_regular_affinity = (&affinity == &__kmp_affinity);
bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
const char *env_var = affinity.env_var;
if (affinity.flags.initialized) {
@ -4335,7 +4338,8 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
case affinity_explicit:
KMP_DEBUG_ASSERT(affinity.proclist != NULL);
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
if (is_hidden_helper_affinity ||
__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
__kmp_affinity_process_proclist(affinity);
} else {
__kmp_affinity_process_placelist(affinity);
@ -4391,7 +4395,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
goto sortTopology;
case affinity_balanced:
if (depth <= 1) {
if (depth <= 1 || is_hidden_helper_affinity) {
KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
affinity.type = affinity_none;
__kmp_create_affinity_none_places(affinity);
@ -4451,7 +4455,8 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
(__kmp_affinity_num_places > 0) &&
((unsigned)__kmp_affinity_num_places < affinity.num_masks)) {
((unsigned)__kmp_affinity_num_places < affinity.num_masks) &&
!is_hidden_helper_affinity) {
affinity.num_masks = __kmp_affinity_num_places;
}
@ -4459,7 +4464,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
// Sort the topology table according to the current setting of
// affinity.compact, then fill out affinity.masks.
__kmp_topology->sort_compact();
__kmp_topology->sort_compact(affinity);
{
int i;
unsigned j;
@ -4510,8 +4515,7 @@ void __kmp_affinity_initialize(kmp_affinity_t &affinity) {
}
void __kmp_affinity_uninitialize(void) {
{
kmp_affinity_t *affinity = &__kmp_affinity;
for (kmp_affinity_t *affinity : __kmp_affinities) {
if (affinity->masks != NULL)
KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks);
if (affinity->os_id_masks != NULL)
@ -4546,6 +4550,21 @@ void __kmp_affinity_uninitialize(void) {
KMPAffinity::destroy_api();
}
static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
int *place, kmp_affin_mask_t **mask) {
int mask_idx;
bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
if (is_hidden_helper)
// The first gtid is the regular primary thread, the second gtid is the main
// thread of hidden team which does not participate in task execution.
mask_idx = gtid - 2;
else
mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
KMP_DEBUG_ASSERT(affinity->num_masks > 0);
*place = (mask_idx + affinity->offset) % affinity->num_masks;
*mask = KMP_CPU_INDEX(affinity->masks, *place);
}
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
if (!KMP_AFFINITY_CAPABLE()) {
return;
@ -4565,13 +4584,20 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
// same as the mask of the initialization thread.
kmp_affin_mask_t *mask;
int i;
const kmp_affinity_t *affinity = &__kmp_affinity;
const char *env_var = affinity->env_var;
const kmp_affinity_t *affinity;
const char *env_var;
bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
if (KMP_AFFINITY_NON_PROC_BIND) {
if (is_hidden_helper)
affinity = &__kmp_hh_affinity;
else
affinity = &__kmp_affinity;
env_var = affinity->env_var;
if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
if ((affinity->type == affinity_none) ||
(affinity->type == affinity_balanced) ||
KMP_HIDDEN_HELPER_THREAD(gtid)) {
KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
return;
@ -4581,14 +4607,10 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
i = 0;
mask = __kmp_affin_fullMask;
} else {
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
KMP_DEBUG_ASSERT(affinity->num_masks > 0);
i = (mask_idx + affinity->offset) % affinity->num_masks;
mask = KMP_CPU_INDEX(affinity->masks, i);
__kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
}
} else {
if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) ||
(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
if (!isa_root || __kmp_nested_proc_bind.bind_types[0] == proc_bind_false) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
return;
@ -4598,17 +4620,12 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
i = KMP_PLACE_ALL;
mask = __kmp_affin_fullMask;
} else {
// int i = some hash function or just a counter that doesn't
// always start at 0. Use adjusted gtid for now.
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
KMP_DEBUG_ASSERT(affinity->num_masks > 0);
i = (mask_idx + affinity->offset) % affinity->num_masks;
mask = KMP_CPU_INDEX(affinity->masks, i);
__kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
}
}
th->th.th_current_place = i;
if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) {
if (isa_root && !is_hidden_helper) {
th->th.th_new_place = i;
th->th.th_first_place = 0;
th->th.th_last_place = affinity->num_masks - 1;
@ -4629,10 +4646,11 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
KMP_CPU_COPY(th->th.th_affin_mask, mask);
if (affinity->flags.verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
/* to avoid duplicate printing (will be correctly printed on barrier) */
&& (affinity->type == affinity_none ||
(i != KMP_PLACE_ALL && affinity->type != affinity_balanced))) {
if (affinity->flags.verbose &&
(affinity->type == affinity_none ||
(i != KMP_PLACE_ALL && affinity->type != affinity_balanced)) &&
!KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
@ -4640,17 +4658,6 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
gtid, buf);
}
#if KMP_DEBUG
// Hidden helper thread affinity only printed for debug builds
if (affinity->flags.verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY (hidden helper thread)",
(kmp_int32)getpid(), __kmp_gettid(), gtid, buf);
}
#endif
#if KMP_OS_WINDOWS
// On Windows* OS, the process affinity mask might have changed. If the user
// didn't request affinity and this call fails, just continue silently.
@ -4663,7 +4670,8 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
}
void __kmp_affinity_set_place(int gtid) {
if (!KMP_AFFINITY_CAPABLE()) {
// Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
return;
}

View File

@ -724,6 +724,9 @@ class kmp_topology_t {
// Flags describing the topology
flags_t flags;
// Compact value used during sort_compact()
int compact;
// Insert a new topology layer after allocation
void _insert_layer(kmp_hw_t type, const int *ids);
@ -866,7 +869,9 @@ public:
}
#if KMP_AFFINITY_SUPPORTED
void sort_compact() {
friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
void sort_compact(kmp_affinity_t &affinity) {
compact = affinity.compact;
qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
kmp_hw_thread_t::compare_compact);
}

View File

@ -274,6 +274,10 @@ enum affinity_top_method __kmp_affinity_top_method =
// Regular thread affinity settings from KMP_AFFINITY
kmp_affinity_t __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY");
// Hidden helper thread affinity settings from KMP_HIDDEN_HELPER_AFFINITY
kmp_affinity_t __kmp_hh_affinity =
KMP_AFFINITY_INIT("KMP_HIDDEN_HELPER_AFFINITY");
kmp_affinity_t *__kmp_affinities[] = {&__kmp_affinity, &__kmp_hh_affinity};
char *__kmp_cpuinfo_file = NULL;

View File

@ -7467,6 +7467,14 @@ void __kmp_hidden_helper_initialize() {
return;
}
#if KMP_AFFINITY_SUPPORTED
// Initialize hidden helper affinity settings.
// The above __kmp_parallel_initialize() will initialize
// regular affinity (and topology) if not already done.
if (!__kmp_hh_affinity.flags.initialized)
__kmp_affinity_initialize(__kmp_hh_affinity);
#endif
// Set the count of hidden helper tasks to be executed to zero
KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

View File

@ -1247,7 +1247,7 @@ static void __kmp_stg_parse_num_hidden_helper_threads(char const *name,
if (__kmp_hidden_helper_threads_num == 0) {
__kmp_enable_hidden_helper = FALSE;
} else {
// Since the main thread of hidden helper team dooes not participate
// Since the main thread of hidden helper team does not participate
// in tasks execution let's increment the number of threads by one
// so that requested number of threads do actual job.
__kmp_hidden_helper_threads_num++;
@ -2542,9 +2542,21 @@ static void __kmp_stg_parse_affinity(char const *name, char const *value,
__kmp_parse_affinity_env(name, value, &__kmp_affinity);
} // __kmp_stg_parse_affinity
static void __kmp_stg_parse_hh_affinity(char const *name, char const *value,
void *data) {
__kmp_parse_affinity_env(name, value, &__kmp_hh_affinity);
// Warn about unused parts of hidden helper affinity settings if specified.
if (__kmp_hh_affinity.flags.reset) {
KMP_WARNING(AffInvalidParam, name, "reset");
}
if (__kmp_hh_affinity.flags.respect != affinity_respect_mask_default) {
KMP_WARNING(AffInvalidParam, name, "respect");
}
}
static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
const kmp_affinity_t &affinity) {
bool is_hh_affinity = (&affinity == &__kmp_hh_affinity);
if (__kmp_env_format) {
KMP_STR_BUF_PRINT_NAME_EX(name);
} else {
@ -2561,6 +2573,9 @@ static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
__kmp_str_buf_print(buffer, "%s,", "nowarnings");
}
if (KMP_AFFINITY_CAPABLE()) {
// Hidden helper affinity does not affect global reset
// or respect flags. That is still solely controlled by KMP_AFFINITY.
if (!is_hh_affinity) {
if (affinity.flags.respect) {
__kmp_str_buf_print(buffer, "%s,", "respect");
} else {
@ -2571,6 +2586,7 @@ static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
} else {
__kmp_str_buf_print(buffer, "%s,", "noreset");
}
}
__kmp_str_buf_print(buffer, "granularity=%s,",
__kmp_hw_get_keyword(affinity.gran, false));
}
@ -2620,6 +2636,10 @@ static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_print_affinity_env(buffer, name, __kmp_affinity);
}
static void __kmp_stg_print_hh_affinity(kmp_str_buf_t *buffer, char const *name,
void *data) {
__kmp_print_affinity_env(buffer, name, __kmp_hh_affinity);
}
#ifdef KMP_GOMP_COMPAT
@ -5472,6 +5492,8 @@ static kmp_setting_t __kmp_stg_table[] = {
#if KMP_AFFINITY_SUPPORTED
{"KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL,
0, 0},
{"KMP_HIDDEN_HELPER_AFFINITY", __kmp_stg_parse_hh_affinity,
__kmp_stg_print_hh_affinity, NULL, 0, 0},
#ifdef KMP_GOMP_COMPAT
{"GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL,
/* no print */ NULL, 0, 0},
@ -6199,10 +6221,14 @@ void __kmp_env_initialize(char const *string) {
__kmp_affinity.type = affinity_compact;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
}
if (__kmp_hh_affinity.type == affinity_default)
__kmp_hh_affinity.type = affinity_compact;
if (__kmp_affinity_top_method == affinity_top_method_default)
__kmp_affinity_top_method = affinity_top_method_all;
if (__kmp_affinity.gran == KMP_HW_UNKNOWN)
__kmp_affinity.gran = KMP_HW_PROC_GROUP;
if (__kmp_hh_affinity.gran == KMP_HW_UNKNOWN)
__kmp_hh_affinity.gran = KMP_HW_PROC_GROUP;
} else
#endif /* KMP_GROUP_AFFINITY */
@ -6242,6 +6268,8 @@ void __kmp_env_initialize(char const *string) {
__kmp_affinity.type = affinity_none;
}
}
if (__kmp_hh_affinity.type == affinity_default)
__kmp_hh_affinity.type = affinity_none;
if ((__kmp_affinity.gran == KMP_HW_UNKNOWN) &&
(__kmp_affinity.gran_levels < 0)) {
#if KMP_MIC_SUPPORTED
@ -6253,6 +6281,17 @@ void __kmp_env_initialize(char const *string) {
__kmp_affinity.gran = KMP_HW_CORE;
}
}
if ((__kmp_hh_affinity.gran == KMP_HW_UNKNOWN) &&
(__kmp_hh_affinity.gran_levels < 0)) {
#if KMP_MIC_SUPPORTED
if (__kmp_mic_type != non_mic) {
__kmp_hh_affinity.gran = KMP_HW_THREAD;
} else
#endif
{
__kmp_hh_affinity.gran = KMP_HW_CORE;
}
}
if (__kmp_affinity_top_method == affinity_top_method_default) {
__kmp_affinity_top_method = affinity_top_method_all;
}
@ -6260,7 +6299,8 @@ void __kmp_env_initialize(char const *string) {
}
#ifdef KMP_DEBUG
__kmp_print_affinity_settings(&__kmp_affinity);
for (const kmp_affinity_t *affinity : __kmp_affinities)
__kmp_print_affinity_settings(affinity);
KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.bind_types[0] != proc_bind_default);
K_DIAG(1, ("__kmp_nested_proc_bind.bind_types[0] == %d\n",
__kmp_nested_proc_bind.bind_types[0]));

View File

@ -1231,7 +1231,8 @@ static void __kmp_atfork_child(void) {
if (__kmp_nested_proc_bind.bind_types != NULL) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
}
__kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY");
for (kmp_affinity_t *affinity : __kmp_affinities)
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
__kmp_affin_fullMask = nullptr;
__kmp_affin_origMask = nullptr;
#endif // KMP_AFFINITY_SUPPORTED

View File

@ -0,0 +1,120 @@
// RUN: %libomp-cxx-compile
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=socket,compact %libomp-run 2>&1 | FileCheck --check-prefix=SOCKET %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=core,scatter %libomp-run 2>&1 | FileCheck --check-prefix=CORE %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY='verbose,granularity=fine,explicit,proclist=[0,1]' %libomp-run 2>&1 | FileCheck --check-prefix=FINE %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=socket,compact KMP_AFFINITY=compact,granularity=fine %libomp-run 2>&1 | FileCheck --check-prefix=SOCKET %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=core,scatter KMP_AFFINITY=compact,granularity=socket %libomp-run 2>&1 | FileCheck --check-prefix=CORE %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY='verbose,granularity=fine,explicit,proclist=[0,1]' KMP_AFFINITY=compact,granularity=core %libomp-run 2>&1 | FileCheck --check-prefix=FINE %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=socket,compact OMP_PROC_BIND=close OMP_PLACES=threads %libomp-run 2>&1 | FileCheck --check-prefix=SOCKET %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=core,scatter OMP_PROC_BIND=close OMP_PLACES=sockets %libomp-run 2>&1 | FileCheck --check-prefix=CORE %s
// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
// RUN: KMP_HIDDEN_HELPER_AFFINITY='verbose,granularity=fine,explicit,proclist=[0,1]' OMP_PROC_BIND=cores OMP_PLACES=cores %libomp-run 2>&1 | FileCheck --check-prefix=FINE %s
/*
* This test aims to check hidden helper affinity
*
* #pragma omp parallel for
* for (int i = 0; i < N; ++i) {
* int data1 = 0, data2 = 0;
* #pragma omp taskgroup
* {
* #pragma omp hidden helper task shared(data1)
* {
* data1 = 1;
* }
* #pragma omp hidden helper task shared(data2)
* {
* data2 = 2;
* }
* }
* assert(data1 == 1);
* assert(data2 == 2);
* }
*/
#include "common.h"
extern "C" {
struct kmp_task_t_with_privates {
kmp_task_t task;
};
struct anon {
int32_t *data;
};
}
template <int I>
kmp_int32 omp_task_entry(kmp_int32 gtid, kmp_task_t_with_privates *task) {
auto shareds = reinterpret_cast<anon *>(task->task.shareds);
auto p = shareds->data;
*p = I;
return 0;
}
int main(int argc, char *argv[]) {
constexpr const int N = 16;
#pragma omp parallel for
for (int i = 0; i < N; ++i) {
int32_t gtid = __kmpc_global_thread_num(nullptr);
int32_t data1 = 0;
__kmpc_taskgroup(nullptr, gtid);
auto task1 = __kmpc_omp_target_task_alloc(
nullptr, gtid, 1, sizeof(kmp_task_t_with_privates), sizeof(anon),
reinterpret_cast<kmp_routine_entry_t>(omp_task_entry<1>), -1);
auto shareds = reinterpret_cast<anon *>(task1->shareds);
shareds->data = &data1;
__kmpc_omp_task(nullptr, gtid, task1);
__kmpc_end_taskgroup(nullptr, gtid);
assert(data1 == 1);
}
std::cout << "PASS\n";
return 0;
}
// SOCKET: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: Threads may migrate across
// SOCKET-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: pid {{[0-9]+}} tid {{[0-9]+}} thread 1 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM:[0-9]+]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID:[0-9]+]] tid {{[0-9]+}} thread 2 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 3 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 4 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 5 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 6 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 7 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 8 bound to OS proc set
// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 9 bound to OS proc set
// CORE: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: Threads may migrate across
// CORE-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: pid {{[0-9]+}} tid {{[0-9]+}} thread 1 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM:[0-9]+]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID:[0-9]+]] tid {{[0-9]+}} thread 2 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 3 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 4 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 5 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 6 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 7 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 8 bound to OS proc set
// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 9 bound to OS proc set
// FINE-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: Threads may migrate across
// FINE-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: pid {{[0-9]+}} tid {{[0-9]+}} thread 1 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM:[0-9]+]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID:[0-9]+]] tid {{[0-9]+}} thread 2 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 3 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 4 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 5 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 6 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 7 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 8 bound to OS proc set
// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 9 bound to OS proc set
// End of file