forked from OSchip/llvm-project
[OpenMP][OMPT] Introduce a guard to handle OMPT return address
This is an alternative approach to address inconsistencies pointed out in: D90078 This patch makes sure that the return address is reset, when leaving the scope. In some cases, I had to move the macro out of an if-statement to have it in the right scope, in some cases I added an additional block to restrict the scope. This patch does not handle inconsistencies, which might occur if the return address is still set when we call into the application. Test case (repeated_calls.c) provided by @hbae Differential Revision: https://reviews.llvm.org/D91692
This commit is contained in:
parent
b281a05dac
commit
6d3b81664a
|
@ -297,8 +297,8 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
|
|||
parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
|
||||
}
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
#endif
|
||||
|
||||
#if INCLUDE_SSC_MARKS
|
||||
|
@ -713,8 +713,8 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
__kmp_threads[global_tid]->th.th_ident = loc;
|
||||
// TODO: explicit barrier_wait_id:
|
||||
|
@ -851,8 +851,8 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
|
|||
kmp_team_t *team;
|
||||
ompt_wait_id_t lck;
|
||||
void *codeptr_ra;
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
if (ompt_enabled.enabled) {
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
team = __kmp_team_from_gtid(gtid);
|
||||
lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
|
||||
/* OMPT state update */
|
||||
|
@ -1607,8 +1607,8 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
#if USE_ITT_NOTIFY
|
||||
__kmp_threads[global_tid]->th.th_ident = loc;
|
||||
|
@ -1671,8 +1671,8 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
#if USE_ITT_NOTIFY
|
||||
__kmp_threads[global_tid]->th.th_ident = loc;
|
||||
|
@ -2069,8 +2069,8 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
#endif
|
||||
/* This barrier is not a barrier region boundary */
|
||||
#if USE_ITT_NOTIFY
|
||||
|
@ -2083,11 +2083,9 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
|
|||
|
||||
// Consider next barrier a user-visible barrier for barrier region boundaries
|
||||
// Nesting checks are already handled by the single construct checks
|
||||
|
||||
{
|
||||
#if OMPT_SUPPORT
|
||||
if (ompt_enabled.enabled) {
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
}
|
||||
#endif
|
||||
#if USE_ITT_NOTIFY
|
||||
__kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
|
||||
|
@ -2099,6 +2097,7 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
|
|||
ompt_frame->enter_frame = ompt_data_none;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
@ -3462,8 +3461,8 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
#if USE_ITT_NOTIFY
|
||||
__kmp_threads[global_tid]->th.th_ident = loc;
|
||||
|
@ -3651,8 +3650,8 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
#if USE_ITT_NOTIFY
|
||||
__kmp_threads[global_tid]->th.th_ident =
|
||||
|
@ -3733,8 +3732,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
#if USE_ITT_NOTIFY
|
||||
__kmp_threads[global_tid]->th.th_ident = loc;
|
||||
|
@ -3759,8 +3758,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
#if USE_ITT_NOTIFY
|
||||
__kmp_threads[global_tid]->th.th_ident = loc;
|
||||
|
@ -3780,8 +3779,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
|
|||
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
|
||||
if (ompt_frame->enter_frame.ptr == NULL)
|
||||
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
}
|
||||
OMPT_STORE_RETURN_ADDRESS(global_tid);
|
||||
#endif
|
||||
// TODO: implicit barrier: should be exposed
|
||||
#if USE_ITT_NOTIFY
|
||||
|
|
|
@ -573,13 +573,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
|
|||
gtid, lb, ub, str, chunk_sz)); \
|
||||
\
|
||||
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
|
||||
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
|
||||
(schedule) != kmp_sch_static); \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
|
||||
(kmp_int *)p_ub, (kmp_int *)&stride); \
|
||||
{ \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
|
||||
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
|
||||
(schedule) != kmp_sch_static); \
|
||||
} \
|
||||
{ \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
|
||||
(kmp_int *)p_ub, (kmp_int *)&stride); \
|
||||
} \
|
||||
if (status) { \
|
||||
KMP_DEBUG_ASSERT(stride == str); \
|
||||
*p_ub += (str > 0) ? 1 : -1; \
|
||||
|
@ -609,12 +613,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
|
|||
gtid, lb, ub, str, chunk_sz)); \
|
||||
\
|
||||
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
|
||||
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
|
||||
(kmp_int *)p_ub, (kmp_int *)&stride); \
|
||||
{ \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
|
||||
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
|
||||
TRUE); \
|
||||
} \
|
||||
{ \
|
||||
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
|
||||
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
|
||||
(kmp_int *)p_ub, (kmp_int *)&stride); \
|
||||
} \
|
||||
if (status) { \
|
||||
KMP_DEBUG_ASSERT(stride == str); \
|
||||
*p_ub += (str > 0) ? 1 : -1; \
|
||||
|
@ -1482,12 +1491,13 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
|
|||
task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
|
||||
(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
|
||||
|
||||
{
|
||||
#if OMPT_SUPPORT
|
||||
OMPT_STORE_RETURN_ADDRESS(gtid);
|
||||
#endif
|
||||
|
||||
KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);
|
||||
|
||||
}
|
||||
task(data);
|
||||
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
|
||||
KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
|
||||
|
|
|
@ -75,11 +75,13 @@ inline void *__ompt_load_return_address(int gtid) {
|
|||
return return_address;
|
||||
}
|
||||
|
||||
#define OMPT_STORE_RETURN_ADDRESS(gtid) \
|
||||
/*#define OMPT_STORE_RETURN_ADDRESS(gtid) \
|
||||
if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \
|
||||
!__kmp_threads[gtid]->th.ompt_thread_info.return_address) \
|
||||
__kmp_threads[gtid]->th.ompt_thread_info.return_address = \
|
||||
__builtin_return_address(0)
|
||||
__builtin_return_address(0)*/
|
||||
#define OMPT_STORE_RETURN_ADDRESS(gtid) \
|
||||
OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)};
|
||||
#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid)
|
||||
#define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \
|
||||
((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \
|
||||
|
@ -133,4 +135,23 @@ inline const char *ompt_get_runtime_version() {
|
|||
#define OMPT_REDUCTION_END
|
||||
#endif // ! OMPT_SUPPORT && OMPT_OPTIONAL
|
||||
|
||||
class OmptReturnAddressGuard {
|
||||
private:
|
||||
bool SetAddress{false};
|
||||
int Gtid;
|
||||
|
||||
public:
|
||||
OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Gtid(Gtid) {
|
||||
if (ompt_enabled.enabled && Gtid >= 0 && __kmp_threads[Gtid] &&
|
||||
!__kmp_threads[Gtid]->th.ompt_thread_info.return_address) {
|
||||
SetAddress = true;
|
||||
__kmp_threads[Gtid]->th.ompt_thread_info.return_address = ReturnAddress;
|
||||
}
|
||||
}
|
||||
~OmptReturnAddressGuard() {
|
||||
if (SetAddress)
|
||||
__kmp_threads[Gtid]->th.ompt_thread_info.return_address = NULL;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
// RUN: %libomp-compile-and-run | FileCheck %s
|
||||
// REQUIRES: ompt
|
||||
|
||||
#define USE_PRIVATE_TOOL 1
|
||||
#include "callback.h"
|
||||
|
||||
__attribute__((noinline))
|
||||
int foo(int x) {
|
||||
#pragma omp parallel num_threads(2)
|
||||
{
|
||||
#pragma omp atomic
|
||||
x++;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
__attribute__((noinline))
|
||||
int bar(int x) {
|
||||
#pragma omp parallel num_threads(2)
|
||||
{
|
||||
#pragma omp critical
|
||||
x++;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
int main() {
|
||||
int y;
|
||||
y = foo(y);
|
||||
y = bar(y);
|
||||
y = foo(y);
|
||||
return 0;
|
||||
|
||||
// CHECK-NOT: {{^}}0: Could not register callback
|
||||
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]
|
||||
|
||||
// First call to foo
|
||||
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin
|
||||
// CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]
|
||||
|
||||
// Call to bar
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
|
||||
|
||||
// Second call to foo
|
||||
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
|
||||
// CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS]]
|
||||
|
||||
}
|
||||
|
||||
static void on_ompt_callback_thread_begin(
|
||||
ompt_thread_t thread_type,
|
||||
ompt_data_t *thread_data) {
|
||||
if (thread_data->ptr)
|
||||
printf("%s\n", "0: thread_data initially not null");
|
||||
thread_data->value = ompt_get_unique_id();
|
||||
printf("%" PRIu64 ":" _TOOL_PREFIX
|
||||
" ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
|
||||
ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
|
||||
thread_type, thread_data->value);
|
||||
}
|
||||
|
||||
static void on_ompt_callback_parallel_begin(
|
||||
ompt_data_t *encountering_task_data,
|
||||
const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
|
||||
uint32_t requested_team_size, int flag, const void *codeptr_ra) {
|
||||
if (parallel_data->ptr)
|
||||
printf("0: parallel_data initially not null\n");
|
||||
parallel_data->value = ompt_get_unique_id();
|
||||
int invoker = flag & 0xF;
|
||||
const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
|
||||
const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
|
||||
printf("%" PRIu64 ":" _TOOL_PREFIX
|
||||
" ompt_event_%s_begin: parent_task_id=%" PRIu64
|
||||
", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
|
||||
"parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
|
||||
", codeptr_ra=%p, invoker=%d\n",
|
||||
ompt_get_thread_data()->value, event, encountering_task_data->value,
|
||||
encountering_task_frame->exit_frame.ptr,
|
||||
encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
|
||||
requested_team_size, codeptr_ra, invoker);
|
||||
}
|
||||
|
||||
int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
|
||||
ompt_data_t *tool_data) {
|
||||
ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
|
||||
ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
|
||||
ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
|
||||
|
||||
register_callback(ompt_callback_thread_begin);
|
||||
register_callback(ompt_callback_parallel_begin);
|
||||
printf("0: NULL_POINTER=%p\n", (void *)NULL);
|
||||
return 1; // success
|
||||
}
|
||||
|
||||
void ompt_finalize(ompt_data_t *tool_data) {}
|
||||
|
||||
ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
|
||||
const char *runtime_version) {
|
||||
static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,
|
||||
&ompt_finalize, 0};
|
||||
return &ompt_start_tool_result;
|
||||
}
|
Loading…
Reference in New Issue