[OpenMP] Add using bit flags to select Libomptarget Information

Summary:
This patch adds more fine-grained support over which information is output from the libomptarget runtime when run with the environment variable LIBOMPTARGET_INFO set. An extensible set of flags can be used to pick and choose which information the user is interested in.

Reviewers: jdoerfert JonChesterfield grokos

Differential Revision: https://reviews.llvm.org/D93727
This commit is contained in:
Joseph Huber 2020-12-18 15:14:44 -05:00 committed by Huber, Joseph
parent f7463ca3cc
commit fe5d51a489
7 changed files with 177 additions and 67 deletions

View File

@ -37,24 +37,38 @@
#ifndef _OMPTARGET_DEBUG_H
#define _OMPTARGET_DEBUG_H
static inline int getInfoLevel() {
static int InfoLevel = -1;
if (InfoLevel >= 0)
return InfoLevel;
#include <mutex>
if (char *EnvStr = getenv("LIBOMPTARGET_INFO"))
InfoLevel = std::stoi(EnvStr);
/// 32-Bit field data attributes controlling information presented to the user.
enum OpenMPInfoType : uint32_t {
// Print data arguments and attributes upon entering an OpenMP device kernel.
OMP_INFOTYPE_KERNEL_ARGS = 0x0001,
// Indicate when an address already exists in the device mapping table.
OMP_INFOTYPE_MAPPING_EXISTS = 0x0002,
// Dump the contents of the device pointer map at kernel exit or failure.
OMP_INFOTYPE_DUMP_TABLE = 0x0004,
// Print kernel information from target device plugins
OMP_INFOTYPE_PLUGIN_KERNEL = 0x0010,
};
static inline uint32_t getInfoLevel() {
static uint32_t InfoLevel = 0;
static std::once_flag Flag{};
std::call_once(Flag, []() {
if (char *EnvStr = getenv("LIBOMPTARGET_INFO"))
InfoLevel = std::stoi(EnvStr);
});
return InfoLevel;
}
static inline int getDebugLevel() {
static int DebugLevel = -1;
if (DebugLevel >= 0)
return DebugLevel;
if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG"))
DebugLevel = std::stoi(EnvStr);
static inline uint32_t getDebugLevel() {
static uint32_t DebugLevel = 0;
static std::once_flag Flag{};
std::call_once(Flag, []() {
if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG"))
DebugLevel = std::stoi(EnvStr);
});
return DebugLevel;
}
@ -107,7 +121,7 @@ static inline int getDebugLevel() {
/// Print a generic information string used if LIBOMPTARGET_INFO=1
#define INFO_MESSAGE(_num, ...) \
do { \
fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num); \
fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", (int)_num); \
fprintf(stderr, __VA_ARGS__); \
} while (0)

View File

@ -54,6 +54,13 @@ class SourceInfo {
return std::string(reinterpret_cast<const char *>(name));
}
std::string initStr(const ident_t *loc) {
if (!loc)
return ";unknown;unknown;0;0;;";
else
return std::string(reinterpret_cast<const char *>(loc->psource));
}
/// Get n-th substring in an expression separated by ;.
std::string getSubstring(const int n) const {
std::size_t begin = sourceStr.find(';');
@ -73,7 +80,7 @@ class SourceInfo {
public:
SourceInfo(const ident_t *loc)
: sourceStr(initStr(loc->psource)), name(getSubstring(1)),
: sourceStr(initStr(loc)), name(getSubstring(1)),
filename(removePath(getSubstring(0))), line(std::stoi(getSubstring(2))),
column(std::stoi(getSubstring(3))) {}

View File

@ -501,11 +501,12 @@ public:
DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit;
}
INFO(DeviceId,
"Device supports up to %d CUDA blocks and %d threads with a "
"warp size of %d\n",
DeviceData[DeviceId].BlocksPerGrid,
DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize);
if (getDebugLevel() || (getInfoLevel() & OMP_INFOTYPE_PLUGIN_KERNEL))
INFO(DeviceId,
"Device supports up to %d CUDA blocks and %d threads with a "
"warp size of %d\n",
DeviceData[DeviceId].BlocksPerGrid,
DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize);
// Set default number of teams
if (EnvNumTeams > 0) {
@ -937,14 +938,15 @@ public:
CudaBlocksPerGrid = TeamNum;
}
INFO(DeviceId,
"Launching kernel %s with %d blocks and %d threads in %s "
"mode\n",
(getOffloadEntry(DeviceId, TgtEntryPtr))
? getOffloadEntry(DeviceId, TgtEntryPtr)->name
: "(null)",
CudaBlocksPerGrid, CudaThreadsPerBlock,
(KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic");
if (getDebugLevel() || (getInfoLevel() & OMP_INFOTYPE_PLUGIN_KERNEL))
INFO(DeviceId,
"Launching kernel %s with %d blocks and %d threads in %s "
"mode\n",
(getOffloadEntry(DeviceId, TgtEntryPtr))
? getOffloadEntry(DeviceId, TgtEntryPtr)->name
: "(null)",
CudaBlocksPerGrid, CudaThreadsPerBlock,
(KernelInfo->ExecutionMode == SPMD) ? "SPMD" : "Generic");
CUstream Stream = getStream(DeviceId, AsyncInfo);
Err = cuLaunchKernel(KernelInfo->Func, CudaBlocksPerGrid, /* gridDimY */ 1,

View File

@ -49,10 +49,11 @@ DeviceTy::DeviceTy(RTLInfoTy *RTL)
MemoryManager(nullptr) {}
DeviceTy::~DeviceTy() {
if (DeviceID == -1 || getInfoLevel() < 1)
if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
return;
dumpTargetPointerMappings(*this);
ident_t loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
dumpTargetPointerMappings(&loc, *this);
}
int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
@ -217,14 +218,16 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
HT.incRefCount();
uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
INFO(DeviceID,
"Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
", "
"Size=%" PRId64 ",%s RefCount=%s, Name=%s\n",
(IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(tp),
Size, (UpdateRefCount ? " updated" : ""),
HT.isRefCountInf() ? "INF" : std::to_string(HT.getRefCount()).c_str(),
(HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "(null)");
if (getDebugLevel() || getInfoLevel() & OMP_INFOTYPE_MAPPING_EXISTS)
INFO(DeviceID,
"Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
", "
"Size=%" PRId64 ",%s RefCount=%s, Name=%s\n",
(IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(tp),
Size, (UpdateRefCount ? " updated" : ""),
HT.isRefCountInf() ? "INF"
: std::to_string(HT.getRefCount()).c_str(),
(HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
rc = (void *)tp;
} else if ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && !IsImplicit) {
// Explicit extension of mapped data - not allowed.

View File

@ -57,22 +57,27 @@ static void HandleTargetOutcome(bool success, ident_t *loc = nullptr) {
break;
case tgt_mandatory:
if (!success) {
if (getInfoLevel() > 1)
if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
for (const auto &Device : PM->Devices)
dumpTargetPointerMappings(Device);
dumpTargetPointerMappings(loc, Device);
else
FAILURE_MESSAGE("run with env LIBOMPTARGET_INFO>1 to dump host-target "
"pointer maps\n");
FAILURE_MESSAGE("Run with LIBOMPTARGET_DEBUG=%d to dump host-target "
"pointer mappings.\n",
OMP_INFOTYPE_DUMP_TABLE);
SourceInfo info(loc);
if (info.isAvailible())
fprintf(stderr, "%s:%d:%d: ", info.getFilename(), info.getLine(),
info.getColumn());
else
FAILURE_MESSAGE(
"Build with debug information to provide more information");
FAILURE_MESSAGE("Source location information not present. Compile with "
"-g or -gline-tables-only.\n");
FATAL_MESSAGE0(
1, "failure of target construct while offloading is mandatory");
} else {
if (getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)
for (const auto &Device : PM->Devices)
dumpTargetPointerMappings(loc, Device);
}
break;
}
@ -147,12 +152,15 @@ EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
DeviceTy &Device = PM->Devices[device_id];
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
arg_names, "Entering OpenMP data region");
#ifdef OMPTARGET_DEBUG
for (int i = 0; i < arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 ", Name=%s\n",
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "(null)");
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
}
#endif
@ -227,12 +235,15 @@ EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
return;
}
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
arg_names, "Exiting OpenMP data region");
#ifdef OMPTARGET_DEBUG
for (int i=0; i<arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 ", Name=%s\n",
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "(null)");
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
}
#endif
@ -294,6 +305,10 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
return;
}
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
arg_names, "Updating OpenMP data");
DeviceTy &Device = PM->Devices[device_id];
int rc = targetDataUpdate(Device, arg_num, args_base, args, arg_sizes,
arg_types, arg_names, arg_mappers);
@ -351,12 +366,15 @@ EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
return OFFLOAD_FAIL;
}
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
arg_names, "Entering OpenMP kernel");
#ifdef OMPTARGET_DEBUG
for (int i=0; i<arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 ", Name=%s\n",
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "(null)");
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
}
#endif
@ -423,12 +441,15 @@ EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
return OFFLOAD_FAIL;
}
if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types,
arg_names, "Entering OpenMP kernel");
#ifdef OMPTARGET_DEBUG
for (int i=0; i<arg_num; ++i) {
DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
", Type=0x%" PRIx64 ", Name=%s\n",
i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i],
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "(null)");
(arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown");
}
#endif
@ -436,7 +457,6 @@ EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id,
arg_types, arg_names, arg_mappers, team_num, thread_limit,
true /*team*/);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
return rc;
}

View File

@ -13,8 +13,9 @@
#ifndef _OMPTARGET_PRIVATE_H
#define _OMPTARGET_PRIVATE_H
#include <omptarget.h>
#include <Debug.h>
#include <SourceInfo.h>
#include <omptarget.h>
#include <cstdint>
@ -90,20 +91,60 @@ int __kmpc_get_target_offload(void) __attribute__((weak));
////////////////////////////////////////////////////////////////////////////////
/// dump a table of all the host-target pointer pairs on failure
static inline void dumpTargetPointerMappings(const DeviceTy &Device) {
static inline void dumpTargetPointerMappings(const ident_t *Loc,
const DeviceTy &Device) {
if (Device.HostDataToTargetMap.empty())
return;
fprintf(stderr, "Device %d Host-Device Pointer Mappings:\n", Device.DeviceID);
fprintf(stderr, "%-18s %-18s %s %s\n", "Host Ptr", "Target Ptr", "Size (B)",
"Declaration");
SourceInfo Kernel(Loc);
INFO(Device.DeviceID,
"OpenMP Host-Device pointer mappings after block at %s:%d:%d:\n",
Kernel.getFilename(), Kernel.getLine(), Kernel.getColumn());
INFO(Device.DeviceID, "%-18s %-18s %s %s %s\n", "Host Ptr", "Target Ptr",
"Size (B)", "RefCount", "Declaration");
for (const auto &HostTargetMap : Device.HostDataToTargetMap) {
SourceInfo info(HostTargetMap.HstPtrName);
fprintf(stderr, DPxMOD " " DPxMOD " %-8lu %s at %s:%d:%d\n",
DPxPTR(HostTargetMap.HstPtrBegin),
DPxPTR(HostTargetMap.TgtPtrBegin),
HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin, info.getName(),
info.getFilename(), info.getLine(), info.getColumn());
SourceInfo Info(HostTargetMap.HstPtrName);
INFO(Device.DeviceID, DPxMOD " " DPxMOD " %-8lu %-8ld %s at %s:%d:%d\n",
DPxPTR(HostTargetMap.HstPtrBegin), DPxPTR(HostTargetMap.TgtPtrBegin),
(long unsigned)(HostTargetMap.HstPtrEnd - HostTargetMap.HstPtrBegin),
HostTargetMap.getRefCount(), Info.getName(), Info.getFilename(),
Info.getLine(), Info.getColumn());
}
}
////////////////////////////////////////////////////////////////////////////////
/// Print out the names and properties of the arguments to each kernel
static inline void
printKernelArguments(const ident_t *Loc, const int64_t DeviceId,
const int32_t ArgNum, const int64_t *ArgSizes,
const int64_t *ArgTypes, const map_var_info_t *ArgNames,
const char *RegionType) {
SourceInfo info(Loc);
INFO(DeviceId, "%s at %s:%d:%d with %d arguments:\n", RegionType,
info.getFilename(), info.getLine(), info.getColumn(), ArgNum);
for (int32_t i = 0; i < ArgNum; ++i) {
const map_var_info_t varName = (ArgNames) ? ArgNames[i] : nullptr;
const char *type = nullptr;
const char *implicit =
(ArgTypes[i] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : "";
if (ArgTypes[i] & OMP_TGT_MAPTYPE_TO && ArgTypes[i] & OMP_TGT_MAPTYPE_FROM)
type = "tofrom";
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_TO)
type = "to";
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_FROM)
type = "from";
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_PRIVATE)
type = "private";
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_LITERAL)
type = "firstprivate";
else if (ArgTypes[i] & OMP_TGT_MAPTYPE_TARGET_PARAM && ArgSizes[i] != 0)
type = "alloc";
else
type = "use_address";
INFO(DeviceId, "%s(%s)[%ld] %s\n", type,
getNameFromMapping(varName).c_str(), ArgSizes[i], implicit);
}
}

View File

@ -1,15 +1,38 @@
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_INFO=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -gline-tables-only && env LIBOMPTARGET_INFO=23 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
#include <stdio.h>
#include <omp.h>
int main() {
int ptr = 1;
#define N 64
// INFO: CUDA device {{[0-9]+}} info: Device supports up to {{[0-9]+}} CUDA blocks and {{[0-9]+}} threads with a warp size of {{[0-9]+}}
// INFO: CUDA device {{[0-9]+}} info: Launching kernel {{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
#pragma omp target map(tofrom:ptr)
{ptr = 1;}
int main() {
int A[N];
int B[N];
int C[N];
int val = 1;
// INFO: CUDA device 0 info: Device supports up to {{.*}} CUDA blocks and {{.*}} threads with a warp size of {{.*}}
// INFO: Libomptarget device 0 info: Entering OpenMP data region at info.c:33:1 with 3 arguments:
// INFO: Libomptarget device 0 info: alloc(A[0:64])[256]
// INFO: Libomptarget device 0 info: tofrom(B[0:64])[256]
// INFO: Libomptarget device 0 info: to(C[0:64])[256]
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:33:1:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 C[0:64] at info.c:11:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 B[0:64] at info.c:10:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 A[0:64] at info.c:9:7
// INFO: Libomptarget device 0 info: Entering OpenMP kernel at info.c:34:1 with 1 arguments:
// INFO: Libomptarget device 0 info: firstprivate(val)[4]
// INFO: CUDA device 0 info: Launching kernel {{.*}} with {{.*}} and {{.*}} threads in {{.*}} mode
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:34:1:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 C[0:64] at info.c:11:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 B[0:64] at info.c:10:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 A[0:64] at info.c:9:7
// INFO: Libomptarget device 0 info: Exiting OpenMP data region at info.c:33:1
#pragma omp target data map(alloc:A[0:N]) map(tofrom:B[0:N]) map(to:C[0:N])
#pragma omp target firstprivate(val)
{ val = 1; }
return 0;
}