llvm-project/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

895 lines
31 KiB
C++

//===- PluginInterface.cpp - Target independent plugin device interface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
#include "PluginInterface.h"
#include "Debug.h"
#include "GlobalHandler.h"
#include "elf_common.h"
#include "omptarget.h"
#include "omptargetplugin.h"
#include <cstdint>
#include <limits>
using namespace llvm;
using namespace omp;
using namespace target;
using namespace plugin;
GenericPluginTy *Plugin::SpecificPlugin = nullptr;
/// Finish the wrapped operation. When the wrapper fell back to its own local
/// async info object, the operation is made synchronous by synchronizing here,
/// and the outcome is stored in the referenced error.
AsyncInfoWrapperTy::~AsyncInfoWrapperTy() {
  // A caller-provided async info object is synchronized by the caller.
  const bool UsesLocalInfo = (AsyncInfoPtr == &LocalAsyncInfo);
  if (!UsesLocalInfo)
    return;

  // No queue was attached to the local object, so there is nothing to wait on.
  if (!LocalAsyncInfo.Queue)
    return;

  // Keep an error that was already recorded instead of overwriting it.
  if (Err)
    return;

  Err = Device.synchronize(&LocalAsyncInfo);
}
/// Initialize the launch parameters of the kernel from \p GenericDevice and
/// then run the plugin-specific initialization on \p Image.
Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
                            DeviceImageTy &Image) {
  // Start from the default thread count; generic-mode kernels get one warp
  // size worth of threads added on top.
  PreferredNumThreads = getDefaultNumThreads(GenericDevice);
  if (isGenericMode()) {
    PreferredNumThreads += GenericDevice.getWarpSize();
  }

  // Remaining limits come directly from the device.
  MaxNumThreads = GenericDevice.getThreadLimit();
  DynamicMemorySize = GenericDevice.getDynamicMemorySize();

  return initImpl(GenericDevice, Image);
}
/// Launch the kernel on \p GenericDevice: pack the arguments, choose the number
/// of threads and blocks from the clauses and trip count, and hand everything
/// over to the plugin-specific launchImpl().
Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                              ptrdiff_t *ArgOffsets, int32_t NumArgs,
                              uint64_t NumTeamsClause,
                              uint32_t ThreadLimitClause,
                              uint64_t LoopTripCount,
                              AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  // Backing storage for the packed arguments; it must stay alive until
  // launchImpl() returns.
  llvm::SmallVector<void *, 16> ArgStorage;
  llvm::SmallVector<void *, 16> PtrStorage;
  void *PackedArgs =
      prepareArgs(GenericDevice, ArgPtrs, ArgOffsets, NumArgs, ArgStorage,
                  PtrStorage, AsyncInfoWrapper);

  // The block count depends on the thread count, so compute threads first.
  const uint32_t ThreadCount = getNumThreads(GenericDevice, ThreadLimitClause);
  const uint64_t BlockCount = getNumBlocks(GenericDevice, NumTeamsClause,
                                           LoopTripCount, ThreadCount);

  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
       "Launching kernel %s with %" PRIu64
       " blocks and %d threads in %s mode\n",
       getName(), BlockCount, ThreadCount, getExecutionModeName());

  return launchImpl(GenericDevice, ThreadCount, BlockCount, DynamicMemorySize,
                    NumArgs, PackedArgs, AsyncInfoWrapper);
}
/// Build the argument array handed to the kernel launch. \p Ptrs receives each
/// argument pointer with its offset applied and \p Args receives the address
/// of the matching \p Ptrs slot. Returns the start of \p Args, or nullptr when
/// there are no arguments.
void *GenericKernelTy::prepareArgs(GenericDeviceTy &GenericDevice,
                                   void **ArgPtrs, ptrdiff_t *ArgOffsets,
                                   int32_t NumArgs,
                                   llvm::SmallVectorImpl<void *> &Args,
                                   llvm::SmallVectorImpl<void *> &Ptrs,
                                   AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  Args.resize(NumArgs);
  Ptrs.resize(NumArgs);

  if (NumArgs == 0)
    return nullptr;

  for (int Idx = 0; Idx < NumArgs; ++Idx) {
    // Apply the offset on the integer representation of the pointer.
    const intptr_t Adjusted = (intptr_t)ArgPtrs[Idx] + ArgOffsets[Idx];
    Ptrs[Idx] = (void *)Adjusted;
    // The argument array stores the address of each adjusted pointer.
    Args[Idx] = &Ptrs[Idx];
  }

  return Args.data();
}
/// Pick the number of threads for a launch: a positive \p ThreadLimitClause
/// takes precedence over the preferred thread count, and the result never
/// exceeds the maximum number of threads of the kernel.
uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
                                        uint32_t ThreadLimitClause) const {
  const auto Requested =
      (ThreadLimitClause > 0) ? ThreadLimitClause : PreferredNumThreads;
  return std::min(MaxNumThreads, Requested);
}
/// Pick the number of blocks (teams) for a launch. A positive
/// \p NumTeamsClause wins; otherwise a positive \p LoopTripCount drives the
/// choice depending on the execution mode; otherwise the default is used. The
/// result is capped by the block limit of the device.
uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
                                       uint64_t NumTeamsClause,
                                       uint64_t LoopTripCount,
                                       uint32_t NumThreads) const {
  uint64_t Blocks = getDefaultNumBlocks(GenericDevice);

  const bool HasTeamsClause = NumTeamsClause > 0;
  const bool HasTripCount = LoopTripCount > 0;

  if (HasTeamsClause) {
    Blocks = NumTeamsClause;
  } else if (HasTripCount && isSPMDMode()) {
    // Combined construct, i.e. `target teams distribute parallel for [simd]`:
    // launch enough teams for every thread to execute one loop iteration,
    // rounding up to the nearest integer.
    Blocks = ((LoopTripCount - 1) / NumThreads) + 1;
  } else if (HasTripCount) {
    assert((isGenericMode() || isGenericSPMDMode()) &&
           "Unexpected execution mode!");
    // Non-combined construct, i.e. `teams distribute` with a nested
    // `parallel for`, where each team gets one iteration of the `distribute`
    // loop:
    //
    //   #pragma omp target teams distribute
    //   for(...loop_tripcount...) {
    //     #pragma omp parallel for
    //     for(...) {}
    //   }
    //
    // The threads of a team execute the iterations of the `parallel` loop.
    Blocks = LoopTripCount;
  }

  return std::min(Blocks, GenericDevice.getBlockLimit());
}
/// Create the generic state of device \p DeviceId. The grid values are copied
/// from \p OMPGridValues and then tightened by OMP_NUM_TEAMS and
/// OMP_TEAMS_THREAD_LIMIT when those envars hold a positive value.
GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
                                 const llvm::omp::GV &OMPGridValues)
    : OMP_TeamLimit("OMP_TEAM_LIMIT"), OMP_NumTeams("OMP_NUM_TEAMS"),
      OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
      OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
      OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
      // The stack and heap size envars depend on the device initialization, so
      // they are left default-constructed here and are initialized in
      // GenericDeviceTy::init().
      OMPX_TargetStackSize(), OMPX_TargetHeapSize(), MemoryManager(nullptr),
      DeviceId(DeviceId), GridValues(OMPGridValues),
      PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock() {
  // The envars can only lower the limits, never raise them.
  if (OMP_NumTeams > 0) {
    const uint32_t TeamsCap = static_cast<uint32_t>(OMP_NumTeams);
    GridValues.GV_Max_Teams = std::min(GridValues.GV_Max_Teams, TeamsCap);
  }

  if (OMP_TeamsThreadLimit > 0) {
    const uint32_t ThreadsCap = static_cast<uint32_t>(OMP_TeamsThreadLimit);
    GridValues.GV_Max_WG_Size = std::min(GridValues.GV_Max_WG_Size, ThreadsCap);
  }
}
/// Initialize the device: run the plugin-specific initialization, set up the
/// envars that need an initialized device, and enable the memory manager when
/// requested through the environment.
Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
  if (auto Err = initImpl(Plugin))
    return Err;

  // The stack and heap size envars are created with device getter/setter
  // callbacks, so they can only be built once the device is initialized.
  auto StackEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_STACK_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceStackSize(V); },
      [this](uint64_t V) -> Error { return setDeviceStackSize(V); });
  if (!StackEnvarOrErr)
    return StackEnvarOrErr.takeError();
  OMPX_TargetStackSize = std::move(*StackEnvarOrErr);

  auto HeapEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_HEAP_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
      [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
  if (!HeapEnvarOrErr)
    return HeapEnvarOrErr.takeError();
  OMPX_TargetHeapSize = std::move(*HeapEnvarOrErr);

  // Create the memory manager only if the environment asks for it.
  auto [SizeThreshold, ManagerEnabled] =
      MemoryManagerTy::getSizeThresholdFromEnv();
  if (ManagerEnabled) {
    MemoryManager = new MemoryManagerTy(*this, SizeThreshold);
  }

  return Plugin::success();
}
/// Deinitialize the device and release the memory manager, if any.
Error GenericDeviceTy::deinit() {
  // The memory manager goes first: destroying it after the device is
  // deinitialized could release device allocations on a dead device.
  // Deleting a null pointer is a no-op, so no check is needed.
  delete MemoryManager;
  MemoryManager = nullptr;

  return deinitImpl();
}
/// Load the binary image \p TgtImage on the device, set up its device
/// environment and register its offload entries. Returns the offload entry
/// table of the loaded image.
Expected<__tgt_target_table *>
GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
                            const __tgt_device_image *TgtImage) {
  DP("Load data from image " DPxMOD "\n", DPxPTR(TgtImage->ImageStart));

  // The id of the new image is the number of images loaded so far.
  auto LoadedOrErr = loadBinaryImpl(TgtImage, LoadedImages.size());
  if (!LoadedOrErr)
    return LoadedOrErr.takeError();

  DeviceImageTy *NewImage = *LoadedOrErr;
  assert(NewImage != nullptr && "Invalid image");

  // Track the image before running the remaining setup steps.
  LoadedImages.push_back(NewImage);

  // Some plugins skip this step; see setupDeviceEnvironment().
  if (auto Err = setupDeviceEnvironment(Plugin, *NewImage))
    return std::move(Err);

  // Register every offload entry of the image.
  if (auto Err = registerOffloadEntries(*NewImage))
    return std::move(Err);

  return NewImage->getOffloadEntryTable();
}
/// Write the device environment global of \p Image, if the plugin needs it.
/// A failure to write the global is reported through the debug output only
/// and does not fail the function.
Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin,
                                              DeviceImageTy &Image) {
  // Not every plugin needs a device environment.
  if (!shouldSetupDeviceEnvironment())
    return Plugin::success();

  DeviceEnvironmentTy DevEnv;
  DevEnv.DebugKind = OMPX_DebugKind;
  DevEnv.NumDevices = Plugin.getNumDevices();
  // TODO: The device ID used here is not the real device ID used by OpenMP.
  DevEnv.DeviceNum = DeviceId;
  DevEnv.DynamicMemSize = OMPX_SharedMemorySize;

  // Describe the device-side global backed by the host copy above.
  GlobalTy DevEnvGlobal("omptarget_device_environment",
                        sizeof(DeviceEnvironmentTy), &DevEnv);

  // Push the values to the device; the error is consumed on purpose so that
  // execution continues even when the write fails.
  GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
  auto WriteErr = Handler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
  if (WriteErr) {
    DP("Missing symbol %s, continue execution anyway.\n",
       DevEnvGlobal.getName().data());
    consumeError(std::move(WriteErr));
  }

  return Plugin::success();
}
/// Register all offload entries of \p Image. Entries with a non-zero size are
/// registered as globals and the rest as kernels.
Error GenericDeviceTy::registerOffloadEntries(DeviceImageTy &Image) {
  const __tgt_offload_entry *First = Image.getTgtImage()->EntriesBegin;
  const __tgt_offload_entry *Last = Image.getTgtImage()->EntriesEnd;

  for (const __tgt_offload_entry *Cur = First; Cur != Last; ++Cur) {
    // The host address uniquely identifies the entry, so it must be set.
    if (!Cur->addr)
      return Plugin::error("Failure to register entry without address");

    __tgt_offload_entry DeviceEntry = {0};

    const bool IsGlobal = Cur->size;
    if (IsGlobal) {
      if (auto Err = registerGlobalOffloadEntry(Image, *Cur, DeviceEntry))
        return Err;
    } else {
      if (auto Err = registerKernelOffloadEntry(Image, *Cur, DeviceEntry))
        return Err;
    }

    assert(DeviceEntry.addr && "Device addr of offload entry cannot be null");

    DP("Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n",
       DPxPTR(Cur - First), (Cur->size) ? " global" : "", Cur->name,
       DPxPTR(DeviceEntry.addr));
  }

  return Plugin::success();
}
/// Register the global described by \p GlobalEntry: look up its device address
/// in \p Image, store it in \p DeviceEntry and add the entry to the offload
/// entry table of the image.
Error GenericDeviceTy::registerGlobalOffloadEntry(
    DeviceImageTy &Image, const __tgt_offload_entry &GlobalEntry,
    __tgt_offload_entry &DeviceEntry) {
  GenericPluginTy &ActivePlugin = Plugin::get();
  GenericGlobalHandlerTy &Handler = ActivePlugin.getGlobalHandler();

  // Start from a copy of the host entry; only the address is replaced below.
  DeviceEntry = GlobalEntry;

  // Metadata object that receives the device-side address of the global.
  GlobalTy DeviceGlobal(GlobalEntry.name, GlobalEntry.size);
  if (auto Err =
          Handler.getGlobalMetadataFromDevice(*this, Image, DeviceGlobal))
    return Err;

  DeviceEntry.addr = DeviceGlobal.getPtr();
  assert(DeviceEntry.addr && "Invalid device global's address");

  // Note: In the current implementation declare target variables can either
  // be link or to. Once unified memory is activated via the requires
  // directive, the variable can be used directly from the host in both cases.
  const bool UnifiedSharedMemory =
      ActivePlugin.getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY;
  if (UnifiedSharedMemory) {
    // With unified memory any target link or to variable can access host
    // addresses directly, so device copies are no longer needed.
    GlobalTy HostGlobal(GlobalEntry);
    if (auto Err = Handler.writeGlobalToDevice(*this, Image, HostGlobal,
                                               DeviceGlobal))
      return Err;
  }

  Image.getOffloadEntryTable().addEntry(DeviceEntry);

  return Plugin::success();
}
/// Register the kernel described by \p KernelEntry: construct and initialize
/// the kernel object, store its address in \p DeviceEntry and add the entry to
/// the offload entry table of \p Image.
Error GenericDeviceTy::registerKernelOffloadEntry(
    DeviceImageTy &Image, const __tgt_offload_entry &KernelEntry,
    __tgt_offload_entry &DeviceEntry) {
  // Start from a copy of the host entry; only the address is replaced below.
  DeviceEntry = KernelEntry;

  auto NewKernelOrErr = constructKernelEntry(KernelEntry, Image);
  if (!NewKernelOrErr)
    return NewKernelOrErr.takeError();

  GenericKernelTy *NewKernel = *NewKernelOrErr;
  assert(NewKernel != nullptr && "Invalid kernel");

  if (auto Err = NewKernel->init(*this, Image))
    return Err;

  // The device-side "address" of a kernel entry is the kernel object itself.
  DeviceEntry.addr = (void *)NewKernel;
  Image.getOffloadEntryTable().addEntry(DeviceEntry);

  return Plugin::success();
}
/// Synchronize the queue attached to \p AsyncInfo. Fails when \p AsyncInfo is
/// null or has no queue.
Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
  const bool HasQueue = AsyncInfo && AsyncInfo->Queue;
  if (HasQueue)
    return synchronizeImpl(*AsyncInfo);

  return Plugin::error("Invalid async info queue");
}
/// Allocate \p Size bytes of kind \p Kind. Default and device allocations are
/// served by the memory manager when it is enabled; everything else goes to
/// the device allocator. Unknown kinds are rejected with an error.
Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
                                            TargetAllocTy Kind) {
  const bool IsDeviceKind =
      Kind == TARGET_ALLOC_DEFAULT || Kind == TARGET_ALLOC_DEVICE;
  const bool IsHostOrSharedKind =
      Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED;

  // Neither the memory manager nor the device allocator handles this kind.
  if (!IsDeviceKind && !IsHostOrSharedKind)
    return Plugin::error("Invalid target data allocation kind or requested "
                         "allocator not implemented yet");

  // Only default/device allocations may come from the memory manager.
  if (IsDeviceKind && MemoryManager) {
    void *Managed = MemoryManager->allocate(Size, HostPtr);
    if (!Managed)
      return Plugin::error("Failed to allocate from memory manager");
    return Managed;
  }

  void *Raw = allocate(Size, HostPtr, Kind);
  if (!Raw)
    return Plugin::error("Failed to allocate from device allocator");
  return Raw;
}
/// Release the allocation \p TgtPtr of kind \p Kind.
///
/// The allocator is selected the same way dataAlloc() selects it: only default
/// and device allocations are served by the memory manager, and only when it
/// is enabled. Host and shared allocations always come from the device
/// allocator, so they must be returned to it even if the memory manager is
/// enabled.
///
/// \returns an error if the selected allocator reports a failure.
Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
  const bool ServedByManager =
      MemoryManager &&
      (Kind == TARGET_ALLOC_DEFAULT || Kind == TARGET_ALLOC_DEVICE);

  const int Res =
      ServedByManager ? MemoryManager->free(TgtPtr) : free(TgtPtr, Kind);

  // Both deallocation routines report failure with a non-zero result.
  if (Res)
    return Plugin::error("Failure to deallocate device pointer %p", TgtPtr);

  return Plugin::success();
}
/// Copy \p Size bytes from the host buffer \p HstPtr to the device buffer
/// \p TgtPtr, using \p AsyncInfo for the transfer.
///
/// \returns the error of the transfer or, if the wrapper had to synchronize
/// on destruction, the error of that synchronization.
Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr,
                                  int64_t Size, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper keeps a reference to Err and may overwrite it from its
    // destructor (see ~AsyncInfoWrapperTy). Scoping it guarantees that write
    // happens before Err is returned, instead of depending on the compiler
    // eliding the copy of the returned local.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
  }
  return Err;
}
/// Copy \p Size bytes from the device buffer \p TgtPtr to the host buffer
/// \p HstPtr, using \p AsyncInfo for the transfer.
///
/// \returns the error of the transfer or, if the wrapper had to synchronize
/// on destruction, the error of that synchronization.
Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr,
                                    int64_t Size, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper keeps a reference to Err and may overwrite it from its
    // destructor (see ~AsyncInfoWrapperTy). Scoping it guarantees that write
    // happens before Err is returned, instead of depending on the compiler
    // eliding the copy of the returned local.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper);
  }
  return Err;
}
/// Copy \p Size bytes from \p SrcPtr on this device to \p DstPtr on the device
/// \p DstDev, using \p AsyncInfo for the transfer.
///
/// \returns the error of the transfer or, if the wrapper had to synchronize
/// on destruction, the error of that synchronization.
Error GenericDeviceTy::dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev,
                                    void *DstPtr, int64_t Size,
                                    __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper keeps a reference to Err and may overwrite it from its
    // destructor (see ~AsyncInfoWrapperTy). Scoping it guarantees that write
    // happens before Err is returned, instead of depending on the compiler
    // eliding the copy of the returned local.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper);
  }
  return Err;
}
/// Launch the kernel referenced by \p EntryPtr with the given arguments and
/// launch clauses, using \p AsyncInfo for the launch.
///
/// \p EntryPtr is interpreted as a GenericKernelTy pointer, which is what
/// registerKernelOffloadEntry() stores as the device address of kernel
/// entries.
///
/// \returns the error of the launch or, if the wrapper had to synchronize on
/// destruction, the error of that synchronization.
Error GenericDeviceTy::runTargetTeamRegion(
    void *EntryPtr, void **ArgPtrs, ptrdiff_t *ArgOffsets, int32_t NumArgs,
    uint64_t NumTeamsClause, uint32_t ThreadLimitClause, uint64_t LoopTripCount,
    __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper keeps a reference to Err and may overwrite it from its
    // destructor (see ~AsyncInfoWrapperTy). Scoping it guarantees that write
    // happens before Err is returned, instead of depending on the compiler
    // eliding the copy of the returned local.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);

    GenericKernelTy &GenericKernel =
        *reinterpret_cast<GenericKernelTy *>(EntryPtr);

    Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, NumArgs,
                               NumTeamsClause, ThreadLimitClause,
                               LoopTripCount, AsyncInfoWrapper);
  }
  return Err;
}
/// Allocate a new async info object, store it in \p AsyncInfoPtr and run the
/// plugin-specific initialization on it.
///
/// \p AsyncInfoPtr must not be null. The object is allocated with `new` and is
/// stored in \p AsyncInfoPtr even if the initialization fails.
Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) {
  assert(AsyncInfoPtr && "Invalid async info");

  *AsyncInfoPtr = new __tgt_async_info();

  auto Err = Plugin::success();
  {
    // The wrapper keeps a reference to Err and may overwrite it from its
    // destructor (see ~AsyncInfoWrapperTy). Scoping it guarantees that write
    // happens before Err is returned, instead of depending on the compiler
    // eliding the copy of the returned local.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, *AsyncInfoPtr);
    Err = initAsyncInfoImpl(AsyncInfoWrapper);
  }
  return Err;
}
/// Fill \p DeviceInfo through the plugin-specific implementation.
/// \p DeviceInfo must not be null.
Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
  assert(DeviceInfo && "Invalid device info");

  return this->initDeviceInfoImpl(DeviceInfo);
}
/// Print the device information through the plugin-specific implementation.
Error GenericDeviceTy::printInfo() {
  // TODO: Print generic information here
  return this->printInfoImpl();
}
/// Create an event through the plugin-specific implementation, which receives
/// \p EventPtrStorage unchanged.
Error GenericDeviceTy::createEvent(void **EventPtrStorage) {
  return this->createEventImpl(EventPtrStorage);
}
/// Destroy the event \p EventPtr through the plugin-specific implementation.
Error GenericDeviceTy::destroyEvent(void *EventPtr) {
  return this->destroyEventImpl(EventPtr);
}
/// Record the event \p EventPtr using \p AsyncInfo.
///
/// \returns the error of the operation or, if the wrapper had to synchronize
/// on destruction, the error of that synchronization.
Error GenericDeviceTy::recordEvent(void *EventPtr,
                                   __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper keeps a reference to Err and may overwrite it from its
    // destructor (see ~AsyncInfoWrapperTy). Scoping it guarantees that write
    // happens before Err is returned, instead of depending on the compiler
    // eliding the copy of the returned local.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = recordEventImpl(EventPtr, AsyncInfoWrapper);
  }
  return Err;
}
/// Make the work associated with \p AsyncInfo wait for the event \p EventPtr.
///
/// \returns the error of the operation or, if the wrapper had to synchronize
/// on destruction, the error of that synchronization.
Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper keeps a reference to Err and may overwrite it from its
    // destructor (see ~AsyncInfoWrapperTy). Scoping it guarantees that write
    // happens before Err is returned, instead of depending on the compiler
    // eliding the copy of the returned local.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = waitEventImpl(EventPtr, AsyncInfoWrapper);
  }
  return Err;
}
/// Synchronize on the event \p EventPtr through the plugin-specific
/// implementation.
Error GenericDeviceTy::syncEvent(void *EventPtr) {
  return this->syncEventImpl(EventPtr);
}
/// Initialize the plugin: query the number of devices from the plugin-specific
/// implementation and, if there is at least one, size the device list and
/// create the global handler.
Error GenericPluginTy::init() {
  auto DeviceCountOrErr = initImpl();
  if (!DeviceCountOrErr)
    return DeviceCountOrErr.takeError();

  NumDevices = *DeviceCountOrErr;

  // Without devices there is nothing else to set up.
  if (NumDevices != 0) {
    assert(Devices.size() == 0 && "Plugin already initialized");
    // Devices are created lazily, so every slot starts out empty.
    Devices.resize(NumDevices, nullptr);

    GlobalHandler = Plugin::createGlobalHandler();
    assert(GlobalHandler && "Invalid global handler");
  }

  return Plugin::success();
}
/// Deinitialize the plugin: tear down every active device, release the global
/// handler and run the plugin-specific deinitialization.
///
/// \returns the first error reported while deinitializing a device, or the
/// result of deinitImpl(). On a device error the global handler is left
/// intact.
Error GenericPluginTy::deinit() {
  // Deinitialize all active devices first, so the global handler is still
  // valid while they are being torn down and if we bail out early.
  for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) {
    if (Devices[DeviceId]) {
      if (auto Err = deinitDevice(DeviceId))
        return Err;
    }
    assert(!Devices[DeviceId] && "Device was not deinitialized");
  }

  // There is no global handler if no device is available; deleting a null
  // pointer is a no-op. Reset the pointer so it never dangles.
  delete GlobalHandler;
  GlobalHandler = nullptr;

  // Perform last deinitializations on the plugin.
  return deinitImpl();
}
/// Create the device object for \p DeviceId, store it in the device list and
/// initialize it. The device stays in the list even if its initialization
/// fails.
Error GenericPluginTy::initDevice(int32_t DeviceId) {
  assert(!Devices[DeviceId] && "Device already initialized");

  GenericDeviceTy *NewDevice = Plugin::createDevice(DeviceId, NumDevices);
  assert(NewDevice && "Invalid device");

  // Publish the device in the list before initializing its resources.
  Devices[DeviceId] = NewDevice;

  return NewDevice->init(*this);
}
/// Deinitialize and delete the device \p DeviceId. Does nothing if the device
/// is not initialized. If the deinitialization fails, the device object is
/// kept in the list.
Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
  GenericDeviceTy *Device = Devices[DeviceId];

  // The device may be already deinitialized.
  if (!Device)
    return Plugin::success();

  // Release the resources of the device before destroying the object.
  if (auto Err = Device->deinit())
    return Err;

  delete Device;
  Devices[DeviceId] = nullptr;

  return Plugin::success();
}
/// Exposed library API functions. They are basically wrappers around the
/// GenericDeviceTy functionality with the same name. All non-async functions
/// are redirected to the async versions right away with a NULL AsyncInfoPtr.
#ifdef __cplusplus
extern "C" {
#endif
/// Initialize the plugin if it was not initialized yet.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_init_plugin() {
  auto Err = Plugin::initIfNeeded();
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to initialize plugin " GETNAME(TARGET_NAME) ": %s\n",
         toString(std::move(Err)).data());
  return 1;
}
/// Deinitialize the plugin if needed.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_deinit_plugin() {
  auto Err = Plugin::deinitIfNeeded();
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to deinitialize plugin " GETNAME(TARGET_NAME) ": %s\n",
         toString(std::move(Err)).data());
  return 1;
}
/// Check whether \p TgtImage matches the ELF machine of the plugin. An
/// inactive plugin accepts no image.
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *TgtImage) {
  if (Plugin::isActive())
    return elf_check_machine(TgtImage, Plugin::get().getMagicElfBits());

  return false;
}
/// Check whether \p TgtImage is a valid binary for the plugin and, if \p Info
/// names a subarchitecture, whether it is compatible with the plugin.
///
/// \returns non-zero if the image is accepted and zero otherwise. A failure of
/// the compatibility check itself is reported through the debug output and
/// treated as "not compatible".
int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
                                       __tgt_image_info *Info) {
  if (!Plugin::isActive())
    return false;

  if (!__tgt_rtl_is_valid_binary(TgtImage))
    return false;

  // A subarchitecture was not specified (or no image info was provided at
  // all). Assume it is compatible.
  if (!Info || !Info->Arch)
    return true;

  // Check the compatibility with all the available devices. Notice the
  // devices may not be initialized yet.
  auto CompatibleOrErr = Plugin::get().isImageCompatible(Info);
  if (!CompatibleOrErr) {
    // This error should not abort the execution, so we just inform the user
    // through the debug system.
    std::string ErrString = toString(CompatibleOrErr.takeError());
    DP("Failure to check whether image %p is valid: %s\n", TgtImage,
       ErrString.data());
    return false;
  }

  bool Compatible = *CompatibleOrErr;
  // The negation carries its own trailing space so that the message reads
  // "is not compatible" rather than "is notcompatible".
  DP("Image is %scompatible with current environment: %s\n",
     (Compatible) ? "" : "not ", Info->Arch);

  return Compatible;
}
int32_t __tgt_rtl_supports_empty_images() {
return Plugin::get().supportsEmptyImages();
}
/// Initialize the device \p DeviceId.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_init_device(int32_t DeviceId) {
  auto Err = Plugin::get().initDevice(DeviceId);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to initialize device %d: %s\n", DeviceId,
         toString(std::move(Err)).data());
  return 1;
}
/// Deinitialize the device \p DeviceId.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_deinit_device(int32_t DeviceId) {
  auto Err = Plugin::get().deinitDevice(DeviceId);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to deinitialize device %d: %s\n", DeviceId,
         toString(std::move(Err)).data());
  return 1;
}
int32_t __tgt_rtl_number_of_devices() { return Plugin::get().getNumDevices(); }
int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
Plugin::get().setRequiresFlag(RequiresFlags);
return RequiresFlags;
}
int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId,
int32_t DstDeviceId) {
return Plugin::get().isDataExchangable(SrcDeviceId, DstDeviceId);
}
/// Load \p TgtImage on the device \p DeviceId. Returns the offload entry table
/// of the image, or nullptr after reporting the failure.
__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                          __tgt_device_image *TgtImage) {
  GenericPluginTy &ActivePlugin = Plugin::get();
  GenericDeviceTy &Device = ActivePlugin.getDevice(DeviceId);

  auto LoadedOrErr = Device.loadBinary(ActivePlugin, TgtImage);
  if (LoadedOrErr) {
    __tgt_target_table *EntryTable = *LoadedOrErr;
    assert(EntryTable != nullptr && "Invalid table");
    return EntryTable;
  }

  auto LoadErr = LoadedOrErr.takeError();
  REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
         DeviceId, toString(std::move(LoadErr)).data());
  return nullptr;
}
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
int32_t Kind) {
auto AllocOrErr = Plugin::get().getDevice(DeviceId).dataAlloc(
Size, HostPtr, (TargetAllocTy)Kind);
if (!AllocOrErr) {
auto Err = AllocOrErr.takeError();
REPORT("Failure to allocate device memory: %s\n",
toString(std::move(Err)).data());
return nullptr;
}
assert(*AllocOrErr && "Null pointer upon successful allocation");
return *AllocOrErr;
}
/// Release the allocation \p TgtPtr of kind \p Kind on the device \p DeviceId.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) {
  auto Err =
      Plugin::get().getDevice(DeviceId).dataDelete(TgtPtr, (TargetAllocTy)Kind);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr,
         toString(std::move(Err)).data());
  return 1;
}
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
int64_t Size) {
return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
/* AsyncInfoPtr */ nullptr);
}
/// Copy \p Size bytes from the host buffer \p HstPtr to the buffer \p TgtPtr
/// on the device \p DeviceId, using \p AsyncInfoPtr.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
                                    void *HstPtr, int64_t Size,
                                    __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size,
                                                          AsyncInfoPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to copy data from host to device. Pointers: host "
         "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
         DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
         toString(std::move(Err)).data());
  return 1;
}
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
int64_t Size) {
return __tgt_rtl_data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size,
/* AsyncInfoPtr */ nullptr);
}
/// Copy \p Size bytes from the buffer \p TgtPtr on the device \p DeviceId to
/// the host buffer \p HstPtr, using \p AsyncInfoPtr.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
                                      void *TgtPtr, int64_t Size,
                                      __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr,
                                                            Size, AsyncInfoPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to copy data from device to host. Pointers: host "
         "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
         DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
         toString(std::move(Err)).data());
  return 1;
}
int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
int32_t DstDeviceId, void *DstPtr,
int64_t Size) {
return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
Size, /* AsyncInfoPtr */ nullptr);
}
/// Copy \p Size bytes from \p SrcPtr on the device \p SrcDeviceId to \p DstPtr
/// on the device \p DstDeviceId, using \p AsyncInfo.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
                                      int DstDeviceId, void *DstPtr,
                                      int64_t Size,
                                      __tgt_async_info *AsyncInfo) {
  GenericDeviceTy &SrcDevice = Plugin::get().getDevice(SrcDeviceId);
  GenericDeviceTy &DstDevice = Plugin::get().getDevice(DstDeviceId);

  auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point. Both pointers are device pointers, so they are labeled
  // by their role in the copy.
  REPORT("Failure to copy data from device (%d) to device (%d). Pointers: "
         "source = " DPxMOD ", destination = " DPxMOD ", size = %" PRId64
         ": %s\n",
         SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size,
         toString(std::move(Err)).data());
  return 1;
}
int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t NumArgs, int32_t NumTeams,
int32_t ThreadLimit,
uint64_t LoopTripCount) {
return __tgt_rtl_run_target_team_region_async(DeviceId, TgtEntryPtr, TgtArgs,
TgtOffsets, NumArgs, NumTeams,
ThreadLimit, LoopTripCount,
/* AsyncInfoPtr */ nullptr);
}
/// Launch the kernel \p TgtEntryPtr on the device \p DeviceId with the given
/// arguments and launch clauses, using \p AsyncInfoPtr.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_run_target_team_region_async(
    int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
    int32_t NumArgs, int32_t NumTeams, int32_t ThreadLimit,
    uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).runTargetTeamRegion(
      TgtEntryPtr, TgtArgs, TgtOffsets, NumArgs, NumTeams, ThreadLimit,
      LoopTripCount, AsyncInfoPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to run target region " DPxMOD " in device %d: %s\n",
         DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data());
  return 1;
}
/// Synchronize the queue of \p AsyncInfoPtr on the device \p DeviceId.
///
/// \returns zero on success and a non-zero value on failure. A null
/// \p AsyncInfoPtr is reported as a failure by the device.
int32_t __tgt_rtl_synchronize(int32_t DeviceId,
                              __tgt_async_info *AsyncInfoPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).synchronize(AsyncInfoPtr);
  if (!Err)
    return 0;

  // The device rejects a null async info with an error, so the pointer must
  // not be dereferenced unconditionally while reporting.
  const void *Queue = AsyncInfoPtr ? AsyncInfoPtr->Queue : nullptr;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to synchronize stream %p: %s\n", Queue,
         toString(std::move(Err)).data());
  return 1;
}
int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
void **TgtArgs, ptrdiff_t *TgtOffsets,
int32_t NumArgs) {
return __tgt_rtl_run_target_region_async(DeviceId, TgtEntryPtr, TgtArgs,
TgtOffsets, NumArgs,
/* AsyncInfoPtr */ nullptr);
}
/// Launch the kernel \p TgtEntryPtr as a team region with one team, a thread
/// limit of one and no loop trip count.
int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr,
                                          void **TgtArgs, ptrdiff_t *TgtOffsets,
                                          int32_t NumArgs,
                                          __tgt_async_info *AsyncInfoPtr) {
  const int32_t SingleTeam = 1;
  const int32_t SingleThread = 1;
  const uint64_t NoTripCount = 0;

  return __tgt_rtl_run_target_team_region_async(
      DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, NumArgs, SingleTeam,
      SingleThread, NoTripCount, AsyncInfoPtr);
}
/// Print the information of the device \p DeviceId; a failure is reported but
/// not returned to the caller.
void __tgt_rtl_print_device_info(int32_t DeviceId) {
  auto PrintErr = Plugin::get().getDevice(DeviceId).printInfo();
  if (!PrintErr)
    return;

  REPORT("Failure to print device %d info: %s\n", DeviceId,
         toString(std::move(PrintErr)).data());
}
/// Create an event on the device \p DeviceId; \p EventPtr is forwarded to the
/// device as the storage for the event.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).createEvent(EventPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to create event: %s\n", toString(std::move(Err)).data());
  return 1;
}
/// Record the event \p EventPtr on the device \p DeviceId using
/// \p AsyncInfoPtr.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr,
                               __tgt_async_info *AsyncInfoPtr) {
  auto Err =
      Plugin::get().getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to record event %p: %s\n", EventPtr,
         toString(std::move(Err)).data());
  return 1;
}
/// Make the work of \p AsyncInfoPtr on the device \p DeviceId wait for the
/// event \p EventPtr.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr,
                             __tgt_async_info *AsyncInfoPtr) {
  auto Err =
      Plugin::get().getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to wait event %p: %s\n", EventPtr,
         toString(std::move(Err)).data());
  return 1;
}
/// Synchronize on the event \p EventPtr on the device \p DeviceId.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).syncEvent(EventPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to synchronize event %p: %s\n", EventPtr,
         toString(std::move(Err)).data());
  return 1;
}
/// Destroy the event \p EventPtr on the device \p DeviceId.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) {
  auto Err = Plugin::get().getDevice(DeviceId).destroyEvent(EventPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to destroy event %p: %s\n", EventPtr,
         toString(std::move(Err)).data());
  return 1;
}
/// Replace the current info level with \p NewInfoLevel.
void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
  getInfoLevelInternal().store(NewInfoLevel);
}
/// Create and initialize an async info object on the device \p DeviceId and
/// store it in \p AsyncInfoPtr, which must not be null.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_init_async_info(int32_t DeviceId,
                                  __tgt_async_info **AsyncInfoPtr) {
  assert(AsyncInfoPtr && "Invalid async info");

  auto Err = Plugin::get().getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n",
         DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data());
  return 1;
}
/// Fill \p DeviceInfo for the device \p DeviceId. \p ErrStr is always set to
/// an empty string; failures are reported through REPORT instead.
///
/// \returns zero on success and a non-zero value on failure.
int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
                                   __tgt_device_info *DeviceInfo,
                                   const char **ErrStr) {
  *ErrStr = "";

  auto Err = Plugin::get().getDevice(DeviceId).initDeviceInfo(DeviceInfo);
  if (!Err)
    return 0;

  // Reporting moves the error into toString(), which leaves Err in the
  // success state. The failure code must therefore not be derived from Err
  // after this point.
  REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n",
         DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data());
  return 1;
}
#ifdef __cplusplus
}
#endif