forked from OSchip/llvm-project
895 lines
31 KiB
C++
895 lines
31 KiB
C++
//===- PluginInterface.cpp - Target independent plugin device interface ---===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "PluginInterface.h"
|
|
#include "Debug.h"
|
|
#include "GlobalHandler.h"
|
|
#include "elf_common.h"
|
|
#include "omptarget.h"
|
|
#include "omptargetplugin.h"
|
|
|
|
#include <cstdint>
|
|
#include <limits>
|
|
|
|
using namespace llvm;
|
|
using namespace omp;
|
|
using namespace target;
|
|
using namespace plugin;
|
|
|
|
/// Definition of the static pointer to the target-specific plugin object. It
/// is null until something outside this block assigns it.
GenericPluginTy *Plugin::SpecificPlugin = nullptr;
|
|
|
|
/// Finalizes the wrapper. When the wrapper is backed by its own local async
/// info object the operation must behave synchronously, so the local queue (if
/// one was created) is synchronized here, provided no error has been recorded
/// so far. The synchronization result is stored in the wrapped error.
AsyncInfoWrapperTy::~AsyncInfoWrapperTy() {
  // A caller-provided async info object means the caller synchronizes.
  const bool UsesLocalInfo = (AsyncInfoPtr == &LocalAsyncInfo);
  if (!UsesLocalInfo)
    return;

  // Nothing was enqueued, so there is nothing to wait for.
  if (!LocalAsyncInfo.Queue)
    return;

  // Keep an already recorded failure instead of overwriting it.
  if (Err)
    return;

  Err = Device.synchronize(&LocalAsyncInfo);
}
|
|
|
|
/// Initializes the generic part of a kernel from the device's settings and
/// then delegates to the plugin-specific initImpl().
///
/// \param GenericDevice Device the kernel belongs to.
/// \param Image Device image that contains the kernel.
/// \returns The error state produced by initImpl().
Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
                            DeviceImageTy &Image) {
  // Start from the device default and add one warp for generic-mode kernels.
  PreferredNumThreads = getDefaultNumThreads(GenericDevice);
  if (isGenericMode()) {
    PreferredNumThreads = PreferredNumThreads + GenericDevice.getWarpSize();
  }

  // Upper bound on threads and the dynamic memory size come from the device.
  MaxNumThreads = GenericDevice.getThreadLimit();
  DynamicMemorySize = GenericDevice.getDynamicMemorySize();

  return initImpl(GenericDevice, Image);
}
|
|
|
|
/// Launches the kernel on the given device.
///
/// Packs the arguments with prepareArgs(), derives the launch geometry from
/// the clauses and the loop trip count, reports it through INFO and forwards
/// everything to the plugin-specific launchImpl().
///
/// \param GenericDevice Device to launch on.
/// \param ArgPtrs Array of NumArgs base argument pointers.
/// \param ArgOffsets Array of NumArgs offsets added to the base pointers.
/// \param NumArgs Number of kernel arguments.
/// \param NumTeamsClause Requested number of teams, or 0 if unspecified.
/// \param ThreadLimitClause Requested thread limit, or 0 if unspecified.
/// \param LoopTripCount Trip count of the associated loop, or 0 if unknown.
/// \param AsyncInfoWrapper Async info the launch is enqueued on.
/// \returns The error state produced by launchImpl().
Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                              ptrdiff_t *ArgOffsets, int32_t NumArgs,
                              uint64_t NumTeamsClause,
                              uint32_t ThreadLimitClause,
                              uint64_t LoopTripCount,
                              AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  // Storage referenced by KernelArgsPtr; it must outlive the launchImpl() call.
  llvm::SmallVector<void *, 16> Args;
  llvm::SmallVector<void *, 16> Ptrs;

  void *KernelArgsPtr = prepareArgs(GenericDevice, ArgPtrs, ArgOffsets, NumArgs,
                                    Args, Ptrs, AsyncInfoWrapper);

  uint32_t NumThreads = getNumThreads(GenericDevice, ThreadLimitClause);
  uint64_t NumBlocks =
      getNumBlocks(GenericDevice, NumTeamsClause, LoopTripCount, NumThreads);

  // NumThreads is unsigned, so it is printed with PRIu32 rather than %d.
  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
       "Launching kernel %s with %" PRIu64
       " blocks and %" PRIu32 " threads in %s mode\n",
       getName(), NumBlocks, NumThreads, getExecutionModeName());

  return launchImpl(GenericDevice, NumThreads, NumBlocks, DynamicMemorySize,
                    NumArgs, KernelArgsPtr, AsyncInfoWrapper);
}
|
|
|
|
/// Builds the argument array handed to the kernel launch.
///
/// Ptrs receives each base pointer with its offset applied, and Args receives
/// the address of the corresponding Ptrs slot.
///
/// \param ArgPtrs Array of NumArgs base pointers.
/// \param ArgOffsets Array of NumArgs offsets.
/// \param NumArgs Number of arguments.
/// \param Args Output vector of pointers into Ptrs; resized to NumArgs.
/// \param Ptrs Output vector of adjusted pointers; resized to NumArgs.
/// \returns Pointer to the first element of Args, or nullptr if NumArgs is 0.
void *GenericKernelTy::prepareArgs(GenericDeviceTy &GenericDevice,
                                   void **ArgPtrs, ptrdiff_t *ArgOffsets,
                                   int32_t NumArgs,
                                   llvm::SmallVectorImpl<void *> &Args,
                                   llvm::SmallVectorImpl<void *> &Ptrs,
                                   AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  Args.resize(NumArgs);
  Ptrs.resize(NumArgs);

  if (NumArgs == 0)
    return nullptr;

  for (int Idx = 0; Idx < NumArgs; ++Idx) {
    const intptr_t Base = reinterpret_cast<intptr_t>(ArgPtrs[Idx]);
    Ptrs[Idx] = reinterpret_cast<void *>(Base + ArgOffsets[Idx]);
    // Each argument is passed by the address of its adjusted pointer.
    Args[Idx] = &Ptrs[Idx];
  }

  return Args.data();
}
|
|
|
|
/// Computes the number of threads per block for a launch.
///
/// \param ThreadLimitClause Requested thread limit; 0 selects the kernel's
///        preferred thread count instead.
/// \returns The requested (or preferred) count, capped at MaxNumThreads.
uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
                                        uint32_t ThreadLimitClause) const {
  const auto Wanted =
      (ThreadLimitClause > 0) ? ThreadLimitClause : PreferredNumThreads;
  return std::min(MaxNumThreads, Wanted);
}
|
|
|
|
/// Computes the number of blocks (teams) for a launch.
///
/// Priority: an explicit num_teams clause, then a value derived from the loop
/// trip count, then the kernel's default. The result never exceeds the
/// device's block limit.
///
/// \param NumTeamsClause Requested number of teams, or 0 if unspecified.
/// \param LoopTripCount Trip count of the associated loop, or 0 if unknown.
/// \param NumThreads Threads per block already chosen for this launch.
uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
                                       uint64_t NumTeamsClause,
                                       uint64_t LoopTripCount,
                                       uint32_t NumThreads) const {
  uint64_t Blocks = getDefaultNumBlocks(GenericDevice);

  if (NumTeamsClause > 0) {
    Blocks = NumTeamsClause;
  } else if (LoopTripCount > 0 && isSPMDMode()) {
    // Combined construct, i.e. `target teams distribute parallel for [simd]`:
    // launch enough teams for every thread to run exactly one iteration,
    // rounding up to the next integer.
    Blocks = ((LoopTripCount - 1) / NumThreads) + 1;
  } else if (LoopTripCount > 0) {
    assert((isGenericMode() || isGenericSPMDMode()) &&
           "Unexpected execution mode!");
    // Non-combined construct, i.e. `teams distribute` with a nested
    // `parallel for`; every team gets one iteration of the `distribute` loop:
    //
    //   #pragma omp target teams distribute
    //   for(...loop_tripcount...) {
    //     #pragma omp parallel for
    //     for(...) {}
    //   }
    //
    // The threads of a team execute the iterations of the `parallel` loop.
    Blocks = LoopTripCount;
  }

  return std::min(Blocks, GenericDevice.getBlockLimit());
}
|
|
|
|
/// Constructs the generic device state.
///
/// Reads the environment variables that do not depend on the device being
/// initialized and lowers the grid limits to the values requested through
/// OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT when those are positive.
///
/// \param DeviceId Identifier of this device inside the plugin.
/// \param NumDevices Number of devices; sizes the peer access table.
/// \param OMPGridValues Target grid values copied into GridValues.
GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
                                 const llvm::omp::GV &OMPGridValues)
    : OMP_TeamLimit("OMP_TEAM_LIMIT"), OMP_NumTeams("OMP_NUM_TEAMS"),
      OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
      OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
      OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
      // The next two envars depend on the device initialization and cannot be
      // consulted before it completed, so they are left default-constructed
      // here and initialized in GenericDeviceTy::init().
      OMPX_TargetStackSize(), OMPX_TargetHeapSize(), MemoryManager(nullptr),
      DeviceId(DeviceId), GridValues(OMPGridValues),
      PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock() {
  if (OMP_NumTeams > 0) {
    const uint32_t TeamsCap = static_cast<uint32_t>(OMP_NumTeams);
    GridValues.GV_Max_Teams = std::min(GridValues.GV_Max_Teams, TeamsCap);
  }

  if (OMP_TeamsThreadLimit > 0) {
    const uint32_t ThreadsCap = static_cast<uint32_t>(OMP_TeamsThreadLimit);
    GridValues.GV_Max_WG_Size =
        std::min(GridValues.GV_Max_WG_Size, ThreadsCap);
  }
}
|
|
|
|
/// Initializes the device.
///
/// Runs the plugin-specific initImpl(), then creates the stack-size and
/// heap-size envars (which read and write device state through the getter and
/// setter callbacks), and finally creates the memory manager if it is enabled
/// through the environment.
///
/// \param Plugin Plugin that owns this device.
/// \returns Success, or the first error encountered.
Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
  if (Error InitErr = initImpl(Plugin))
    return InitErr;

  // The envars below may change the device's stack and heap sizes, so they can
  // only be created once the device itself is initialized.
  auto StackEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_STACK_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceStackSize(V); },
      [this](uint64_t V) -> Error { return setDeviceStackSize(V); });
  if (!StackEnvarOrErr)
    return StackEnvarOrErr.takeError();
  OMPX_TargetStackSize = std::move(*StackEnvarOrErr);

  auto HeapEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_HEAP_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
      [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
  if (!HeapEnvarOrErr)
    return HeapEnvarOrErr.takeError();
  OMPX_TargetHeapSize = std::move(*HeapEnvarOrErr);

  // Create the memory manager only when the environment asks for it.
  auto [SizeThreshold, UseManager] = MemoryManagerTy::getSizeThresholdFromEnv();
  if (UseManager) {
    MemoryManager = new MemoryManagerTy(*this, SizeThreshold);
  }

  return Plugin::success();
}
|
|
|
|
/// Deinitializes the device.
///
/// The memory manager is destroyed first; otherwise device allocations could
/// be released after the device itself has been deinitialized.
///
/// \returns The error state produced by deinitImpl().
Error GenericDeviceTy::deinit() {
  // Deleting a null pointer is a no-op, so no explicit check is required.
  delete MemoryManager;
  MemoryManager = nullptr;

  return deinitImpl();
}
|
|
|
|
/// Loads a device image on this device and registers its offload entries.
///
/// \param Plugin Plugin that owns this device.
/// \param TgtImage Host description of the image to load.
/// \returns The image's offload entry table, or the first error encountered.
Expected<__tgt_target_table *>
GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
                            const __tgt_device_image *TgtImage) {
  DP("Load data from image " DPxMOD "\n", DPxPTR(TgtImage->ImageStart));

  // The id of the new image is the number of images loaded so far.
  auto LoadedOrErr = loadBinaryImpl(TgtImage, LoadedImages.size());
  if (!LoadedOrErr)
    return LoadedOrErr.takeError();

  DeviceImageTy *Image = *LoadedOrErr;
  assert(Image != nullptr && "Invalid image");

  // Track the image before setting it up.
  LoadedImages.push_back(Image);

  // Set up the device environment; some plugins skip this step.
  if (auto EnvErr = setupDeviceEnvironment(Plugin, *Image))
    return std::move(EnvErr);

  // Register every offload entry the image provides.
  if (auto RegErr = registerOffloadEntries(*Image))
    return std::move(RegErr);

  return Image->getOffloadEntryTable();
}
|
|
|
|
/// Writes the device environment global of an image, if the plugin wants it.
///
/// A failure to write the global is logged and ignored, so this function
/// always returns success.
///
/// \param Plugin Plugin that owns this device.
/// \param Image Image whose environment global is written.
Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin,
                                              DeviceImageTy &Image) {
  // Not every plugin needs this step.
  if (!shouldSetupDeviceEnvironment())
    return Plugin::success();

  DeviceEnvironmentTy DevEnv;
  DevEnv.DebugKind = OMPX_DebugKind;
  DevEnv.NumDevices = Plugin.getNumDevices();
  // TODO: The device ID used here is not the real device ID used by OpenMP.
  DevEnv.DeviceNum = DeviceId;
  DevEnv.DynamicMemSize = OMPX_SharedMemorySize;

  // Metadata describing the device environment global and its host copy.
  GlobalTy DevEnvGlobal("omptarget_device_environment",
                        sizeof(DeviceEnvironmentTy), &DevEnv);

  // Copy the values to the device; a missing symbol is tolerated.
  GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
  Error WriteErr = Handler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
  if (WriteErr) {
    DP("Missing symbol %s, continue execution anyway.\n",
       DevEnvGlobal.getName().data());
    consumeError(std::move(WriteErr));
  }

  return Plugin::success();
}
|
|
|
|
/// Registers every offload entry of an image.
///
/// Entries with a non-zero size are registered as globals, all others as
/// kernels.
///
/// \param Image Image whose host entries are walked.
/// \returns Success, or the first registration error.
Error GenericDeviceTy::registerOffloadEntries(DeviceImageTy &Image) {
  const __tgt_offload_entry *Begin = Image.getTgtImage()->EntriesBegin;
  const __tgt_offload_entry *End = Image.getTgtImage()->EntriesEnd;

  for (const __tgt_offload_entry *Entry = Begin; Entry != End; ++Entry) {
    // The host must always provide an address that identifies the entry.
    if (!Entry->addr)
      return Plugin::error("Failure to register entry without address");

    __tgt_offload_entry DeviceEntry = {};

    const bool IsGlobal = Entry->size;
    Error RegErr = IsGlobal
                       ? registerGlobalOffloadEntry(Image, *Entry, DeviceEntry)
                       : registerKernelOffloadEntry(Image, *Entry, DeviceEntry);
    if (RegErr)
      return RegErr;

    assert(DeviceEntry.addr && "Device addr of offload entry cannot be null");

    DP("Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n",
       DPxPTR(Entry - Begin), (Entry->size) ? " global" : "", Entry->name,
       DPxPTR(DeviceEntry.addr));
  }

  return Plugin::success();
}
|
|
|
|
/// Registers a global variable offload entry.
///
/// Looks up the device address of the global, stores it in DeviceEntry and
/// adds DeviceEntry to the image's entry table. When unified shared memory is
/// required, the device global is additionally written from the host global.
///
/// \param Image Image that contains the global.
/// \param GlobalEntry Host entry describing the global.
/// \param DeviceEntry Output entry; a copy of GlobalEntry with the device
///        address.
/// \returns Success, or the first error reported by the global handler.
Error GenericDeviceTy::registerGlobalOffloadEntry(
    DeviceImageTy &Image, const __tgt_offload_entry &GlobalEntry,
    __tgt_offload_entry &DeviceEntry) {
  GenericPluginTy &PluginRef = Plugin::get();
  GenericGlobalHandlerTy &Handler = PluginRef.getGlobalHandler();

  DeviceEntry = GlobalEntry;

  // Metadata object for the device-side global; the handler fills its address.
  GlobalTy DeviceGlobal(GlobalEntry.name, GlobalEntry.size);
  if (auto LookupErr =
          Handler.getGlobalMetadataFromDevice(*this, Image, DeviceGlobal))
    return LookupErr;

  // The device entry refers to the device address of the global.
  DeviceEntry.addr = DeviceGlobal.getPtr();
  assert(DeviceEntry.addr && "Invalid device global's address");

  // Note: In the current implementation declare target variables can either
  // be link or to. Once unified memory is activated via the requires
  // directive, the variable can be used directly from the host in both cases.
  const bool UnifiedMemory =
      PluginRef.getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY;
  if (UnifiedMemory) {
    // With unified memory any target link or to variable can access host
    // addresses directly, so device copies are no longer needed.
    GlobalTy HostGlobal(GlobalEntry);
    if (auto WriteErr = Handler.writeGlobalToDevice(*this, Image, HostGlobal,
                                                    DeviceGlobal))
      return WriteErr;
  }

  // Publish the device entry on the entry table.
  Image.getOffloadEntryTable().addEntry(DeviceEntry);

  return Plugin::success();
}
|
|
|
|
/// Registers a kernel offload entry.
///
/// Constructs and initializes the kernel object, stores its address in
/// DeviceEntry and adds DeviceEntry to the image's entry table.
///
/// \param Image Image that contains the kernel.
/// \param KernelEntry Host entry describing the kernel.
/// \param DeviceEntry Output entry; a copy of KernelEntry whose address is the
///        kernel object.
/// \returns Success, or the construction/initialization error.
Error GenericDeviceTy::registerKernelOffloadEntry(
    DeviceImageTy &Image, const __tgt_offload_entry &KernelEntry,
    __tgt_offload_entry &DeviceEntry) {
  DeviceEntry = KernelEntry;

  // Build the kernel object for this entry.
  auto NewKernelOrErr = constructKernelEntry(KernelEntry, Image);
  if (!NewKernelOrErr)
    return NewKernelOrErr.takeError();

  GenericKernelTy *NewKernel = *NewKernelOrErr;
  assert(NewKernel != nullptr && "Invalid kernel");

  if (auto InitErr = NewKernel->init(*this, Image))
    return InitErr;

  // The entry's address is the kernel object itself.
  DeviceEntry.addr = static_cast<void *>(NewKernel);
  Image.getOffloadEntryTable().addEntry(DeviceEntry);

  return Plugin::success();
}
|
|
|
|
/// Synchronizes the queue attached to an async info object.
///
/// \param AsyncInfo Async info to synchronize; it and its queue must be
///        non-null.
/// \returns An error if there is no queue, otherwise the result of
///          synchronizeImpl().
Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
  const bool HasQueue = AsyncInfo && AsyncInfo->Queue;
  if (HasQueue)
    return synchronizeImpl(*AsyncInfo);

  return Plugin::error("Invalid async info queue");
}
|
|
|
|
/// Allocates memory of the requested kind.
///
/// Default and device allocations are served by the memory manager when one
/// exists; otherwise they, as well as host and shared allocations, go to the
/// device allocator. Any other kind is rejected.
///
/// \param Size Number of bytes to allocate.
/// \param HostPtr Associated host pointer, forwarded to the allocator.
/// \param Kind Kind of allocation requested.
/// \returns The non-null allocation, or an error.
Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
                                            TargetAllocTy Kind) {
  const bool IsDeviceKind =
      Kind == TARGET_ALLOC_DEFAULT || Kind == TARGET_ALLOC_DEVICE;
  const bool IsHostKind =
      Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED;

  // Device memory is handed out by the memory manager when it is enabled.
  if (IsDeviceKind && MemoryManager) {
    if (void *Managed = MemoryManager->allocate(Size, HostPtr))
      return Managed;
    return Plugin::error("Failed to allocate from memory manager");
  }

  // Everything else that is supported goes to the device allocator.
  if (IsDeviceKind || IsHostKind) {
    if (void *Direct = allocate(Size, HostPtr, Kind))
      return Direct;
    return Plugin::error("Failed to allocate from device allocator");
  }

  // Neither the memory manager nor the device allocator was tried.
  return Plugin::error("Invalid target data allocation kind or requested "
                       "allocator not implemented yet");
}
|
|
|
|
/// Releases memory previously obtained from dataAlloc().
///
/// The routing mirrors dataAlloc(): only default and device allocations are
/// ever served by the memory manager, so only those kinds are returned to it.
/// Host and shared allocations came from the device allocator and are freed
/// there, even when a memory manager exists.
///
/// \param TgtPtr Pointer to release.
/// \param Kind Kind the pointer was allocated with.
/// \returns Success, or an error if the deallocation reported a failure.
Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
  const bool ManagedKind =
      Kind == TARGET_ALLOC_DEFAULT || Kind == TARGET_ALLOC_DEVICE;

  int Res;
  if (MemoryManager && ManagedKind)
    Res = MemoryManager->free(TgtPtr);
  else
    Res = free(TgtPtr, Kind);

  // Both deallocation paths report failure with a non-zero result.
  if (Res)
    return Plugin::error("Failure to deallocate device pointer %p", TgtPtr);

  return Plugin::success();
}
|
|
|
|
/// Copies Size bytes from host memory to device memory.
///
/// \param TgtPtr Destination device pointer.
/// \param HstPtr Source host pointer.
/// \param Size Number of bytes to copy.
/// \param AsyncInfo Async info to enqueue on; when null the wrapper uses its
///        own local object and synchronizes on destruction.
/// \returns The error from the copy or from the implicit synchronization.
Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr,
                                  int64_t Size, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper's destructor may store a synchronization error in Err, so
    // it has to run before Err is returned rather than after.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
  }
  return Err;
}
|
|
|
|
/// Copies Size bytes from device memory to host memory.
///
/// \param HstPtr Destination host pointer.
/// \param TgtPtr Source device pointer.
/// \param Size Number of bytes to copy.
/// \param AsyncInfo Async info to enqueue on; when null the wrapper uses its
///        own local object and synchronizes on destruction.
/// \returns The error from the copy or from the implicit synchronization.
Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr,
                                    int64_t Size, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper's destructor may store a synchronization error in Err, so
    // it has to run before Err is returned rather than after.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper);
  }
  return Err;
}
|
|
|
|
/// Copies Size bytes from this device to another device.
///
/// \param SrcPtr Source pointer on this device.
/// \param DstDev Destination device.
/// \param DstPtr Destination pointer on DstDev.
/// \param Size Number of bytes to copy.
/// \param AsyncInfo Async info to enqueue on; when null the wrapper uses its
///        own local object and synchronizes on destruction.
/// \returns The error from the copy or from the implicit synchronization.
Error GenericDeviceTy::dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev,
                                    void *DstPtr, int64_t Size,
                                    __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper's destructor may store a synchronization error in Err, so
    // it has to run before Err is returned rather than after.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper);
  }
  return Err;
}
|
|
|
|
/// Launches the kernel stored behind an offload entry address.
///
/// \param EntryPtr Address of the GenericKernelTy object, as stored in the
///        device entry by registerKernelOffloadEntry().
/// \param ArgPtrs Array of NumArgs base argument pointers.
/// \param ArgOffsets Array of NumArgs offsets.
/// \param NumArgs Number of kernel arguments.
/// \param NumTeamsClause Requested number of teams, or 0 if unspecified.
/// \param ThreadLimitClause Requested thread limit, or 0 if unspecified.
/// \param LoopTripCount Trip count of the associated loop, or 0 if unknown.
/// \param AsyncInfo Async info to enqueue on; when null the wrapper uses its
///        own local object and synchronizes on destruction.
/// \returns The error from the launch or from the implicit synchronization.
Error GenericDeviceTy::runTargetTeamRegion(
    void *EntryPtr, void **ArgPtrs, ptrdiff_t *ArgOffsets, int32_t NumArgs,
    uint64_t NumTeamsClause, uint32_t ThreadLimitClause, uint64_t LoopTripCount,
    __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper's destructor may store a synchronization error in Err, so
    // it has to run before Err is returned rather than after.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);

    GenericKernelTy &GenericKernel =
        *reinterpret_cast<GenericKernelTy *>(EntryPtr);

    Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, NumArgs,
                               NumTeamsClause, ThreadLimitClause, LoopTripCount,
                               AsyncInfoWrapper);
  }
  return Err;
}
|
|
|
|
/// Allocates a new async info object and lets the plugin initialize it.
///
/// \param AsyncInfoPtr Output location that receives the new object; must be
///        non-null.
/// \returns The error state produced by initAsyncInfoImpl().
Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) {
  assert(AsyncInfoPtr && "Invalid async info");

  // NOTE(review): the object stays allocated when initAsyncInfoImpl() fails;
  // confirm who is expected to release it in that case.
  *AsyncInfoPtr = new __tgt_async_info();

  auto Err = Plugin::success();
  {
    // Destroy the wrapper before returning Err, because its destructor is
    // allowed to write to Err.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, *AsyncInfoPtr);
    Err = initAsyncInfoImpl(AsyncInfoWrapper);
  }
  return Err;
}
|
|
|
|
/// Fills a device info object through the plugin-specific implementation.
///
/// \param DeviceInfo Object to initialize; must be non-null.
/// \returns The error state produced by initDeviceInfoImpl().
Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
  assert(DeviceInfo != nullptr && "Invalid device info");
  return initDeviceInfoImpl(DeviceInfo);
}
|
|
|
|
/// Prints information about the device.
///
/// Only the plugin-specific part is printed for now.
///
/// \returns The error state produced by printInfoImpl().
Error GenericDeviceTy::printInfo() {
  // TODO: Print generic information here
  Error Result = printInfoImpl();
  return Result;
}
|
|
|
|
/// Creates an event through the plugin-specific implementation.
///
/// \param EventPtrStorage Location forwarded to createEventImpl() to receive
///        the event.
/// \returns The error state produced by createEventImpl().
Error GenericDeviceTy::createEvent(void **EventPtrStorage) {
  Error Result = createEventImpl(EventPtrStorage);
  return Result;
}
|
|
|
|
/// Destroys an event through the plugin-specific implementation.
///
/// \param EventPtr Event to destroy.
/// \returns The error state produced by destroyEventImpl().
Error GenericDeviceTy::destroyEvent(void *EventPtr) {
  Error Result = destroyEventImpl(EventPtr);
  return Result;
}
|
|
|
|
/// Records an event on the queue of an async info object.
///
/// \param EventPtr Event to record.
/// \param AsyncInfo Async info whose queue records the event; when null the
///        wrapper uses its own local object and synchronizes on destruction.
/// \returns The error from the record or from the implicit synchronization.
Error GenericDeviceTy::recordEvent(void *EventPtr,
                                   __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper's destructor may store a synchronization error in Err, so
    // it has to run before Err is returned rather than after.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = recordEventImpl(EventPtr, AsyncInfoWrapper);
  }
  return Err;
}
|
|
|
|
/// Makes the queue of an async info object wait for an event.
///
/// \param EventPtr Event to wait for.
/// \param AsyncInfo Async info whose queue waits; when null the wrapper uses
///        its own local object and synchronizes on destruction.
/// \returns The error from the wait or from the implicit synchronization.
Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
  auto Err = Plugin::success();
  {
    // The wrapper's destructor may store a synchronization error in Err, so
    // it has to run before Err is returned rather than after.
    AsyncInfoWrapperTy AsyncInfoWrapper(Err, *this, AsyncInfo);
    Err = waitEventImpl(EventPtr, AsyncInfoWrapper);
  }
  return Err;
}
|
|
|
|
/// Synchronizes with an event through the plugin-specific implementation.
///
/// \param EventPtr Event to synchronize with.
/// \returns The error state produced by syncEventImpl().
Error GenericDeviceTy::syncEvent(void *EventPtr) {
  Error Result = syncEventImpl(EventPtr);
  return Result;
}
|
|
|
|
/// Initializes the plugin.
///
/// Asks the plugin-specific initImpl() for the number of devices. When there
/// is at least one, the device list is sized (all slots null) and the global
/// handler is created.
///
/// \returns Success, or the error reported by initImpl().
Error GenericPluginTy::init() {
  auto CountOrErr = initImpl();
  if (!CountOrErr)
    return CountOrErr.takeError();

  NumDevices = *CountOrErr;

  // Without devices there is nothing else to set up.
  if (NumDevices != 0) {
    assert(Devices.empty() && "Plugin already initialized");
    Devices.resize(NumDevices, nullptr);

    GlobalHandler = Plugin::createGlobalHandler();
    assert(GlobalHandler && "Invalid global handler");
  }

  return Plugin::success();
}
|
|
|
|
/// Deinitializes the plugin.
///
/// Releases the global handler, deinitializes every device that is still
/// active and finally runs the plugin-specific deinitImpl().
///
/// \returns Success, or the first error from a device or from deinitImpl().
Error GenericPluginTy::deinit() {
  // There is no global handler if no device is available; deleting a null
  // pointer is a no-op. The pointer is reset so that it does not dangle while
  // the devices are torn down or if deinit() is entered again.
  // NOTE(review): the handler is released before the devices are
  // deinitialized; confirm that no device needs it during deinit.
  delete GlobalHandler;
  GlobalHandler = nullptr;

  // Deinitialize all active devices.
  for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) {
    if (Devices[DeviceId]) {
      if (auto Err = deinitDevice(DeviceId))
        return Err;
    }
    assert(!Devices[DeviceId] && "Device was not deinitialized");
  }

  // Perform last deinitializations on the plugin.
  return deinitImpl();
}
|
|
|
|
/// Creates and initializes the device with the given id.
///
/// The new device is stored in the device list before it is initialized.
///
/// \param DeviceId Index of the device; its slot must still be empty.
/// \returns The error state produced by the device's init().
Error GenericPluginTy::initDevice(int32_t DeviceId) {
  assert(!Devices[DeviceId] && "Device already initialized");

  GenericDeviceTy *NewDevice = Plugin::createDevice(DeviceId, NumDevices);
  assert(NewDevice && "Invalid device");

  // Keep track of the device, then bring up the device and its resources.
  Devices[DeviceId] = NewDevice;
  return NewDevice->init(*this);
}
|
|
|
|
/// Deinitializes and destroys the device with the given id.
///
/// Calling this for a device that is already gone is a successful no-op. If
/// the device's deinit() fails, the device is kept in the list.
///
/// \param DeviceId Index of the device.
/// \returns Success, or the error reported by the device's deinit().
Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
  GenericDeviceTy *Device = Devices[DeviceId];

  // The device may be already deinitialized.
  if (Device == nullptr)
    return Plugin::success();

  // Release the device's resources first.
  if (auto DeinitErr = Device->deinit())
    return DeinitErr;

  // Destroy the device and invalidate its slot.
  delete Device;
  Devices[DeviceId] = nullptr;

  return Plugin::success();
}
|
|
|
|
/// Exposed library API function, basically wrappers around the GenericDeviceTy
|
|
/// functionality with the same name. All non-async functions are redirected
|
|
/// to the async versions right away with a NULL AsyncInfoPtr.
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
int32_t __tgt_rtl_init_plugin() {
|
|
auto Err = Plugin::initIfNeeded();
|
|
if (Err)
|
|
REPORT("Failure to initialize plugin " GETNAME(TARGET_NAME) ": %s\n",
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_deinit_plugin() {
|
|
auto Err = Plugin::deinitIfNeeded();
|
|
if (Err)
|
|
REPORT("Failure to deinitialize plugin " GETNAME(TARGET_NAME) ": %s\n",
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
/// Checks whether an image targets the machine this plugin handles.
///
/// \param TgtImage Image to inspect.
/// \returns Zero if the plugin is not active; otherwise the result of the ELF
///          machine check against the plugin's magic ELF bits.
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *TgtImage) {
  if (Plugin::isActive())
    return elf_check_machine(TgtImage, Plugin::get().getMagicElfBits());

  return false;
}
|
|
|
|
/// Checks whether an image is valid for this plugin and compatible with the
/// available devices.
///
/// \param TgtImage Image to inspect.
/// \param Info Image information; Info->Arch may be null when no
///        subarchitecture was specified.
/// \returns Non-zero if the image can be used, zero otherwise.
int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *TgtImage,
                                       __tgt_image_info *Info) {
  if (!Plugin::isActive())
    return false;

  if (!__tgt_rtl_is_valid_binary(TgtImage))
    return false;

  // A subarchitecture was not specified. Assume it is compatible.
  if (!Info->Arch)
    return true;

  // Check the compatibility with all the available devices. Notice the
  // devices may not be initialized yet.
  auto CompatibleOrErr = Plugin::get().isImageCompatible(Info);
  if (!CompatibleOrErr) {
    // This error should not abort the execution, so we just inform the user
    // through the debug system.
    std::string ErrString = toString(CompatibleOrErr.takeError());
    DP("Failure to check whether image %p is valid: %s\n", TgtImage,
       ErrString.data());
    return false;
  }

  bool Compatible = *CompatibleOrErr;
  // The trailing space after "not" keeps the message from reading
  // "notcompatible".
  DP("Image is %scompatible with current environment: %s\n",
     (Compatible) ? "" : "not ", Info->Arch);

  return Compatible;
}
|
|
|
|
int32_t __tgt_rtl_supports_empty_images() {
|
|
return Plugin::get().supportsEmptyImages();
|
|
}
|
|
|
|
int32_t __tgt_rtl_init_device(int32_t DeviceId) {
|
|
auto Err = Plugin::get().initDevice(DeviceId);
|
|
if (Err)
|
|
REPORT("Failure to initialize device %d: %s\n", DeviceId,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_deinit_device(int32_t DeviceId) {
|
|
auto Err = Plugin::get().deinitDevice(DeviceId);
|
|
if (Err)
|
|
REPORT("Failure to deinitialize device %d: %s\n", DeviceId,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_number_of_devices() { return Plugin::get().getNumDevices(); }
|
|
|
|
int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
|
|
Plugin::get().setRequiresFlag(RequiresFlags);
|
|
return RequiresFlags;
|
|
}
|
|
|
|
int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId,
|
|
int32_t DstDeviceId) {
|
|
return Plugin::get().isDataExchangable(SrcDeviceId, DstDeviceId);
|
|
}
|
|
|
|
/// Loads an image on a device.
///
/// \param DeviceId Index of the device.
/// \param TgtImage Image to load.
/// \returns The image's offload entry table, or nullptr on failure (the error
///          is reported).
__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId,
                                          __tgt_device_image *TgtImage) {
  GenericPluginTy &Plugin = Plugin::get();
  GenericDeviceTy &Device = Plugin.getDevice(DeviceId);

  auto LoadedOrErr = Device.loadBinary(Plugin, TgtImage);
  if (LoadedOrErr) {
    __tgt_target_table *Table = *LoadedOrErr;
    assert(Table != nullptr && "Invalid table");
    return Table;
  }

  REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
         DeviceId, toString(LoadedOrErr.takeError()).data());
  return nullptr;
}
|
|
|
|
void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
|
|
int32_t Kind) {
|
|
auto AllocOrErr = Plugin::get().getDevice(DeviceId).dataAlloc(
|
|
Size, HostPtr, (TargetAllocTy)Kind);
|
|
if (!AllocOrErr) {
|
|
auto Err = AllocOrErr.takeError();
|
|
REPORT("Failure to allocate device memory: %s\n",
|
|
toString(std::move(Err)).data());
|
|
return nullptr;
|
|
}
|
|
assert(*AllocOrErr && "Null pointer upon successful allocation");
|
|
|
|
return *AllocOrErr;
|
|
}
|
|
|
|
int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) {
|
|
auto Err =
|
|
Plugin::get().getDevice(DeviceId).dataDelete(TgtPtr, (TargetAllocTy)Kind);
|
|
if (Err)
|
|
REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
|
|
int64_t Size) {
|
|
return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
|
|
/* AsyncInfoPtr */ nullptr);
|
|
}
|
|
|
|
int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
|
|
void *HstPtr, int64_t Size,
|
|
__tgt_async_info *AsyncInfoPtr) {
|
|
auto Err = Plugin::get().getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size,
|
|
AsyncInfoPtr);
|
|
if (Err)
|
|
REPORT("Failure to copy data from host to device. Pointers: host "
|
|
"= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
|
|
DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
|
|
int64_t Size) {
|
|
return __tgt_rtl_data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size,
|
|
/* AsyncInfoPtr */ nullptr);
|
|
}
|
|
|
|
int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
|
|
void *TgtPtr, int64_t Size,
|
|
__tgt_async_info *AsyncInfoPtr) {
|
|
auto Err = Plugin::get().getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr,
|
|
Size, AsyncInfoPtr);
|
|
if (Err)
|
|
REPORT("Faliure to copy data from device to host. Pointers: host "
|
|
"= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
|
|
DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
|
|
int32_t DstDeviceId, void *DstPtr,
|
|
int64_t Size) {
|
|
return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
|
|
Size, /* AsyncInfoPtr */ nullptr);
|
|
}
|
|
|
|
int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
|
|
int DstDeviceId, void *DstPtr,
|
|
int64_t Size,
|
|
__tgt_async_info *AsyncInfo) {
|
|
GenericDeviceTy &SrcDevice = Plugin::get().getDevice(SrcDeviceId);
|
|
GenericDeviceTy &DstDevice = Plugin::get().getDevice(DstDeviceId);
|
|
auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo);
|
|
if (Err)
|
|
REPORT("Failure to copy data from device (%d) to device (%d). Pointers: "
|
|
"host = " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
|
|
SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
|
|
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
|
int32_t NumArgs, int32_t NumTeams,
|
|
int32_t ThreadLimit,
|
|
uint64_t LoopTripCount) {
|
|
return __tgt_rtl_run_target_team_region_async(DeviceId, TgtEntryPtr, TgtArgs,
|
|
TgtOffsets, NumArgs, NumTeams,
|
|
ThreadLimit, LoopTripCount,
|
|
/* AsyncInfoPtr */ nullptr);
|
|
}
|
|
|
|
int32_t __tgt_rtl_run_target_team_region_async(
|
|
int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets,
|
|
int32_t NumArgs, int32_t NumTeams, int32_t ThreadLimit,
|
|
uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr) {
|
|
auto Err = Plugin::get().getDevice(DeviceId).runTargetTeamRegion(
|
|
TgtEntryPtr, TgtArgs, TgtOffsets, NumArgs, NumTeams, ThreadLimit,
|
|
LoopTripCount, AsyncInfoPtr);
|
|
if (Err)
|
|
REPORT("Failure to run target region " DPxMOD " in device %d: %s\n",
|
|
DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_synchronize(int32_t DeviceId,
|
|
__tgt_async_info *AsyncInfoPtr) {
|
|
auto Err = Plugin::get().getDevice(DeviceId).synchronize(AsyncInfoPtr);
|
|
if (Err)
|
|
REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
|
|
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
|
int32_t NumArgs) {
|
|
return __tgt_rtl_run_target_region_async(DeviceId, TgtEntryPtr, TgtArgs,
|
|
TgtOffsets, NumArgs,
|
|
/* AsyncInfoPtr */ nullptr);
|
|
}
|
|
|
|
int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr,
|
|
void **TgtArgs, ptrdiff_t *TgtOffsets,
|
|
int32_t NumArgs,
|
|
__tgt_async_info *AsyncInfoPtr) {
|
|
return __tgt_rtl_run_target_team_region_async(
|
|
DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, NumArgs,
|
|
/* team num*/ 1, /* thread limit */ 1, /* loop tripcount */ 0,
|
|
AsyncInfoPtr);
|
|
}
|
|
|
|
void __tgt_rtl_print_device_info(int32_t DeviceId) {
|
|
if (auto Err = Plugin::get().getDevice(DeviceId).printInfo())
|
|
REPORT("Failure to print device %d info: %s\n", DeviceId,
|
|
toString(std::move(Err)).data());
|
|
}
|
|
|
|
int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) {
|
|
auto Err = Plugin::get().getDevice(DeviceId).createEvent(EventPtr);
|
|
if (Err)
|
|
REPORT("Failure to create event: %s\n", toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr,
|
|
__tgt_async_info *AsyncInfoPtr) {
|
|
auto Err =
|
|
Plugin::get().getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr);
|
|
if (Err)
|
|
REPORT("Failure to record event %p: %s\n", EventPtr,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr,
|
|
__tgt_async_info *AsyncInfoPtr) {
|
|
auto Err =
|
|
Plugin::get().getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr);
|
|
if (Err)
|
|
REPORT("Failure to wait event %p: %s\n", EventPtr,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) {
|
|
auto Err = Plugin::get().getDevice(DeviceId).syncEvent(EventPtr);
|
|
if (Err)
|
|
REPORT("Failure to synchronize event %p: %s\n", EventPtr,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) {
|
|
auto Err = Plugin::get().getDevice(DeviceId).destroyEvent(EventPtr);
|
|
if (Err)
|
|
REPORT("Failure to destroy event %p: %s\n", EventPtr,
|
|
toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
/// Stores a new value in the info level returned by getInfoLevelInternal().
///
/// \param NewInfoLevel Value to store.
void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
  std::atomic<uint32_t> &Level = getInfoLevelInternal();
  Level.store(NewInfoLevel);
}
|
|
|
|
int32_t __tgt_rtl_init_async_info(int32_t DeviceId,
|
|
__tgt_async_info **AsyncInfoPtr) {
|
|
assert(AsyncInfoPtr && "Invalid async info");
|
|
|
|
auto Err = Plugin::get().getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr);
|
|
if (Err)
|
|
REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n",
|
|
DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
|
|
__tgt_device_info *DeviceInfo,
|
|
const char **ErrStr) {
|
|
*ErrStr = "";
|
|
|
|
auto Err = Plugin::get().getDevice(DeviceId).initDeviceInfo(DeviceInfo);
|
|
if (Err)
|
|
REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n",
|
|
DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data());
|
|
|
|
return (bool)Err;
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|