[Libomptarget] Allow the device runtime to be compiled for the host
Currently the OpenMP offloading device runtime is only expected to be compiled for the specific architecture it's targeting. This is problematic if we want to make compiling the device runtime more general via the standard `clang` driver rather than invoking the clang front-end directly. This patch addresses this by primarily changing the declare type to `nohost` so the host will not contain any of this code. Additionally, we forward declare the functions that are defined via variants, otherwise these would cause problems on the host. Reviewed By: jdoerfert, tianshilei1992 Differential Revision: https://reviews.llvm.org/D125260
This commit is contained in:
parent
0a22dfcb11
commit
b4f8443d97
|
@ -18,7 +18,7 @@ namespace _OMP {
|
|||
|
||||
namespace mapping {
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
inline constexpr uint32_t MaxThreadsPerTeam = 1024;
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include "Debug.h"
|
||||
#include "Types.h"
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
namespace _OMP {
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
using namespace _OMP;
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
// defined by CGOpenMPRuntimeGPU
|
||||
extern uint32_t __omp_rtl_debug_kind;
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
using namespace _OMP;
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
extern "C" {
|
||||
/// Forwards \p condition to the compiler's __builtin_assume intrinsic.
void __assert_assume(bool condition) { __builtin_assume(condition); }
|
||||
|
@ -30,6 +30,10 @@ void __assert_fail(const char *assertion, const char *file, unsigned line,
|
|||
__builtin_trap();
|
||||
}
|
||||
|
||||
namespace impl {
|
||||
int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
|
||||
}
|
||||
|
||||
#pragma omp begin declare variant match( \
|
||||
device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
|
||||
int32_t vprintf(const char *, void *);
|
||||
|
@ -55,8 +59,7 @@ int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
|
|||
}
|
||||
|
||||
/// Current indentation level for the function trace. Only accessed by thread 0.
|
||||
__attribute__((loader_uninitialized))
|
||||
static uint32_t Level;
|
||||
__attribute__((loader_uninitialized)) static uint32_t Level;
|
||||
#pragma omp allocate(Level) allocator(omp_pteam_mem_alloc)
|
||||
|
||||
DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
using namespace _OMP;
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
static void inititializeRuntime(bool IsSPMD) {
|
||||
// Order is important here.
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include "Types.h"
|
||||
#include "Utils.h"
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||
|
||||
|
@ -24,6 +24,23 @@ using namespace _OMP;
|
|||
namespace _OMP {
|
||||
namespace impl {
|
||||
|
||||
// Forward declarations of functions to be defined for AMDGCN and NVPTX.
|
||||
const llvm::omp::GV &getGridValue();
|
||||
uint32_t getGridDim(uint32_t n, uint16_t d);
|
||||
uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size,
|
||||
uint16_t group_size);
|
||||
uint32_t getNumHardwareThreadsInBlock();
|
||||
LaneMaskTy activemask();
|
||||
LaneMaskTy lanemaskLT();
|
||||
LaneMaskTy lanemaskGT();
|
||||
uint32_t getThreadIdInWarp();
|
||||
uint32_t getThreadIdInBlock();
|
||||
uint32_t getKernelSize();
|
||||
uint32_t getBlockId();
|
||||
uint32_t getNumberOfBlocks();
|
||||
uint32_t getWarpId();
|
||||
uint32_t getNumberOfWarpsInBlock();
|
||||
|
||||
/// AMDGCN Implementation
|
||||
///
|
||||
///{
|
||||
|
|
|
@ -13,11 +13,15 @@
|
|||
|
||||
#include "Debug.h"
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
namespace _OMP {
|
||||
namespace impl {
|
||||
|
||||
double getWTick();
|
||||
|
||||
double getWTime();
|
||||
|
||||
/// AMDGCN Implementation
|
||||
///
|
||||
///{
|
||||
|
|
|
@ -42,7 +42,7 @@
|
|||
|
||||
using namespace _OMP;
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
namespace {
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ using namespace _OMP;
|
|||
|
||||
namespace {
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
void gpu_regular_warp_reduce(void *reduce_data, ShuffleReductFnTy shflFct) {
|
||||
for (uint32_t mask = mapping::getWarpSize() / 2; mask > 0; mask /= 2) {
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
using namespace _OMP;
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
/// Memory implementation
|
||||
///
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include "Types.h"
|
||||
#include "Utils.h"
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
using namespace _OMP;
|
||||
|
||||
|
@ -63,6 +63,22 @@ uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) {
|
|||
}
|
||||
///}
|
||||
|
||||
// Forward declarations of functions to be defined for AMDGCN and NVPTX.
|
||||
uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering);
|
||||
void namedBarrierInit();
|
||||
void namedBarrier();
|
||||
void fenceTeam(int Ordering);
|
||||
void fenceKernel(int Ordering);
|
||||
void fenceSystem(int Ordering);
|
||||
void syncWarp(__kmpc_impl_lanemask_t);
|
||||
void syncThreads();
|
||||
/// Defined inline here: simply calls syncThreads(), which is declared above.
void syncThreadsAligned() { syncThreads(); }
|
||||
void unsetLock(omp_lock_t *);
|
||||
int testLock(omp_lock_t *);
|
||||
void initLock(omp_lock_t *);
|
||||
void destroyLock(omp_lock_t *);
|
||||
void setLock(omp_lock_t *);
|
||||
|
||||
/// AMDGCN Implementation
|
||||
///
|
||||
///{
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
|
||||
using namespace _OMP;
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, uint32_t, int32_t,
|
||||
uint64_t TaskSizeInclPrivateValues,
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
#include "Interface.h"
|
||||
#include "Mapping.h"
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
using namespace _OMP;
|
||||
|
||||
|
@ -32,6 +32,9 @@ __attribute__((used, retain, weak, optnone, cold)) void keepAlive() {
|
|||
|
||||
namespace impl {
|
||||
|
||||
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits);
|
||||
uint64_t Pack(uint32_t LowBits, uint32_t HighBits);
|
||||
|
||||
/// AMDGCN Implementation
|
||||
///
|
||||
///{
|
||||
|
@ -72,6 +75,10 @@ uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
|
|||
|
||||
#pragma omp end declare variant
|
||||
|
||||
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
|
||||
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
|
||||
int32_t Width);
|
||||
|
||||
/// AMDGCN Implementation
|
||||
///
|
||||
///{
|
||||
|
|
|
@ -43,7 +43,7 @@ struct DynamicScheduleTracker {
|
|||
#define NOT_FINISHED 1
|
||||
#define LAST_CHUNK 2
|
||||
|
||||
#pragma omp declare target
|
||||
#pragma omp begin declare target device_type(nohost)
|
||||
|
||||
// TODO: This variable is a hack inherited from the old runtime.
|
||||
static uint64_t SHARED(Cnt);
|
||||
|
|
Loading…
Reference in New Issue