[OpenMP][libomptarget] Simplify resource managers in NextGen plugins
This patch removes the classes GenericStreamManagerTy and GenericEventManagerTy from the PluginInterface header. Differential Revision: https://reviews.llvm.org/D138769
This commit is contained in:
parent
2cb83cd288
commit
cea616f847
|
@ -133,14 +133,18 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
|
|||
|
||||
GenericDeviceTy::GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
|
||||
const llvm::omp::GV &OMPGridValues)
|
||||
: OMP_TeamLimit("OMP_TEAM_LIMIT"), OMP_NumTeams("OMP_NUM_TEAMS"),
|
||||
: MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
|
||||
OMP_NumTeams("OMP_NUM_TEAMS"),
|
||||
OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
|
||||
OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
|
||||
OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
|
||||
// Do not initialize the following two envars since they depend on the
|
||||
// device initialization. These cannot be consulted until the device is
|
||||
// initialized correctly. We initialize them in GenericDeviceTy::init().
|
||||
OMPX_TargetStackSize(), OMPX_TargetHeapSize(), MemoryManager(nullptr),
|
||||
OMPX_TargetStackSize(), OMPX_TargetHeapSize(),
|
||||
// By default, the initial number of streams and of events is 32 each.
|
||||
OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 32),
|
||||
OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 32),
|
||||
DeviceId(DeviceId), GridValues(OMPGridValues),
|
||||
PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock() {
|
||||
if (OMP_NumTeams > 0)
|
||||
|
|
|
@ -398,6 +398,9 @@ private:
|
|||
/// setupDeviceEnvironment() function.
|
||||
virtual bool shouldSetupDeviceEnvironment() const { return true; }
|
||||
|
||||
/// Pointer to the memory manager or nullptr if not available.
|
||||
MemoryManagerTy *MemoryManager;
|
||||
|
||||
/// Environment variables defined by the OpenMP standard.
|
||||
Int32Envar OMP_TeamLimit;
|
||||
Int32Envar OMP_NumTeams;
|
||||
|
@ -409,10 +412,12 @@ private:
|
|||
UInt64Envar OMPX_TargetStackSize;
|
||||
UInt64Envar OMPX_TargetHeapSize;
|
||||
|
||||
/// Pointer to the memory manager or nullptr if not available.
|
||||
MemoryManagerTy *MemoryManager;
|
||||
|
||||
protected:
|
||||
/// Environment variables defined by the LLVM OpenMP implementation
|
||||
/// regarding the initial number of streams and events.
|
||||
UInt32Envar OMPX_InitialNumStreams;
|
||||
UInt32Envar OMPX_InitialNumEvents;
|
||||
|
||||
/// Array of images loaded into the device. Images are automatically
|
||||
/// deallocated by the allocator.
|
||||
llvm::SmallVector<DeviceImageTy *> LoadedImages;
|
||||
|
@ -656,11 +661,11 @@ public:
|
|||
static GenericGlobalHandlerTy *createGlobalHandler();
|
||||
};
|
||||
|
||||
/// Auxiliary interface class for GenericDeviceResourcePoolTy. This class acts
|
||||
/// as a reference to a device resource, such as a stream, and requires some
|
||||
/// basic functions to be implemented. The derived class should define an empty
|
||||
/// constructor that creates an empty and invalid resource reference. Do not
|
||||
/// create a new resource on the ctor, but on the create() function instead.
|
||||
/// Auxiliary interface class for GenericDeviceResourceManagerTy. This class
|
||||
/// acts as a reference to a device resource, such as a stream, and requires
|
||||
/// some basic functions to be implemented. The derived class should define an
|
||||
/// empty constructor that creates an empty and invalid resource reference. Do
|
||||
/// not create a new resource on the ctor, but on the create() function instead.
|
||||
struct GenericDeviceResourceRef {
|
||||
/// Create a new resource and store a reference to it.
|
||||
virtual Error create(GenericDeviceTy &Device) = 0;
|
||||
|
@ -676,17 +681,17 @@ protected:
|
|||
/// operates with references to the actual resources. These references must
|
||||
/// derive from the GenericDeviceResourceRef class and implement the create
|
||||
/// and destroy virtual functions.
|
||||
template <typename ResourceRef> class GenericDeviceResourcePoolTy {
|
||||
using ResourcePoolTy = GenericDeviceResourcePoolTy<ResourceRef>;
|
||||
template <typename ResourceRef> class GenericDeviceResourceManagerTy {
|
||||
using ResourcePoolTy = GenericDeviceResourceManagerTy<ResourceRef>;
|
||||
|
||||
public:
|
||||
/// Create an empty resource pool for a specific device.
|
||||
GenericDeviceResourcePoolTy(GenericDeviceTy &Device)
|
||||
GenericDeviceResourceManagerTy(GenericDeviceTy &Device)
|
||||
: Device(Device), NextAvailable(0) {}
|
||||
|
||||
/// Destroy the resource pool. At this point, the deinit() function should
|
||||
/// already have been executed so the resource pool should be empty.
|
||||
virtual ~GenericDeviceResourcePoolTy() {
|
||||
virtual ~GenericDeviceResourceManagerTy() {
|
||||
assert(ResourcePool.empty() && "Resource pool not empty");
|
||||
}
|
||||
|
||||
|
@ -712,7 +717,6 @@ public:
|
|||
return Plugin::success();
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Get resource from the pool or create new resources.
|
||||
ResourceRef getResource() {
|
||||
const std::lock_guard<std::mutex> Lock(Mutex);
|
||||
|
@ -774,16 +778,17 @@ private:
|
|||
if (OldSize == NewSize)
|
||||
return Plugin::success();
|
||||
|
||||
if (OldSize > NewSize) {
|
||||
// Decrease the number of resources.
|
||||
auto Err = ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize);
|
||||
if (OldSize < NewSize) {
|
||||
// Increase the number of resources.
|
||||
ResourcePool.resize(NewSize);
|
||||
return Err;
|
||||
return ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize);
|
||||
}
|
||||
|
||||
// Increase the number of resources otherwise.
|
||||
// Decrease the number of resources otherwise.
|
||||
auto Err = ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize);
|
||||
ResourcePool.resize(NewSize);
|
||||
return ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize);
|
||||
|
||||
return Err;
|
||||
}
|
||||
|
||||
/// The device to which the resources belong
|
||||
|
@ -795,73 +800,10 @@ private:
|
|||
/// The next available resource in the pool.
|
||||
uint32_t NextAvailable;
|
||||
|
||||
protected:
|
||||
/// The actual resource pool.
|
||||
std::deque<ResourceRef> ResourcePool;
|
||||
};
|
||||
|
||||
/// Class implementing a common stream manager. This class can be directly used
|
||||
/// by the specific plugins if necessary. The StreamRef type should derive from
|
||||
/// the GenericDeviceResourceRef. Look at its description to know the details of
|
||||
/// their requirements.
|
||||
template <typename StreamRef>
|
||||
class GenericStreamManagerTy : public GenericDeviceResourcePoolTy<StreamRef> {
|
||||
using ResourcePoolTy = GenericDeviceResourcePoolTy<StreamRef>;
|
||||
|
||||
public:
|
||||
/// Create a stream manager with space for an initial number of streams. No
|
||||
/// stream will be created until the init() function is called.
|
||||
GenericStreamManagerTy(GenericDeviceTy &Device, uint32_t DefNumStreams = 32)
|
||||
: ResourcePoolTy(Device),
|
||||
InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", DefNumStreams) {}
|
||||
|
||||
/// Initialize the stream pool and their resources with the initial number of
|
||||
/// streams.
|
||||
Error init() { return ResourcePoolTy::init(InitialNumStreams.get()); }
|
||||
|
||||
/// Get an available stream or create new.
|
||||
StreamRef getStream() { return ResourcePoolTy::getResource(); }
|
||||
|
||||
/// Return idle stream.
|
||||
void returnStream(StreamRef Stream) {
|
||||
ResourcePoolTy::returnResource(Stream);
|
||||
}
|
||||
|
||||
private:
|
||||
/// The initial stream pool size, potentially defined by an envar.
|
||||
UInt32Envar InitialNumStreams;
|
||||
};
|
||||
|
||||
/// Class implementing a common event manager. This class can be directly used
|
||||
/// by the specific plugins if necessary. The EventRef type should derive from
|
||||
/// the GenericDeviceResourceRef. Look at its description to know the details of
|
||||
/// their requirements.
|
||||
template <typename EventRef>
|
||||
struct GenericEventManagerTy : public GenericDeviceResourcePoolTy<EventRef> {
|
||||
using ResourcePoolTy = GenericDeviceResourcePoolTy<EventRef>;
|
||||
|
||||
public:
|
||||
/// Create an event manager with space for an initial number of events. No
|
||||
/// event will be created until the init() function is called.
|
||||
GenericEventManagerTy(GenericDeviceTy &Device, uint32_t DefNumEvents = 32)
|
||||
: ResourcePoolTy(Device),
|
||||
InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", DefNumEvents) {}
|
||||
|
||||
/// Initialize the event pool and their resources with the initial number of
|
||||
/// events.
|
||||
Error init() { return ResourcePoolTy::init(InitialNumEvents.get()); }
|
||||
|
||||
/// Get an available event or create new.
|
||||
EventRef getEvent() { return ResourcePoolTy::getResource(); }
|
||||
|
||||
/// Return an idle event.
|
||||
void returnEvent(EventRef Event) { ResourcePoolTy::returnResource(Event); }
|
||||
|
||||
private:
|
||||
/// The initial event pool size, potentially defined by an envar.
|
||||
UInt32Envar InitialNumEvents;
|
||||
};
|
||||
|
||||
} // namespace plugin
|
||||
} // namespace target
|
||||
} // namespace omp
|
||||
|
|
|
@ -34,8 +34,6 @@ namespace plugin {
|
|||
struct CUDAKernelTy;
|
||||
struct CUDADeviceTy;
|
||||
struct CUDAPluginTy;
|
||||
struct CUDAStreamManagerTy;
|
||||
struct CUDAEventManagerTy;
|
||||
|
||||
/// Class implementing the CUDA kernel functionalities which derives from the
|
||||
/// generic kernel class.
|
||||
|
@ -260,11 +258,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
|||
return Err;
|
||||
|
||||
// Initialize stream pool.
|
||||
if (auto Err = CUDAStreamManager.init())
|
||||
if (auto Err = CUDAStreamManager.init(OMPX_InitialNumStreams))
|
||||
return Err;
|
||||
|
||||
// Initialize event pool.
|
||||
if (auto Err = CUDAEventManager.init())
|
||||
if (auto Err = CUDAEventManager.init(OMPX_InitialNumEvents))
|
||||
return Err;
|
||||
|
||||
// Query attributes to determine number of threads/block and blocks/grid.
|
||||
|
@ -383,7 +381,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
|||
CUstream getStream(AsyncInfoWrapperTy &AsyncInfoWrapper) {
|
||||
CUstream &Stream = AsyncInfoWrapper.getQueueAs<CUstream>();
|
||||
if (!Stream)
|
||||
Stream = CUDAStreamManager.getStream();
|
||||
Stream = CUDAStreamManager.getResource();
|
||||
return Stream;
|
||||
}
|
||||
|
||||
|
@ -482,7 +480,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
|||
// Once the stream is synchronized, return it to stream pool and reset
|
||||
// AsyncInfo. This is to make sure the synchronization only works for its
|
||||
// own tasks.
|
||||
CUDAStreamManager.returnStream(Stream);
|
||||
CUDAStreamManager.returnResource(Stream);
|
||||
AsyncInfo.Queue = nullptr;
|
||||
|
||||
return Plugin::check(Res, "Error in cuStreamSynchronize: %s");
|
||||
|
@ -553,14 +551,14 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
|||
/// Create an event.
|
||||
Error createEventImpl(void **EventPtrStorage) override {
|
||||
CUevent *Event = reinterpret_cast<CUevent *>(EventPtrStorage);
|
||||
*Event = CUDAEventManager.getEvent();
|
||||
*Event = CUDAEventManager.getResource();
|
||||
return Plugin::success();
|
||||
}
|
||||
|
||||
/// Destroy a previously created event.
|
||||
Error destroyEventImpl(void *EventPtr) override {
|
||||
CUevent Event = reinterpret_cast<CUevent>(EventPtr);
|
||||
CUDAEventManager.returnEvent(Event);
|
||||
CUDAEventManager.returnResource(Event);
|
||||
return Plugin::success();
|
||||
}
|
||||
|
||||
|
@ -779,8 +777,8 @@ struct CUDADeviceTy : public GenericDeviceTy {
|
|||
}
|
||||
|
||||
private:
|
||||
using CUDAStreamManagerTy = GenericStreamManagerTy<CUDAStreamRef>;
|
||||
using CUDAEventManagerTy = GenericEventManagerTy<CUDAEventRef>;
|
||||
using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
|
||||
using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;
|
||||
|
||||
/// Stream manager for CUDA streams.
|
||||
CUDAStreamManagerTy CUDAStreamManager;
|
||||
|
|
Loading…
Reference in New Issue