23 #ifndef HEDGEHOG_ABSTRACT_CUDA_TASK_H 24 #define HEDGEHOG_ABSTRACT_CUDA_TASK_H 26 #include <unordered_set> 28 #include <cuda_runtime.h> 29 #include "abstract_task.h" 33 #ifndef checkCudaErrors 34 inline void __checkCudaErrors(cudaError_t err,
const char *file,
const int line) {
40 if (cudaSuccess != err) {
41 std::cerr <<
"checkCudaErrors() Cuda error = " 43 <<
"\"" << cudaGetErrorString(err) <<
" \" from " 44 << file <<
":" << line << std::endl;
55 if (CUBLAS_STATUS_SUCCESS != status) {
56 std::cerr <<
"checkCudaErrors() Status Error = " 58 << file <<
":" << line << std::endl;
62 #define checkCudaErrors(err) __checkCudaErrors(err, __FILE__, __LINE__) 77 template<
class TaskOutput,
class ... TaskInputs>
79 static_assert(traits::isUnique < TaskInputs...>,
"A Task can't accept multiple inputs with the same type.");
80 static_assert(
sizeof... (TaskInputs) >= 1,
"A node need to have one output type and at least one output type.");
94 enablePeerAccess_(true) {
95 this->
core()->isCudaRelated(
true);
105 enablePeerAccess_(true) {
106 this->
core()->isCudaRelated(
true);
116 :
AbstractTask<TaskOutput, TaskInputs...>(name, numberThreads, automaticStart),
117 enablePeerAccess_(enablePeerAccess) {
118 this->
core()->isCudaRelated(
true);
126 checkCudaErrors(cudaGetDeviceCount(&numGpus));
128 checkCudaErrors(cudaSetDevice(this->
deviceId()));
129 checkCudaErrors(cudaStreamCreate(&stream_));
131 if (enablePeerAccess_) {
132 for (
int i = 0; i < numGpus; ++i) {
134 checkCudaErrors(cudaDeviceCanAccessPeer(&canAccess, this->
deviceId(), i));
137 auto ret = cudaDeviceEnablePeerAccess(i, 0);
138 if (ret != cudaErrorPeerAccessAlreadyEnabled) {
139 checkCudaErrors(ret);
141 peerDeviceIds_.insert(i);
146 auto ret = cudaGetLastError();
147 if (ret != cudaErrorPeerAccessAlreadyEnabled) {
148 checkCudaErrors(ret);
158 checkCudaErrors(cudaStreamDestroy(stream_));
178 bool hasPeerAccess(
int peerDeviceId) {
return peerDeviceIds_.find(peerDeviceId) != peerDeviceIds_.end(); }
184 #endif //HEDGEHOG_ABSTRACT_CUDA_TASK_H bool enablePeerAccess_
Enable CUDA Peer Access through all CUDA devices available.
void shutdown() final
Shut down an AbstractCUDATask, destroying the task's CUDA stream created during AbstractCUDATask::initialize.
Base node for computation.
AbstractCUDATask(std::string_view const &name, size_t numberThreads=1)
Constructor for an AbstractCUDATask with the name as a mandatory parameter and the number of threads as an optional parameter.
int deviceId()
Task's device ID accessor.
virtual void shutdownCuda()
Virtual shutdown step, where user defined data structure can be destroyed.
bool enablePeerAccess() const
Accessor for peer access choice.
std::unordered_set< int > peerDeviceIds_
Set of the CUDA device IDs for which peer access has been enabled.
cudaStream_t stream_
CUDA stream linked to the task.
std::shared_ptr< core::CoreNode > core() final
Task's core accessor.
void initialize() final
Initialize an AbstractCUDATask: bind it to a CUDA device and enable peer access if it has been requested.
void __checkCudaErrors(cudaError_t err, const char *file, const int line)
Inline helper function for all of the SDK helper functions, to catch and show CUDA Error...
AbstractCUDATask(size_t numberThreads=1)
Default constructor for an AbstractCUDATask.
cudaStream_t stream() const
Getter for CUDA task's stream.
bool hasPeerAccess(int peerDeviceId)
Accessor for peer access enabled for a specific device id.
virtual void initializeCuda()
Virtual initialization step, where user defined data structure can be initialized.
std::string_view name()
Task's name accessor.
bool automaticStart()
Task's automatic start accessor.
size_t numberThreads()
Task's number of threads accessor.
Abstract Task specialized for CUDA computation.
AbstractCUDATask(std::string_view const &name, size_t numberThreads, bool automaticStart, bool enablePeerAccess)
Constructor for an AbstractCUDATask with name, number of threads, automaticStart and enablePeerAccess...