HTGS  v2.0
The Hybrid Task Graph Scheduler
ICudaTask.hpp
Go to the documentation of this file.
1 
2 // NIST-developed software is provided by NIST as a public service. You may use, copy and distribute copies of the software in any medium, provided that you keep intact this entire notice. You may improve, modify and create derivative works of the software or any portion of the software, and you may copy and distribute such modifications or works. Modified works should carry a notice stating that you changed the software and should note the date and nature of any such change. Please explicitly acknowledge the National Institute of Standards and Technology as the source of the software.
3 // NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
4 // You are solely responsible for determining the appropriateness of using and distributing the software and you assume all risks associated with its use, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and the unavailability or interruption of operation. This software is not intended to be used in any situation where a failure could cause risk of injury or damage to property. The software developed by NIST employees is not subject to copyright protection within the United States.
5 
14 #ifdef USE_CUDA
15 #ifndef HTGS_CUDATASK_HPP
16 #define HTGS_CUDATASK_HPP
17 
#include <cuda_runtime_api.h>

#include <algorithm>
#include <cstddef>
#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include <htgs/api/ITask.hpp>
25 namespace htgs {
26 
27 template<class T>
28 class MemoryData;
29 
119 template<class T, class U>
120 class ICudaTask : public ITask<T, U> {
121  static_assert(std::is_base_of<IData, T>::value, "T must derive from IData");
122  static_assert(std::is_base_of<IData, U>::value, "U must derive from IData");
123 
124  public:
125 
135  ICudaTask(int *cudaIds, size_t numGpus, bool autoEnablePeerAccess = true) {
136  this->cudaIds = cudaIds;
137  this->numGpus = numGpus;
138  }
139 
143 
144  virtual ~ICudaTask() override {}
145 
149  virtual void initializeCudaGPU() {}
150 
155  virtual void executeTask(std::shared_ptr<T> data) = 0;
156 
160  virtual void shutdownCuda() {}
161 
166  virtual std::string getName() override {
167  return "Unnamed GPU ITask";
168  }
169 
170  std::string getDotFillColor() override {
171  return "green3";
172  }
173 
174 // virtual std::string genDot(int flags, std::string dotId) override {
175 // std::string inOutLabel = (((DOTGEN_FLAG_SHOW_IN_OUT_TYPES & flags) != 0) ? ("\nin: "+this->inTypeName()+"\nout: "+this->outTypeName()) : "");
176 // std::string threadLabel = (((flags & DOTGEN_FLAG_SHOW_ALL_THREADING) != 0) ? "" : (" x" + std::to_string(this->getNumThreads())));
177 // return dotId + "[label=\"" + this->getName() + threadLabel + inOutLabel + "\",style=filled,fillcolor=forestgreen,shape=box,color=black,width=.2,height=.2];\n";
178 // }
179 
184  virtual ITask <T, U> *copy() = 0;
185 
189  virtual void debug() override {}
190 
194 
200  int getCudaId() {
201  return this->cudaId;
202  }
203 
211  bool requiresCopy(size_t pipelineId) {
212  return std::find(this->nonPeerDevIds.begin(), this->nonPeerDevIds.end(),
213  this->cudaIds[pipelineId]) != this->nonPeerDevIds.end();
214  }
215 
224  template<class V>
225  bool requiresCopy(std::shared_ptr<MemoryData<V>> data) {
226  return this->requiresCopy(data->getPipelineId());
227  }
228 
236  bool hasPeerToPeerCopy(size_t pipelineId) { return !requiresCopy((size_t)cudaId); }
237 
253  template<class V>
254  bool autoCopy(V *destination, std::shared_ptr<MemoryData<V>> data, long numElems) {
255 
256  if (requiresCopy(data)) {
257  cudaMemcpyPeerAsync((void *) destination,
258  this->cudaId,
259  (void *) data->get(),
260  this->cudaIds[data->getPipelineId()],
261  sizeof(V) * numElems,
262  this->stream);
263  return true;
264  } else {
265  return false;
266  }
267  }
268 
273  void initialize() override final {
274  this->cudaId = this->cudaIds[this->getPipelineId()];
275 
276  int numGpus;
277  cudaGetDeviceCount(&numGpus);
278 
279  HTGS_ASSERT(this->cudaId < numGpus, "Error: Cuda ID: " << std::to_string(this->cudaId) << " is larger than the number of GPUs: " << std::to_string(numGpus));
280 
281  cudaSetDevice(this->cudaId);
282  cudaStreamCreate(&stream);
283 
284  if (autoEnablePeerAccess) {
285 
286  for (size_t i = 0; i < this->numGpus; i++) {
287  int peerId = this->cudaIds[i];
288  if (peerId != this->cudaId) {
289  int canAccess;
290  cudaDeviceCanAccessPeer(&canAccess, this->cudaId, peerId);
291 
292  if (canAccess) {
293  cudaDeviceEnablePeerAccess(peerId, 0);
294  } else {
295  this->nonPeerDevIds.push_back(peerId);
296  }
297  }
298  }
299  }
300 
301  this->initializeCudaGPU();
302  }
303 
308  void shutdown() override final {
309  this->shutdownCuda();
310  cudaStreamDestroy(stream);
311  }
312 
317  const cudaStream_t &getStream() const {
318  return stream;
319  }
320 
325  int *getCudaIds() {
326  return this->cudaIds;
327  }
328 
333  size_t getNumGPUs() {
334  return this->numGpus;
335  }
336 
342  void syncStream() {
343  cudaStreamSynchronize(stream);
344  }
345 
346  private:
347  cudaStream_t stream;
348  int *cudaIds;
349 
350  size_t numGpus;
351  int cudaId;
352  std::vector<int> nonPeerDevIds;
354 };
355 
356 }
357 #endif //HTGS_CUDATASK_HPP
358 #endif //USE_CUDA
359 
360 
bool autoCopy(V *destination, std::shared_ptr< MemoryData< V >> data, long numElems)
Will automatically copy from one GPU to another (if it is required).
Definition: ICudaTask.hpp:254
int getCudaId()
Gets the Cuda Id for this cudaTask.
Definition: ICudaTask.hpp:200
bool requiresCopy(size_t pipelineId)
Checks if the requested pipelineId requires GPU-to-GPU copy.
Definition: ICudaTask.hpp:211
bool requiresCopy(std::shared_ptr< MemoryData< V >> data)
Checks if the requested pipelineId requires GPU-to-GPU copy.
Definition: ICudaTask.hpp:225
void initialize() override final
Initializes the CudaTask to be bound to a particular GPU.
Definition: ICudaTask.hpp:273
cudaStream_t stream
The CUDA stream for the ICudaTask (set after initialize)
Definition: ICudaTask.hpp:347
virtual void debug() override
Virtual function that can be used to provide debug information.
Definition: ICudaTask.hpp:189
int cudaId
The CudaID for the ICudaTask (set after initialize)
Definition: ICudaTask.hpp:351
Describes memory allocated by a MemoryManager to manage shared memory across multiple ITask...
Definition: ICudaTask.hpp:28
size_t numGpus
The number of GPUs.
Definition: ICudaTask.hpp:350
size_t getNumGPUs()
Gets the number of GPUs specified during ICudaTask construction.
Definition: ICudaTask.hpp:333
int * getCudaIds()
Gets the cudaIds specified during ICudaTask construction.
Definition: ICudaTask.hpp:325
void shutdown() override final
Shuts down the ICudaTask.
Definition: ICudaTask.hpp:308
virtual void executeTask(std::shared_ptr< T > data)=0
Executes the ICudaTask on some data.
size_t getPipelineId()
Gets the pipeline ID.
Definition: AnyITask.hpp:367
const cudaStream_t & getStream() const
Gets the CUDA stream for this CUDA task.
Definition: ICudaTask.hpp:317
ICudaTask(int *cudaIds, size_t numGpus, bool autoEnablePeerAccess=true)
Creates an ICudaTask.
Definition: ICudaTask.hpp:135
An interface to process input data and forward results within a TaskGraph.
Definition: ITask.hpp:165
virtual void initializeCudaGPU()
Virtual function that is called when the ICudaTask has been initialized and is bound to a CUDA GPU...
Definition: ICudaTask.hpp:149
int * cudaIds
The array of cuda Ids (one per GPU)
Definition: ICudaTask.hpp:348
#define HTGS_ASSERT(condition, message)
Prints a more meaningful assertion message and terminates if the condition fails. ...
Definition: debug_message.hpp:25
void syncStream()
Synchronizes the Cuda stream associated with this task.
Definition: ICudaTask.hpp:342
std::vector< int > nonPeerDevIds
The list of CudaIds that do not have peer-to-peer access.
Definition: ICudaTask.hpp:352
virtual std::string getName() override
Virtual function that gets the name of this ICudaTask.
Definition: ICudaTask.hpp:166
An ICudaTask is used to attach a task to an NVIDIA Cuda GPU.
Definition: ICudaTask.hpp:120
bool autoEnablePeerAccess
Flag to automatically enable peer access between multiple GPUs.
Definition: ICudaTask.hpp:353
std::string getDotFillColor() override
Gets the color for filling the shape for graphviz dot.
Definition: ICudaTask.hpp:170
size_t pipelineId
The execution pipeline id for the ITask.
Definition: AnyITask.hpp:604
An interface to process input data and forward results within a TaskGraph.
virtual ITask< T, U > * copy()=0
Pure virtual function that copies this ICudaTask.
bool hasPeerToPeerCopy(size_t pipelineId)
Checks if the requested pipelineId allows peer to peer GPU copy.
Definition: ICudaTask.hpp:236
Definition: Bookkeeper.hpp:23
virtual void shutdownCuda()
Virtual function that is called when the ICudaTask is shutting down.
Definition: ICudaTask.hpp:160