HTGS  v2.0
The Hybrid Task Graph Scheduler
ICudaTask.hpp
Go to the documentation of this file.
1 
2 // NIST-developed software is provided by NIST as a public service. You may use, copy and distribute copies of the software in any medium, provided that you keep intact this entire notice. You may improve, modify and create derivative works of the software or any portion of the software, and you may copy and distribute such modifications or works. Modified works should carry a notice stating that you changed the software and should note the date and nature of any such change. Please explicitly acknowledge the National Institute of Standards and Technology as the source of the software.
3 // NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
4 // You are solely responsible for determining the appropriateness of using and distributing the software and you assume all risks associated with its use, including but not limited to the risks and costs of program errors, compliance with applicable laws, damage to or loss of data, programs or equipment, and the unavailability or interruption of operation. This software is not intended to be used in any situation where a failure could cause risk of injury or damage to property. The software developed by NIST employees is not subject to copyright protection within the United States.
5 
14 #ifdef USE_CUDA
15 #ifndef HTGS_CUDATASK_HPP
16 #define HTGS_CUDATASK_HPP
17 
#include <cuda_runtime_api.h>

#include <algorithm>
#include <cstddef>
#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include <htgs/api/ITask.hpp>
25 namespace htgs {
26 
27 template<class T>
28 class MemoryData;
29 
119 template<class T, class U>
120 class ICudaTask : public ITask<T, U> {
121  static_assert(std::is_base_of<IData, T>::value, "T must derive from IData");
122  static_assert(std::is_base_of<IData, U>::value, "U must derive from IData");
123 
124  public:
125 
135  ICudaTask(int *cudaIds, size_t numGpus, bool autoEnablePeerAccess = true) {
136  this->cudaIds = cudaIds;
137  this->numGpus = numGpus;
138  }
139 
143 
144  virtual ~ICudaTask() override {}
145 
149  virtual void initializeCudaGPU() {}
150 
155  virtual void executeTask(std::shared_ptr<T> data) = 0;
156 
160  virtual void shutdownCuda() {}
161 
166  virtual std::string getName() override {
167  return "Unnamed GPU ITask";
168  }
169 
170  std::string getDotFillColor() override {
171  return "green3";
172  }
173 
174 // virtual std::string genDot(int flags, std::string dotId) override {
175 // std::string inOutLabel = (((DOTGEN_FLAG_SHOW_IN_OUT_TYPES & flags) != 0) ? ("\nin: "+this->inTypeName()+"\nout: "+this->outTypeName()) : "");
176 // std::string threadLabel = (((flags & DOTGEN_FLAG_SHOW_ALL_THREADING) != 0) ? "" : (" x" + std::to_string(this->getNumThreads())));
177 // return dotId + "[label=\"" + this->getName() + threadLabel + inOutLabel + "\",style=filled,fillcolor=forestgreen,shape=box,color=black,width=.2,height=.2];\n";
178 // }
179 
184  virtual ITask <T, U> *copy() = 0;
185 
189  virtual void debug() override {}
190 
194 
200  int getCudaId() {
201  return this->cudaId;
202  }
203 
211  bool requiresCopy(size_t pipelineId) {
212  return std::find(this->nonPeerDevIds.begin(), this->nonPeerDevIds.end(),
213  this->cudaIds[pipelineId]) != this->nonPeerDevIds.end();
214  }
215 
224  template<class V>
225  bool requiresCopy(std::shared_ptr<MemoryData<V>> data) {
226  return this->requiresCopy(data->getPipelineId());
227  }
228 
236  bool hasPeerToPeerCopy(size_t pipelineId) { return !requiresCopy((size_t)cudaId); }
237 
253  template<class V>
254  bool autoCopy(V *destination, std::shared_ptr<MemoryData<V>> data, long numElems) {
255 
256  if (requiresCopy(data)) {
257  cudaMemcpyPeerAsync((void *) destination,
258  this->cudaId,
259  (void *) data->get(),
260  this->cudaIds[data->getPipelineId()],
261  sizeof(V) * numElems,
262  this->stream);
263  return true;
264  } else {
265  return false;
266  }
267  }
268 
273  void initialize() override final {
274  this->cudaId = this->cudaIds[this->getPipelineId()];
275 
276  int numGpus;
277  cudaGetDeviceCount(&numGpus);
278 
279  HTGS_ASSERT(this->cudaId < numGpus, "Error: Cuda ID: " << std::to_string(this->cudaId) << " is larger than the number of GPUs: " << std::to_string(numGpus));
280 
281  cudaSetDevice(this->cudaId);
282  cudaStreamCreate(&stream);
283 
284  if (autoEnablePeerAccess) {
285 
286  for (size_t i = 0; i < this->numGpus; i++) {
287  int peerId = this->cudaIds[i];
288  if (peerId != this->cudaId) {
289  int canAccess;
290  cudaDeviceCanAccessPeer(&canAccess, this->cudaId, peerId);
291 
292  if (canAccess) {
293  cudaDeviceEnablePeerAccess(peerId, 0);
294  } else {
295  this->nonPeerDevIds.push_back(peerId);
296  }
297  }
298  }
299  }
300 
301  this->initializeCudaGPU();
302  }
303 
308  void shutdown() override final {
309  this->shutdownCuda();
310  cudaStreamDestroy(stream);
311  }
312 
317  const cudaStream_t &getStream() const {
318  return stream;
319  }
320 
325  int *getCudaIds() {
326  return this->cudaIds;
327  }
328 
333  size_t getNumGPUs() {
334  return this->numGpus;
335  }
336 
342  void syncStream() {
343  cudaStreamSynchronize(stream);
344  }
345 
346  private:
347  cudaStream_t stream;
348  int *cudaIds;
349 
350  size_t numGpus;
351  int cudaId;
352  std::vector<int> nonPeerDevIds;
354 };
355 
356 }
357 #endif //HTGS_CUDATASK_HPP
358 #endif //USE_CUDA
359 
360 
bool autoCopy(V *destination, std::shared_ptr< MemoryData< V >> data, long numElems)
Will automatically copy from one GPU to another (if it is required).
Definition: ICudaTask.hpp:254
int getCudaId()
Gets the Cuda Id for this cudaTask.
Definition: ICudaTask.hpp:200
bool requiresCopy(size_t pipelineId)
Checks if the requested pipelineId requires GPU-to-GPU copy.
Definition: ICudaTask.hpp:211
bool requiresCopy(std::shared_ptr< MemoryData< V >> data)
Checks if the requested pipelineId requires GPU-to-GPU copy.
Definition: ICudaTask.hpp:225
void initialize() override final
Initializes the CudaTask to be bound to a particular GPU.
Definition: ICudaTask.hpp:273
cudaStream_t stream
The CUDA stream for the ICudaTask (set after initialize)
Definition: ICudaTask.hpp:347
virtual void debug() override
Virtual function that can be used to provide debug information.
Definition: ICudaTask.hpp:189
int cudaId
The CudaID for the ICudaTask (set after initialize)
Definition: ICudaTask.hpp:351
Describes memory allocated by a MemoryManager to manage shared memory across multiple ITask...
Definition: ICudaTask.hpp:28
size_t numGpus
The number of GPUs.
Definition: ICudaTask.hpp:350
size_t getNumGPUs()
Gets the number of GPUs specified during ICudaTask construction.
Definition: ICudaTask.hpp:333
int * getCudaIds()
Gets the cudaIds specified during ICudaTask construction.
Definition: ICudaTask.hpp:325
void shutdown() override final
Shuts down the ICudaTask.
Definition: ICudaTask.hpp:308
virtual void executeTask(std::shared_ptr< T > data)=0
Executes the ICudaTask on some data.
size_t getPipelineId()
Gets the pipeline ID.
Definition: AnyITask.hpp:367
const cudaStream_t & getStream() const
Gets the CUDA stream for this CUDA task.
Definition: ICudaTask.hpp:317
ICudaTask(int *cudaIds, size_t numGpus, bool autoEnablePeerAccess=true)
Creates an ICudaTask.
Definition: ICudaTask.hpp:135
An interface to process input data and forward results within a TaskGraph.
Definition: ITask.hpp:165
virtual void initializeCudaGPU()
Virtual function that is called when the ICudaTask has been initialized and is bound to a CUDA GPU...
Definition: ICudaTask.hpp:149
int * cudaIds
The array of cuda Ids (one per GPU)
Definition: ICudaTask.hpp:348
#define HTGS_ASSERT(condition, message)
Prints a more meaningful assertion message and terminates if the condition fails. ...
Definition: debug_message.hpp:25
void syncStream()
Synchronizes the Cuda stream associated with this task.
Definition: ICudaTask.hpp:342
std::vector< int > nonPeerDevIds
The list of CudaIds that do not have peer-to-peer access.
Definition: ICudaTask.hpp:352
virtual std::string getName() override
Virtual function that gets the name of this ICudaTask.
Definition: ICudaTask.hpp:166
An ICudaTask is used to attach a task to an NVIDIA Cuda GPU.
Definition: ICudaTask.hpp:120
bool autoEnablePeerAccess
Flag to automatically enable peer access between multiple GPUs.
Definition: ICudaTask.hpp:353
std::string getDotFillColor() override
Gets the color for filling the shape for graphviz dot.
Definition: ICudaTask.hpp:170
size_t pipelineId
The execution pipeline id for the ITask.
Definition: AnyITask.hpp:604
An interface to process input data and forward results within a TaskGraph.
virtual ITask< T, U > * copy()=0
Pure virtual function that copies this ICudaTask.
bool hasPeerToPeerCopy(size_t pipelineId)
Checks if the requested pipelineId allows peer to peer GPU copy.
Definition: ICudaTask.hpp:236
Definition: Bookkeeper.hpp:23
virtual void shutdownCuda()
Virtual function that is called when the ICudaTask is shutting down.
Definition: ICudaTask.hpp:160