d0/dc2/a00104_source.html

// NIST-developed software is provided by NIST as a public service. You may use, copy and distribute copies of the

// software in any medium, provided that you keep intact this entire notice. You may improve, modify and create

// derivative works of the software or any portion of the software, and you may copy and distribute such modifications

// or works. Modified works should carry a notice stating that you changed the software and should note the date and

// nature of any such change. Please explicitly acknowledge the National Institute of Standards and Technology as the

// source of the software. NIST-developed software is expressly provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND,

// EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF

// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST NEITHER REPRESENTS NOR

// WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE

// CORRECTED. NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR THE RESULTS

// THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE. You

// are solely responsible for determining the appropriateness of using and distributing the software and you assume

// all risks associated with its use, including but not limited to the risks and costs of program errors, compliance

// with applicable laws, damage to or loss of data, programs or equipment, and the unavailability or interruption of

// operation. This software is not intended to be used in any situation where a failure could cause risk of injury or

// damage to property. The software developed by NIST employees is not subject to copyright protection within the

// United States.


#ifndef HEDGEHOG_ABSTRACT_CUDA_TASK_H

#define HEDGEHOG_ABSTRACT_CUDA_TASK_H

#ifdef HH_USE_CUDA


#include <cuda_runtime.h>

#include <cassert>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>

#include "abstract_task.h"
#include "../../tools/cuda_debugging.h"


namespace hh {


/// @brief Abstract task specialized for CUDA computation.
/// @details On top of AbstractTask, this class selects the task's CUDA device during initialize(), creates a CUDA
/// stream for the task, and (optionally) enables peer access towards every other visible device that supports it.
/// initialize() and shutdown() are final; user-specific setup / teardown goes into initializeCuda() and
/// shutdownCuda().
/// @tparam Separator Forwarded unchanged to AbstractTask / CoreTask
/// @tparam AllTypes Forwarded unchanged to AbstractTask / CoreTask
template<size_t Separator, class ...AllTypes>
class AbstractCUDATask : public AbstractTask<Separator, AllTypes...> {
 private:
  bool enablePeerAccess_ = false; ///< Enable CUDA peer access through all CUDA devices available
  std::unordered_set<int> peerDeviceIds_ = {}; ///< Ids of the peer devices for which access has been enabled
  cudaStream_t stream_ = {}; ///< CUDA stream linked to the task (created in initialize(), destroyed in shutdown())

  /// @brief Set the background / font colors of the core task's print options
  /// @details Shared by the constructors so the color scheme is defined in a single place. Background is
  /// {0x76, 0xb9, 0x00, 0xff} and font is {0xff, 0xff, 0xff, 0xff}
  /// (NOTE(review): looks like RGBA green / white - confirm against the print options type).
  void applyCudaPrintStyle() {
    this->coreTask()->printOptions().background({0x76, 0xb9, 0x00, 0xff});
    this->coreTask()->printOptions().font({0xff, 0xff, 0xff, 0xff});
  }

 public:
  /// @brief AbstractCUDATask full constructor
  /// @param name Task name, forwarded to AbstractTask
  /// @param numberThreads Number of threads, forwarded to AbstractTask
  /// @param enablePeerAccess If true, initialize() enables peer access to the other accessible CUDA devices
  /// @param automaticStart Forwarded to AbstractTask
  AbstractCUDATask(std::string const &name, size_t numberThreads, bool enablePeerAccess, bool automaticStart = false)
      : AbstractTask<Separator, AllTypes...>(name, numberThreads, automaticStart),
        enablePeerAccess_(enablePeerAccess) {
    applyCudaPrintStyle();
  }

  /// @brief Main constructor for an AbstractCUDATask; peer access and automatic start are both disabled
  /// @param name Task name (default "CudaTask")
  /// @param numberThreads Number of threads (default 1)
  explicit AbstractCUDATask(std::string const &name = "CudaTask", size_t numberThreads = 1)
      : AbstractCUDATask<Separator, AllTypes...>(name, numberThreads, false, false) {}

  /// @brief Custom core task constructor
  /// @param coreTask Core task to use for this task; ownership of this handle is handed over to AbstractTask
  /// @param enablePeerAccess If true, initialize() enables peer access to the other accessible CUDA devices
  AbstractCUDATask(std::shared_ptr<hh::core::CoreTask<Separator, AllTypes...>> coreTask, bool enablePeerAccess)
      : AbstractTask<Separator, AllTypes...>(std::move(coreTask)),
        enablePeerAccess_(enablePeerAccess) {
    // The parameter has been moved from: only the accessor this->coreTask() is used from here on.
    applyCudaPrintStyle();
  }

  /// @brief Destructor
  /// @details When a memory manager is attached, its device is selected before the object is torn down
  /// (presumably so that resources owned by the memory manager are released on the right device - confirm).
  ~AbstractCUDATask() override {
    if (this->memoryManager() != nullptr) {
      checkCudaErrors(cudaSetDevice(this->memoryManager()->deviceId()));
    }
  }

  /// @brief Initialize an AbstractCUDATask: bind it to its CUDA device, create its stream, and enable peer access
  /// if requested
  /// @details Steps, in order: check the task's device id against the number of CUDA devices, select the device,
  /// create the task's stream, enable peer access towards every other device that can be accessed (when
  /// enablePeerAccess is set), then call the user hook initializeCuda().
  void initialize() final {
    int numGpus = 0;
    checkCudaErrors(cudaGetDeviceCount(&numGpus));

    // The device id does not change during initialization: read it once instead of once per loop iteration.
    int const taskDeviceId = this->deviceId();
    assert(taskDeviceId >= 0 && taskDeviceId < numGpus);

    checkCudaErrors(cudaSetDevice(taskDeviceId));
    checkCudaErrors(cudaStreamCreate(&stream_));

    if (enablePeerAccess_) {
      for (int peerId = 0; peerId < numGpus; ++peerId) {
        if (peerId == taskDeviceId) { continue; }

        int canAccess = 0;
        checkCudaErrors(cudaDeviceCanAccessPeer(&canAccess, taskDeviceId, peerId));
        if (!canAccess) { continue; }

        // Peer access that is already enabled (cudaErrorPeerAccessAlreadyEnabled) is tolerated, any other
        // status goes through the regular error check.
        auto const status = cudaDeviceEnablePeerAccess(peerId, 0);
        if (status != cudaErrorPeerAccessAlreadyEnabled) {
          checkCudaErrors(status);
        }
        peerDeviceIds_.insert(peerId);
      }
    }

    // Fetch the last recorded runtime error: a tolerated cudaErrorPeerAccessAlreadyEnabled is consumed here so it
    // does not surface in a later error check, anything else is reported.
    auto const lastError = cudaGetLastError();
    if (lastError != cudaErrorPeerAccessAlreadyEnabled) {
      checkCudaErrors(lastError);
    }

    this->initializeCuda();
  }

  /// @brief Shutdown an AbstractCUDATask: call the user hook shutdownCuda(), then destroy the task's CUDA stream
  /// created during initialize()
  void shutdown() final {
    this->shutdownCuda();
    checkCudaErrors(cudaStreamDestroy(stream_));
  }

  /// @brief Virtual initialization step, where user defined data structure can be initialized
  /// @details Called at the end of initialize(), once the device is selected and the stream exists.
  virtual void initializeCuda() {}

  /// @brief Virtual shutdown step, where user defined data structure can be destroyed
  /// @details Called at the beginning of shutdown(), before the stream is destroyed.
  virtual void shutdownCuda() {}

  /// @brief Accessor for peer access choice
  /// @return True if peer access was requested at construction, else false
  bool enablePeerAccess() const { return enablePeerAccess_; }

  /// @brief Getter for CUDA task's stream
  /// @return The task's CUDA stream handle
  cudaStream_t stream() const { return stream_; }

  /// @brief Accessor for peer access enabled for a specific device id
  /// @param peerDeviceId Device id to test
  /// @return True if initialize() registered peer access towards peerDeviceId, else false
  bool hasPeerAccess(int peerDeviceId) const { return peerDeviceIds_.find(peerDeviceId) != peerDeviceIds_.end(); }
};


}

#endif //HH_USE_CUDA

#endif //HEDGEHOG_ABSTRACT_CUDA_TASK_H

abstract_task.h

checkCudaErrors
#define checkCudaErrors(err)
Definition: cuda_debugging.h:64

hh
Hedgehog main namespace.
Definition: abstract_execution_pipeline.h:28

hh::AbstractCUDATask
Abstract Task specialized for CUDA computation.
Definition: abstract_cuda_task.h:46

hh::AbstractCUDATask::AbstractCUDATask
AbstractCUDATask(std::string const &name, size_t numberThreads, bool enablePeerAccess, bool automaticStart=false)
AbstractCUDATask full constructor.
Definition: abstract_cuda_task.h:58

hh::AbstractCUDATask::stream_
cudaStream_t stream_
CUDA stream linked to the task.
Definition: abstract_cuda_task.h:50

hh::AbstractCUDATask::initializeCuda
virtual void initializeCuda()
Virtual initialization step, where user defined data structure can be initialized.
Definition: abstract_cuda_task.h:129

hh::AbstractCUDATask::enablePeerAccess_
bool enablePeerAccess_
Enable CUDA Peer Access through all CUDA devices available.
Definition: abstract_cuda_task.h:48

hh::AbstractCUDATask::AbstractCUDATask
AbstractCUDATask(std::shared_ptr< hh::core::CoreTask< Separator, AllTypes... > > coreTask, bool enablePeerAccess)
Custom core task constructor.
Definition: abstract_cuda_task.h:74

hh::AbstractCUDATask::stream
cudaStream_t stream() const
Getter for CUDA task's stream.
Definition: abstract_cuda_task.h:140

hh::AbstractCUDATask::~AbstractCUDATask
~AbstractCUDATask() override
Default destructor.
Definition: abstract_cuda_task.h:82

hh::AbstractCUDATask::shutdownCuda
virtual void shutdownCuda()
Virtual shutdown step, where user defined data structure can be destroyed.
Definition: abstract_cuda_task.h:132

hh::AbstractCUDATask::hasPeerAccess
bool hasPeerAccess(int peerDeviceId)
Accessor for peer access enabled for a specific device id.
Definition: abstract_cuda_task.h:145

hh::AbstractCUDATask::initialize
void initialize() final
Initialize an AbstractCUDATask to bind it to a CUDA device, and do the peer access if enabled....
Definition: abstract_cuda_task.h:90

hh::AbstractCUDATask::AbstractCUDATask
AbstractCUDATask(std::string const name="CudaTask", size_t numberThreads=1)
Main constructor for an AbstractCUDATask.
Definition: abstract_cuda_task.h:68

hh::AbstractCUDATask::shutdown
void shutdown() final
Shutdown an AbstractCUDATask to destroy the task's CUDA stream created during AbstractCUDATask::initi...
Definition: abstract_cuda_task.h:123

hh::AbstractCUDATask::peerDeviceIds_
std::unordered_set< int > peerDeviceIds_
Set of ids of the peer CUDA devices for which peer access has been enabled.
Definition: abstract_cuda_task.h:49

hh::AbstractCUDATask::enablePeerAccess
bool enablePeerAccess() const
Accessor for peer access choice.
Definition: abstract_cuda_task.h:136

hh::AbstractTask
Base node for computation.
Definition: abstract_task.h:115

hh::AbstractTask< Separator, AllTypes... >::coreTask
std::shared_ptr< hh::core::CoreTask< Separator, AllTypes... > > const & coreTask() const
Accessor to the core task.
Definition: abstract_task.h:172

hh::AbstractTask< Separator, AllTypes... >::deviceId
int deviceId()
Accessor to device id linked to the task (default 0 for CPU task)
Definition: abstract_task.h:176

hh::behavior::Copyable< AbstractTask< Separator, AllTypes... > >::numberThreads
size_t numberThreads() const
Number of threads accessor.
Definition: copyable.h:51

hh::behavior::Node::name
std::string name() const
Node's name accessor.
Definition: node.h:53

hh::behavior::Node::core
std::shared_ptr< hh::core::abstraction::NodeAbstraction > const & core() const
Core accessor.
Definition: node.h:49

hh::behavior::TaskNode::memoryManager
std::shared_ptr< AbstractMemoryManager > const & memoryManager() const
Memory manager accessor.
Definition: task_node.h:53

hh::core::CoreTask
Task core.
Definition: core_task.h:71