Implementation for inc/util/cuda_support.h More...

#include "inc/util/cuda_support.h"
#include "inc/util/macros.h"
#include "inc/util/pair.h"
#include "inc/util/scalars.h"
#include "inc/util/vectors.h"
#include <cuda_runtime.h>
#include <numeric>
#include "inc/util/cublas_support.h"
#include "inc/util/tensor_timer.h"

Include dependency graph for cuda_support.cpp:

Classes
class	syten::Cuda::CudaAllocator
	CUDA allocator class; uses the buddy system. More...

Namespaces
namespace	syten
	Syten namespace.

namespace	syten::Cuda
	Support functions (memory allocation etc.) for CUDA-based GPUs.

Macros
#define	cuda_handle_error(...) cuda_handle_error_impl(__VA_ARGS__, SYTEN_STRINGIFY(__VA_ARGS__));
	Helper macro to handle CUDA errors: it forwards to cuda_handle_error_impl(), but first checks for any other errors, and passes a stringified version of the call text into cuda_handle_error_impl() for pretty-printing. More...

Functions
std::uint16_t	syten::Cuda::allocator_get_max_size ()
	Returns the log2 of the maximal block size of the CUDA allocator. More...

std::uint16_t	syten::Cuda::allocator_get_min_size ()
	Returns the log2 of the minimal block size of the CUDA allocator. More...

void	syten::Cuda::allocator_print_status ()
	Prints the status of the CUDA allocator. More...

std::uint16_t	syten::Cuda::allocator_set_max_size (std::uint16_t sz)
	Sets the log2 of the maximal block size of the CUDA allocator. More...

std::uint16_t	syten::Cuda::allocator_set_min_size (std::uint16_t sz)
	Sets the log2 of the minimal block size of the CUDA allocator. More...

bool	syten::Cuda::cuda_compiled ()
	Returns true if CUDA support is compiled in. More...

bool	syten::Cuda::cuda_enabled ()
	Returns true if the list of allowed devices is not empty. More...

cudaError_t	syten::Cuda::cuda_handle_error_impl (cudaError_t err, std::string str, SourceLocation location=SourceLocation::current())
	Implementation of the helper function that handles CUDA return values by throwing an assertion failure if the return value is not `cudaSuccess`. More...

cudaError_t	syten::Cuda::cuda_handle_error_impl (cudaError_t err, Vec< cudaError_t > acceptable_errors, std::string str, SourceLocation location=SourceLocation::current())
	Implementation of the helper function that handles CUDA return values by throwing an assertion failure if the return value is neither `cudaSuccess` nor in the list of acceptable errors. More...

void	syten::Cuda::set_allowed_devices (Vec< std::int16_t > const &devices)
	Sets the allowed devices to the supplied list and enables inter-device memory access. More...

std::string	syten::Cuda::version ()
	Returns a version string describing the current CUDA version/compilation/enablement. More...

Allowed devices and allocation logic
std::int16_t	syten::Cuda::get_alloc_device ()
	Returns the device ID of the next allocation device to use. More...

Vec< std::int16_t > const &	syten::Cuda::get_allowed_devices ()
	Returns a list of allowed devices. More...

void	syten::Cuda::setup ()
	Sets up CUDA to allow all existing devices and generates the associated cuBLAS handles for the calling thread. More...

void	syten::Cuda::setup (Vec< std::int16_t > const &devices)
	Sets up CUDA to allow the specified devices and generates the associated cuBLAS handles for the calling thread. More...

Memory management functions.
CudaPtr< void >	syten::Cuda::alloc (std::size_t sz)
	Allocates `sz` bytes on the next CUDA allocation device. More...

CudaPtr< void >	syten::Cuda::alloc_on_device (std::size_t sz, std::int16_t device)
	Allocates `sz` bytes on the specified CUDA allocation device. More...

void	syten::Cuda::free (CudaPtr< void > ptr)
	Frees the allocation pointed to by `ptr`. More...

Copying objects and arrays.
void	syten::Cuda::copy (CudaPtr< const char > src, CudaPtr< char > dst, std::size_t num)
	Copies `num` bytes from `src` to `dst`. More...

void	syten::Cuda::copy (CudaPtr< const char > src, CudaPtr< char > dst, std::size_t num, CudaStream const &str)
	Copies `num` bytes from `src` to `dst` within stream `str`. More...

Device management and synchronisation.
Pair< Size, Size >	syten::Cuda::mem_status ()
	Returns a pair of free and total device memory. More...

void	syten::Cuda::select_device (std::int16_t device)
	Selects the specified device for the current thread. More...

void	syten::Cuda::synchronise ()
	Synchronises with the current device. More...

Variables
static CudaAllocator	syten::Cuda::allocator
	Our CudaAllocator object, handling a buddy free list system. More...

static Vec< std::int16_t >	syten::Cuda::allowed_devices = {}
	Storage for the currently allowed devices. More...

Detailed Description

Implementation for inc/util/cuda_support.h

Classes

Namespaces

Macros

Functions

Variables

Detailed Description