Support functions (memory allocation etc.) for CUDA-based GPUs. More...

Classes
class	CudaAllocator
	Cuda allocator class, uses the buddy system. More...

class	CudaConstSpan
	CudaConstSpan is a non-owning constant reference to a contiguous array of objects in CUDA storage. More...

class	CudaDynArray
	The CUDA-equivalent of DynArray. More...

class	CudaMutSpan
	CudaMutSpan is a non-owning reference to a contiguous array of objects in CUDA storage which can be mutated through the span. More...

class	CudaPtr
	Implementation for syten::CudaPtr. More...

class	CudaStream
	Implementation for syten::CudaStream. More...

struct	Handle
	Represents a cublasHandle_t object which may or may not be in use. More...

struct	HandleBox
	RAII wrapper around a handle, putting it out of use on destruction. More...

Functions
void	addScaled (CudaDynArray< double > &a, CudaDynArray< double > const &b, double const factor)
	Adds `b` scaled by `factor` to `a`. More...

void	addScaled (CudaDynArray< float > &a, CudaDynArray< float > const &b, float const factor)
	Adds `b` scaled by `factor` to `a`. More...

void	addScaled (CudaDynArray< std::complex< double > > &a, CudaDynArray< std::complex< double > > const &b, std::complex< double > const factor)
	Adds `b` scaled by `factor` to `a`. More...

void	addScaled (CudaDynArray< std::complex< float > > &a, CudaDynArray< std::complex< float > > const &b, std::complex< float > const factor)
	Adds `b` scaled by `factor` to `a`. More...

std::uint16_t	allocator_get_max_size ()
	Returns the log2 of the maximal block size of the CUDA allocator. More...

std::uint16_t	allocator_get_min_size ()
	Returns the log2 of the minimal block size of the CUDA allocator. More...

void	allocator_print_status ()
	Prints the status of the CUDA allocator. More...

std::uint16_t	allocator_set_max_size (std::uint16_t sz)
	Sets the log2 of the maximal block size of the CUDA allocator. More...

std::uint16_t	allocator_set_min_size (std::uint16_t sz)
	Sets the log2 of the minimal block size of the CUDA allocator. More...

void	conj_copy (CudaConstSpan< double > src, CudaMutSpan< double > dst)
	Copies all data from `src` to `dst` (no conjugation takes place as `double` is real). More...

void	conj_copy (CudaConstSpan< float > src, CudaMutSpan< float > dst)
	Copies all data from `src` to `dst` (no conjugation takes place as `float` is real). More...

void	conj_copy (CudaConstSpan< std::complex< double > > src, CudaMutSpan< std::complex< double > > dst)
	Copies all data from `src` to `dst` while complex-conjugating every value. More...

void	conj_copy (CudaConstSpan< std::complex< float > > src, CudaMutSpan< std::complex< float > > dst)
	Copies all data from `src` to `dst` while complex-conjugating every value. More...

template<typename T >
void	copy (CudaConstSpan< T > const inp, CudaMutSpan< T > out)
	Copies all data from `inp` to `out`. More...

bool	cuda_compiled ()
	Returns true if CUDA support is compiled in. More...

void	cuda_dot_conj_kernel_impl (std::size_t sz, const std::complex< double > *to_be_conj_a, const std::complex< double > *b, std::complex< double > *result, void *cuda_stream)
	Calculates the scalar product of two CUDA arrays. More...

bool	cuda_enabled ()
	Returns true if the list of allowed devices is not empty. More...

cudaError_t	cuda_handle_error_impl (cudaError_t err, std::string str, SourceLocation location=SourceLocation::current())
	Implementation for helper function to handle CUDA return values by throwing an assertion failure if the return value is not `cudaSuccess`. More...

cudaError_t	cuda_handle_error_impl (cudaError_t err, Vec< cudaError_t > acceptable_errors, std::string str, SourceLocation location=SourceLocation::current())
	Implementation for helper function to handle CUDA return values by throwing an assertion failure if the return value is neither `cudaSuccess` nor in the list of acceptable errors. More...

double	dot (CudaDynArray< double > const &a, CudaDynArray< double > const &b, Conj const conj=Conj::n())
	Returns the scalar product between two CudaDynArray objects. More...

float	dot (CudaDynArray< float > const &a, CudaDynArray< float > const &b, Conj const conj=Conj::n())
	Returns the scalar product between two CudaDynArray objects. More...

std::complex< double >	dot (CudaDynArray< std::complex< double > > const &a, CudaDynArray< std::complex< double > > const &b, Conj const conj=Conj::n())
	Returns the scalar product between two CudaDynArray objects. More...

std::complex< float >	dot (CudaDynArray< std::complex< float > > const &a, CudaDynArray< std::complex< float > > const &b, Conj const conj=Conj::n())
	Returns the scalar product between two CudaDynArray objects. More...

HandleBox	get_handle (CudaStream const &str)
	Returns a cuBLAS handle associated with the device `device` and switches the current CUDA device to this device. More...

cublasStatus_t	handle_error (cublasStatus_t err)
	Checks the error code and asserts if it is not CUBLAS_STATUS_SUCCESS. More...

template<typename T >
int	host_device (CudaConstSpan< T > const inp)
	Returns the device hosting the CudaConstSpan range. More...

template<typename T >
int	host_device (CudaMutSpan< T > const inp)
	Returns the device hosting the CudaMutSpan range. More...

void	initialise_handles ()
	Initialises the cuBLAS handles for the currently allowed devices. More...

template<typename T >
T	max_element (CudaConstSpan< T > const inp)
	Returns the absolute-value maximal element of the range. More...

std::size_t	max_element_idx (CudaConstSpan< double > inp)
	Returns the 0-based index of the absolute-value maximal element of the range. More...

std::size_t	max_element_idx (CudaConstSpan< float > inp)
	Returns the 0-based index of the absolute-value maximal element of the range. More...

std::size_t	max_element_idx (CudaConstSpan< std::complex< double > > inp)
	Returns the 0-based index of the absolute-value maximal element of the range. More...

std::size_t	max_element_idx (CudaConstSpan< std::complex< float > > inp)
	Returns the 0-based index of the absolute-value maximal element of the range. More...

void	scale (CudaDynArray< double > &a, double const f)
	Scales the entries of `a` by `f`. More...

void	scale (CudaDynArray< float > &a, float const f)
	Scales the entries of `a` by `f`. More...

void	scale (CudaDynArray< std::complex< double > > &a, double const f)
	Scales the entries of `a` by `f`. More...

void	scale (CudaDynArray< std::complex< double > > &a, std::complex< double > const &f)
	Scales the entries of `a` by `f`. More...

void	scale (CudaDynArray< std::complex< float > > &a, float const f)
	Scales the entries of `a` by `f`. More...

void	scale (CudaDynArray< std::complex< float > > &a, std::complex< float > const f)
	Scales the entries of `a` by `f`. More...

void	set_allowed_devices (Vec< std::int16_t > const &devices)
	Sets the allowed devices to the supplied list and enables inter-device memory access. More...

std::string	status_description (cublasStatus_t err)
	Returns a string describing the specified cuBLAS error. More...

std::string	version ()
	Returns a version string describing the current CUDA version/compilation/enablement. More...

Allowed devices and allocation logic
Vec< std::int16_t > const &	get_allowed_devices ()
	Returns a list of allowed devices. More...

std::int16_t	get_alloc_device ()
	Returns the device ID of the next allocation device to use. More...

void	setup ()
	Sets up CUDA to allow all existing devices and generates the associated cuBLAS handles for the calling thread. More...

void	setup (Vec< std::int16_t > const &devices)
	Sets up CUDA to allow the specified devices and generates the associated cuBLAS handles for the calling thread. More...

Memory management functions.
CudaPtr< void >	alloc (std::size_t sz)
	Allocates `sz` bytes on the next CUDA allocation device. More...

CudaPtr< void >	alloc_on_device (std::size_t sz, std::int16_t device)
	Allocates `sz` bytes on the specified CUDA allocation device. More...

void	free (CudaPtr< void > ptr)
	Frees the allocation pointed to by `ptr`. More...

template<typename T >
CudaPtr< T >	alloc (std::size_t num)
	Allocates enough space for `num` objects of type T on the next CUDA allocation device and returns an appropriate CudaPtr. More...

template<typename T >
CudaPtr< T >	alloc_on_device (std::size_t sz, std::int16_t device)
	Allocates enough space for `sz` objects of type T on the specified CUDA device and returns an appropriate CudaPtr. More...

template<typename T >
void	free (CudaPtr< T > ptr)
	Frees the allocation pointed to by `ptr`. More...

template<typename T >
std::int16_t	host_device (CudaPtr< T > v)
	Returns the device hosting the allocation pointed to by `v`. More...

Copying objects and arrays.
void	copy (CudaPtr< const char > src, CudaPtr< char > dst, std::size_t num)
	Copies `num` bytes from `src` to `dst`. More...

void	copy (CudaPtr< const char > src, CudaPtr< char > dst, std::size_t num, CudaStream const &str)
	Copies `num` bytes from `src` to `dst` within stream `str`. More...

template<typename T >
void	copy (CudaPtr< const typename IdentityType< T >::type > src, CudaPtr< T > dst, std::size_t num)
	Copies `num` objects of type `T` from `src` to `dst`. More...

template<typename T >
void	copy (CudaPtr< const typename IdentityType< T >::type > src, T *dst, std::size_t num)
	Copies `num` objects of type `T` from `src` to `dst`. More...

template<typename T >
void	copy (const T *src, CudaPtr< T > dst, std::size_t num)
	Copies `num` objects of type `T` from `src` to `dst`. More...

template<typename T >
void	copy (const T *src, T *dst, std::size_t num)
	Copies `num` objects of type `T` from `src` to `dst`. More...

template<typename T >
void	copy (CudaPtr< const typename IdentityType< T >::type > src, CudaPtr< T > dst, std::size_t num, CudaStream const &str)
	Copies `num` objects of type `T` from `src` to `dst` within stream `str`. More...

template<typename T >
void	copy (CudaPtr< const typename IdentityType< T >::type > src, T *dst, std::size_t num, CudaStream const &str)
	Copies `num` objects of type `T` from `src` to `dst` within stream `str`. More...

template<typename T >
void	copy (const T *src, CudaPtr< T > dst, std::size_t num, CudaStream const &str)
	Copies `num` objects of type `T` from `src` to `dst` within stream `str`. More...

template<typename T >
void	copy (const T *src, T *dst, std::size_t num, CudaStream const &str)
	Copies `num` objects of type `T` from `src` to `dst` within stream `str`. More...

Device management and synchronisation.
void	select_device (std::int16_t device)
	Selects the specified device for the current thread. More...

void	synchronise ()
	Synchronises with the current device. More...

Pair< Size, Size >	mem_status ()
	Returns a pair of free and total device memory. More...

Variables
static CudaAllocator	allocator
	Our CudaAllocator object, handling a buddy free list system. More...

static Vec< std::int16_t >	allowed_devices = {}
	Storage for the currently allowed devices. More...

Vec< Pair< int, Vec< Handle > > >	handles
	Preinitialised handles for each of the devices. More...

Detailed Description

Support functions (memory allocation etc.) for CUDA-based GPUs.

Classes

Functions

Variables

Detailed Description