SyTen
syten::CudaDenseTensorImpl Namespace Reference

Implementation namespace for CUDA dense tensors. More...

Classes

struct  CudaDenseTensor
 A dense tensor using CUDA for storage and computing if SYTEN_USE_CUDA is defined. More...
 

Functions

template<Rank rank, typename Scalar >
void addScaled (CudaDenseTensor< rank, Scalar > &a, CudaDenseTensor< rank, Scalar > const &b, typename IdentityType< Scalar >::type const factor, EliminateZeros const ezeros=EliminateZeros::No)
 Adds the entries of the CUDA dense tensor b scaled by factor to those of a. More...
 
template<Rank rank, typename Scalar >
void addScaled (CudaDenseTensor< rank, Scalar > &a, DenseTensor< rank, Scalar > const &b, typename IdentityType< Scalar >::type const factor, EliminateZeros const ezeros)
 Adds the entries of the host-based dense tensor b scaled by factor to the CUDA dense tensor a. More...
 
template<Rank rank, typename Scalar >
void addScaled (CudaDenseTensor< rank, Scalar > &a, IdentityDenseTensor< rank, Scalar > const &b, typename IdentityType< Scalar >::type const factor, EliminateZeros const ezeros)
 Adds the entries of the identity tensor b scaled by factor to the CUDA dense tensor a. More...
 
template<Rank rank, typename Scalar >
void addScaled (CudaDenseTensor< rank, Scalar > &a, OffsetDenseTensor< rank, Scalar > const &b, typename IdentityType< Scalar >::type const factor, EliminateZeros const ezeros)
 Adds the entries of the offset dense tensor b scaled by factor to the CUDA dense tensor a. More...
 
template<Rank rank, typename Scalar >
auto conj (CudaDenseTensor< rank, Scalar > const &in)
 Returns a complex-conjugated copy of the input CUDA dense tensor on the same device. More...
 
void cuda_iszero_kernel_impl (std::size_t sz, const double *inp, int *is_nonzero, void *str)
 Launcher for the CUDA isZero kernel, double version. More...
 
void cuda_iszero_kernel_impl (std::size_t sz, const float *inp, int *is_nonzero, void *str)
 Launcher for the CUDA isZero kernel, float version. More...
 
void cuda_iszero_kernel_impl (std::size_t sz, const std::complex< double > *inp, int *is_nonzero, void *str)
 Launcher for the CUDA isZero kernel, std::complex<double> version. More...
 
void cuda_iszero_kernel_impl (std::size_t sz, const std::complex< float > *inp, int *is_nonzero, void *str)
 Launcher for the CUDA isZero kernel, std::complex<float> version. More...
 
void cuda_mm_cm (CudaPtr< const double > at, CudaPtr< const double > bt, CudaPtr< double > rt, Size const dx, Size const dy, Size const dz)
 CUDA real double matrix-matrix multiplication. More...
 
void cuda_mm_cm (CudaPtr< const float > at, CudaPtr< const float > bt, CudaPtr< float > rt, Size const dx, Size const dy, Size const dz)
 CUDA real float matrix-matrix multiplication. More...
 
void cuda_mm_cm (CudaPtr< const std::complex< double > > at, CudaPtr< const std::complex< double > > bt, CudaPtr< std::complex< double > > rt, Size const dx, Size const dy, Size const dz)
 CUDA complex double matrix-matrix multiplication. More...
 
void cuda_mm_cm (CudaPtr< const std::complex< float > > at, CudaPtr< const std::complex< float > > bt, CudaPtr< std::complex< float > > rt, Size const dx, Size const dy, Size const dz)
 CUDA complex float matrix-matrix multiplication. More...
 
template<Rank rank, typename Scalar , TransposeMethod method = TransposeMethod::Default, Rank... Ranks>
void cuda_transpose_impl (CudaConstSpan< Scalar > inp, CudaMutSpan< Scalar > out, ConstSpan< Index > in_perm, ConstSpan< Index > in_dim, Conj do_conj=Conj::n(), bool do_checks=true, std::index_sequence< Ranks... >={})
 Implementation of the CUDA tensor transposition routines, to be called only from cuda_transpose(). More...
 
template<Rank rank, typename Scalar >
void cuda_transpose_kernel (CudaConstSpan< Scalar > inp, CudaMutSpan< Scalar > out, ConstSpan< IndexNumber > perm, ConstSpan< Index > dim, Conj do_conj)
 Wrapper around the CUDA transpose kernels which sets everything up such that the functions in cuda_transpose_impl_cukrn.h really only have to launch the kernels. More...
 
void cuda_transpose_kernel_impl (std::uint32_t rank, std::size_t sz, const double *inp, double *out, cukrn_transpose_array const &old_dim, cukrn_transpose_array const &new_dim, cukrn_transpose_array const &ar_perm, void *str, bool do_conj)
 Launcher for the CUDA tensor transposition kernel, double version. More...
 
void cuda_transpose_kernel_impl (std::uint32_t rank, std::size_t sz, const float *inp, float *out, cukrn_transpose_array const &old_dim, cukrn_transpose_array const &new_dim, cukrn_transpose_array const &ar_perm, void *str, bool do_conj)
 Launcher for the CUDA tensor transposition kernel, float version. More...
 
void cuda_transpose_kernel_impl (std::uint32_t rank, std::size_t sz, const std::complex< double > *inp, std::complex< double > *out, cukrn_transpose_array const &old_dim, cukrn_transpose_array const &new_dim, cukrn_transpose_array const &ar_perm, void *str, bool do_conj)
 Launcher for the CUDA tensor transposition kernel, std::complex<double> version. More...
 
void cuda_transpose_kernel_impl (std::uint32_t rank, std::size_t sz, const std::complex< float > *inp, std::complex< float > *out, cukrn_transpose_array const &old_dim, cukrn_transpose_array const &new_dim, cukrn_transpose_array const &ar_perm, void *str, bool do_conj)
 Launcher for the CUDA tensor transposition kernel, std::complex<float> version. More...
 
void cuda_transpose_r2 (CudaConstSpan< double > inp, CudaMutSpan< double > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n())
 Transposition of a rank-2 CUDA matrix in row-major format, with inp_rows rows and inp_cols columns, from range inp into range out, double version. More...
 
void cuda_transpose_r2 (CudaConstSpan< float > inp, CudaMutSpan< float > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n())
 Transposition of a rank-2 CUDA matrix in row-major format, with inp_rows rows and inp_cols columns, from range inp into range out, float version. More...
 
void cuda_transpose_r2 (CudaConstSpan< std::complex< double > > inp, CudaMutSpan< std::complex< double > > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n())
 Transposition of a rank-2 CUDA matrix in row-major format, with inp_rows rows and inp_cols columns, from range inp into range out, std::complex<double> version. More...
 
void cuda_transpose_r2 (CudaConstSpan< std::complex< float > > inp, CudaMutSpan< std::complex< float > > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n())
 Transposition of a rank-2 CUDA matrix in row-major format, with inp_rows rows and inp_cols columns, from range inp into range out, std::complex<float> version. More...
 
template<Rank rank, typename Scalar >
void cuda_transpose_recursive (CudaConstSpan< Scalar > inp, CudaMutSpan< Scalar > out, ConstSpan< IndexNumber > in_perm, ConstSpan< Index > in_dim, Conj do_conj=Conj::n())
 Entry point for the CUDA recursive tensor transposition implementation, to be called only from cuda_transpose_impl(). More...
 
template<Rank rank, typename Scalar >
void cuda_transpose_recursive_impl (CudaConstSpan< Scalar > inp, CudaMutSpan< Scalar > out, [[maybe_unused]] CudaMutSpan< Scalar > workspace, [[maybe_unused]] ConstSpan< IndexNumber > perm, [[maybe_unused]] ConstSpan< Index > dim)
 Recursive implementation of the CUDA tensor transposition. More...
 
template<Rank summed, Rank frank, Rank srank, Rank rrank, typename Scalar >
void gemm_transpose (CudaDenseTensor< frank, Scalar > const &a, CudaDenseTensor< srank, Scalar > const &b, CudaDenseTensor< rrank, Scalar > &r, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b)
 Last part of a transpose-transpose-gemm-transpose CUDA tensor contraction. More...
 
template<Rank rank, typename Scalar >
bool isZero (CudaDenseTensor< rank, Scalar > const &c)
 Returns true if the supplied CudaDenseTensor has only zero entries. More...
 
template<Rank rank, typename Scalar >
ScalarBase< Scalar >::type normSqd (CudaDenseTensor< rank, Scalar > const &a)
 Returns the squared norm of the input tensor. More...
 
template<Rank rank, typename Scalar , typename ScalarF >
CudaDenseTensor< rank, Scalar > & operator*= (CudaDenseTensor< rank, Scalar > &a, ScalarF const &b)
 Scales every element of the CUDA dense tensor a by the scalar factor b. More...
 
template<Rank summed, Rank frank, Rank srank, typename Scalar , std::enable_if_t<(int(frank)+int(srank) - 2 *int(summed) > 0), int > = 0>
CudaDenseTensor< frank+srank - 2 *summed, Scalar > prodD (CudaDenseTensor< frank, Scalar > const &a, CudaDenseTensor< srank, Scalar > const &b, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b, bool conjugate=false, EliminateZeros const ezeros=EliminateZeros::No, DenseProduct::TemporaryTransposeStorage< Scalar, frank, srank > *=nullptr)
 Product/Contraction of two dense CUDA tensors. More...
 
template<Rank summed, Rank frank, Rank srank, typename Scalar , std::enable_if_t<(int(frank)+int(srank) - 2 *int(summed) > 0), int > = 0>
GenericDenseTensor< frank+srank - 2 *summed, Scalar > prodD (CudaDenseTensor< frank, Scalar > const &a, DenseTensor< srank, Scalar > const &b, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b, bool conjugate=false, EliminateZeros const ezeros=EliminateZeros::No, DenseProduct::TemporaryTransposeStorage< Scalar, frank, srank > *=nullptr)
 Product/Contraction of a CUDA dense tensor and a standard dense tensor. More...
 
template<Rank r, typename Scalar >
Scalar prodD (CudaDenseTensor< r, Scalar > const &a, CudaDenseTensor< r, Scalar > const &b, std::array< int, r > const &c_a, std::array< int, r > const &c_b, bool conjugate=false)
 Reordering scalar product of two CUDA dense tensors. More...
 
template<Rank r, typename Scalar >
Scalar prodD (CudaDenseTensor< r, Scalar > const &a, DenseTensor< r, Scalar > const &b, std::array< int, r > const &c_a, std::array< int, r > const &c_b, bool conjugate=false)
 Reordering scalar product of a CUDA dense tensor and a standard dense tensor. More...
 
template<Rank summed, Rank frank, Rank srank, typename Scalar , std::enable_if_t<(int(frank)+int(srank) - 2 *int(summed) > 0), int > = 0>
GenericDenseTensor< frank+srank - 2 *summed, Scalar > prodD (DenseTensor< frank, Scalar > const &a, CudaDenseTensor< srank, Scalar > const &b, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b, bool conjugate=false, EliminateZeros const ezeros=EliminateZeros::No, DenseProduct::TemporaryTransposeStorage< Scalar, frank, srank > *=nullptr)
 Product/Contraction of a standard dense tensor and a CUDA dense tensor. More...
 
template<Rank r, typename Scalar >
Scalar prodD (DenseTensor< r, Scalar > const &a, CudaDenseTensor< r, Scalar > const &b, std::array< int, r > const &c_a, std::array< int, r > const &c_b, bool conjugate=false)
 Reordering scalar product of a standard dense tensor and a CUDA dense tensor. More...
 

Detailed Description

Implementation namespace for CUDA dense tensors.