Transposition functions for CUDA tensors. More...
#include "inc/dense/cuda.h"
#include "inc/dense/cuda_transpose_impl.h"
#include "inc/util/cuda_span.h"
#include "inc/util/limvec.h"
#include "inc/util/pair.h"
Namespaces | |
namespace | syten |
Syten namespace. | |
namespace | syten::CudaDenseTensorImpl |
Implementation namespace for CUDA dense tensors. | |
Functions | |
template<Rank rank, typename Scalar , TransposeMethod method = TransposeMethod::Default, Rank... Ranks> | |
void | syten::CudaDenseTensorImpl::cuda_transpose_impl (CudaConstSpan< Scalar > inp, CudaMutSpan< Scalar > out, ConstSpan< Index > in_perm, ConstSpan< Index > in_dim, Conj do_conj=Conj::n(), bool do_checks=true, std::index_sequence< Ranks... >={}) |
Implementation of the CUDA tensor transposition routines, to be called only from cuda_transpose(). More... | |
void | syten::CudaDenseTensorImpl::cuda_transpose_r2 (CudaConstSpan< double > inp, CudaMutSpan< double > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n()) |
Transposition of a rank-2 CUDA matrix in row-major format from range inp, which has inp_rows rows and inp_cols columns, into range out. More... | |
void | syten::CudaDenseTensorImpl::cuda_transpose_r2 (CudaConstSpan< float > inp, CudaMutSpan< float > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n()) |
Transposition of a rank-2 CUDA matrix in row-major format from range inp, which has inp_rows rows and inp_cols columns, into range out. More... | |
void | syten::CudaDenseTensorImpl::cuda_transpose_r2 (CudaConstSpan< std::complex< double > > inp, CudaMutSpan< std::complex< double > > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n()) |
Transposition of a rank-2 CUDA matrix in row-major format from range inp, which has inp_rows rows and inp_cols columns, into range out. More... | |
void | syten::CudaDenseTensorImpl::cuda_transpose_r2 (CudaConstSpan< std::complex< float > > inp, CudaMutSpan< std::complex< float > > out, Index inp_rows, Index inp_cols, Conj const conj=Conj::n()) |
Transposition of a rank-2 CUDA matrix in row-major format from range inp, which has inp_rows rows and inp_cols columns, into range out. More... | |
template<Rank rank, typename Scalar > | |
void | syten::CudaDenseTensorImpl::cuda_transpose_recursive (CudaConstSpan< Scalar > inp, CudaMutSpan< Scalar > out, ConstSpan< IndexNumber > in_perm, ConstSpan< Index > in_dim, Conj do_conj=Conj::n()) |
Entry point for the CUDA recursive tensor transposition implementation, to be called only from cuda_transpose_impl(). More... | |
template<Rank rank, typename Scalar > | |
void | syten::CudaDenseTensorImpl::cuda_transpose_recursive_impl (CudaConstSpan< Scalar > inp, CudaMutSpan< Scalar > out, CudaMutSpan< Scalar > workspace, ConstSpan< IndexNumber > perm, ConstSpan< Index > dim) |
Recursive implementation of the CUDA tensor transposition. More... | |
Transposition functions for CUDA tensors.