SyTen

◆ cuda_transpose_kernel()

template<Rank rank, typename Scalar >
void syten::CudaDenseTensorImpl::cuda_transpose_kernel ( CudaConstSpan< Scalar >  inp,
CudaMutSpan< Scalar >  out,
ConstSpan< IndexNumber >  perm,
ConstSpan< Index >  dim,
Conj  do_conj 
)

Wrapper around the CUDA transpose kernels which sets everything up such that the functions in cuda_transpose_impl_cukrn.h really only have to launch the kernels.

References syten::Cuda::CudaMutSpan< T >::begin(), cuda_transpose_kernel_impl(), cukrn_transpose_max_rank, syten::Cuda::CudaPtr< T >::dev(), syten::RepRegister::dim(), syten::Cuda::CudaPtr< T >::get(), syten::pi, syten::rank(), syten::Cuda::select_device(), syten::Cuda::CudaMutSpan< T >::size(), syten::SpanImpl::ConstSpan< T >::size(), syten::Cuda::CudaStream::str(), syten::Cuda::CudaStream::sync_destroy(), SYTEN_ASSERT, and cukrn_transpose_array::values.

+ Here is the call graph for this function: