void syten::CudaDenseTensorImpl::cuda_transpose_recursive_impl(
    CudaConstSpan< Scalar > inp,
    CudaMutSpan< Scalar > out,
    CudaMutSpan< Scalar > workspace,
    ConstSpan< IndexNumber > perm,
    ConstSpan< Index > dim
)
Recursive implementation of the CUDA tensor transposition.
References syten::LimVec< Type, max, MaxType, type >::begin(), syten::Cuda::CudaMutSpan< T >::begin(), syten::Deparallelise::cols(), syten::Cuda::copy(), cuda_transpose_r2(), cuda_transpose_recursive_impl(), syten::RepRegister::dim(), syten::LimVec< Type, max, MaxType, type >::end(), if_constexpr, std::is_sorted(), syten::rank(), syten::Deparallelise::rows(), and SYTEN_ASSERT.
Referenced by cuda_transpose_recursive_impl().