void syten::CudaDenseTensorImpl::cuda_transpose_recursive_impl(
    CudaConstSpan< Scalar > inp,
    CudaMutSpan< Scalar > out,
    [[maybe_unused]] CudaMutSpan< Scalar > workspace,
    [[maybe_unused]] ConstSpan< IndexNumber > perm,
    [[maybe_unused]] ConstSpan< Index > dim
)
Recursive implementation of the CUDA tensor transposition.
References syten::LimVec< Type, max, MaxType, type >::begin(), syten::Cuda::CudaMutSpan< T >::begin(), syten::Deparallelise::cols(), syten::Cuda::copy(), cuda_transpose_r2(), syten::RepRegister::dim(), syten::LimVec< Type, max, MaxType, type >::end(), if_constexpr, std::is_sorted(), syten::rank(), syten::Deparallelise::rows(), and SYTEN_ASSERT.