void syten::CudaDenseTensorImpl::cuda_transpose_recursive(
    CudaConstSpan< Scalar > inp,
    CudaMutSpan< Scalar > out,
    ConstSpan< IndexNumber > in_perm,
    ConstSpan< Index > in_dim,
    Conj do_conj = Conj::n()
)
Entry point for the CUDA recursive tensor transposition implementation, to be called only from cuda_transpose_impl().
References syten::Cuda::CudaDynArray< Type >::begin(), syten::Cuda::conj_copy(), syten::Cuda::host_device(), syten::rank(), syten::SpanImpl::ConstSpan< T >::size(), and SYTEN_ASSERT.