void syten::CudaDenseTensorImpl::cuda_transpose_r2(
    CudaConstSpan< float > inp,
    CudaMutSpan< float > out,
    Index inp_rows,
    Index inp_cols,
    Conj const conj = Conj::n()
)
Transposition of a rank-2 CUDA matrix from range `inp` into range `out`, in row-major format, with `inp_rows` rows and `inp_cols` columns respectively.
If `conj` is true and the scalar type is complex, entries are complex-conjugated.
References syten::Cuda::CudaMutSpan< T >::begin(), syten::Cuda::CudaPtr< T >::get(), syten::Cuda::get_handle(), syten::Cuda::handle_error(), syten::Cuda::host_device(), syten::Cuda::CudaStream::sync_destroy(), and syten::transpose_r2().