void syten::CudaDenseTensorImpl::cuda_transpose_r2(
    CudaConstSpan< std::complex< double > > inp,
    CudaMutSpan< std::complex< double > > out,
    Index inp_rows,
    Index inp_cols,
    Conj const conj = Conj::n()
)
Transposition of a rank-2 CUDA matrix from range `inp` into range `out`. The input matrix is stored in row-major format with `inp_rows` rows and `inp_cols` columns, respectively.
If `conj` is true and the scalar type is complex, entries are complex-conjugated.
References conj(), syten::Cuda::get_handle(), syten::Cuda::handle_error(), syten::Cuda::host_device(), syten::Cuda::CudaStream::sync_destroy(), and syten::transpose_r2().