Implementations of CUDA products. More...
#include "inc/dense/cuda.h"
#include "inc/dense/cuda_conj.h"
#include "inc/dense/dense_prod.h"
#include "inc/util/cuda_support.h"
#include "inc/dense/cuda_transpose.h"
#include "inc/dense/generic.h"
#include "inc/dense/dense_prod_tmp_transpose_storage.h"
Namespaces | |
namespace | syten |
Syten namespace. | |
namespace | syten::CudaDenseTensorImpl |
Implementation namespace for CUDA dense tensors. | |
Functions | |
void | syten::CudaDenseTensorImpl::cuda_mm_cm (CudaPtr< const double > at, CudaPtr< const double > bt, CudaPtr< double > rt, Size const dx, Size const dy, Size const dz) |
CUDA real double-precision matrix-matrix multiplication. More... | |
void | syten::CudaDenseTensorImpl::cuda_mm_cm (CudaPtr< const float > at, CudaPtr< const float > bt, CudaPtr< float > rt, Size const dx, Size const dy, Size const dz) |
CUDA real float matrix-matrix multiplication. More... | |
void | syten::CudaDenseTensorImpl::cuda_mm_cm (CudaPtr< const std::complex< double > > at, CudaPtr< const std::complex< double > > bt, CudaPtr< std::complex< double > > rt, Size const dx, Size const dy, Size const dz) |
CUDA complex double-precision matrix-matrix multiplication. More... | |
void | syten::CudaDenseTensorImpl::cuda_mm_cm (CudaPtr< const std::complex< float > > at, CudaPtr< const std::complex< float > > bt, CudaPtr< std::complex< float > > rt, Size const dx, Size const dy, Size const dz) |
CUDA complex float matrix-matrix multiplication. More... | |
template<Rank summed, Rank frank, Rank srank, Rank rrank, typename Scalar > | |
void | syten::CudaDenseTensorImpl::gemm_transpose (CudaDenseTensor< frank, Scalar > const &a, CudaDenseTensor< srank, Scalar > const &b, CudaDenseTensor< rrank, Scalar > &r, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b) |
Last part of a transpose-transpose-gemm-transpose CUDA tensor contraction. More... | |
template<Rank summed, Rank frank, Rank srank, typename Scalar , std::enable_if_t<(int(frank)+int(srank) - 2 *int(summed) > 0), int > = 0> | |
CudaDenseTensor< frank+srank - 2 *summed, Scalar > | syten::CudaDenseTensorImpl::prodD (CudaDenseTensor< frank, Scalar > const &a, CudaDenseTensor< srank, Scalar > const &b, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b, bool conjugate=false, EliminateZeros const ezeros=EliminateZeros::No, DenseProduct::TemporaryTransposeStorage< Scalar, frank, srank > *=nullptr) |
Product/Contraction of two dense CUDA tensors. More... | |
template<Rank summed, Rank frank, Rank srank, typename Scalar , std::enable_if_t<(int(frank)+int(srank) - 2 *int(summed) > 0), int > = 0> | |
GenericDenseTensor< frank+srank - 2 *summed, Scalar > | syten::CudaDenseTensorImpl::prodD (CudaDenseTensor< frank, Scalar > const &a, DenseTensor< srank, Scalar > const &b, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b, bool conjugate=false, EliminateZeros const ezeros=EliminateZeros::No, DenseProduct::TemporaryTransposeStorage< Scalar, frank, srank > *=nullptr) |
Product/Contraction of a CUDA dense tensor and a standard dense tensor. More... | |
template<Rank r, typename Scalar > | |
Scalar | syten::CudaDenseTensorImpl::prodD (CudaDenseTensor< r, Scalar > const &a, CudaDenseTensor< r, Scalar > const &b, std::array< int, r > const &c_a, std::array< int, r > const &c_b, bool conjugate=false) |
Reordering scalar product of two CUDA dense tensors. More... | |
template<Rank r, typename Scalar > | |
Scalar | syten::CudaDenseTensorImpl::prodD (CudaDenseTensor< r, Scalar > const &a, DenseTensor< r, Scalar > const &b, std::array< int, r > const &c_a, std::array< int, r > const &c_b, bool conjugate=false) |
Reordering scalar product of a CUDA dense tensor and a standard dense tensor. More... | |
template<Rank summed, Rank frank, Rank srank, typename Scalar , std::enable_if_t<(int(frank)+int(srank) - 2 *int(summed) > 0), int > = 0> | |
GenericDenseTensor< frank+srank - 2 *summed, Scalar > | syten::CudaDenseTensorImpl::prodD (DenseTensor< frank, Scalar > const &a, CudaDenseTensor< srank, Scalar > const &b, std::array< int, frank > const &c_a, std::array< int, srank > const &c_b, bool conjugate=false, EliminateZeros const ezeros=EliminateZeros::No, DenseProduct::TemporaryTransposeStorage< Scalar, frank, srank > *=nullptr) |
Product/Contraction of a standard dense tensor and a CUDA dense tensor. More... | |
template<Rank r, typename Scalar > | |
Scalar | syten::CudaDenseTensorImpl::prodD (DenseTensor< r, Scalar > const &a, CudaDenseTensor< r, Scalar > const &b, std::array< int, r > const &c_a, std::array< int, r > const &c_b, bool conjugate=false) |
Reordering scalar product of a standard dense tensor and a CUDA dense tensor. More... | |
Implementations of CUDA products.