27 #ifndef LBANN_UTILS_TENSOR_IMPL_HPP 28 #define LBANN_UTILS_TENSOR_IMPL_HPP 31 #include "lbann_config.hpp" 33 #include <El/blas_like/level1/Copy/Translate.hpp> 34 #include <El/blas_like/level1/Copy/TranslateBetweenGrids.hpp> 38 template <
typename TDT>
41 bool copy_async =
false;
42 #if defined(LBANN_HAS_GPU) 43 auto src_dist_data = src.DistData();
44 auto tgt_dist_data = tgt.DistData();
46 if ((src_dist_data.device == El::Device::CPU) &&
47 (tgt_dist_data.device == El::Device::GPU)) {
48 src_dist_data.device = El::Device::GPU;
49 copy_async = (src_dist_data == tgt_dist_data);
51 #endif // defined(LBANN_HAS_GPU) 53 El::CopyAsync(src, tgt);
56 if (src.DistData().grid == tgt.DistData().grid) {
65 template <
typename TDT>
68 El::AbstractDistMatrix<TDT>& tgt)
75 const auto& tgt_dist = tgt.DistData();
76 bool did_copy =
false;
78 #undef PROTO_MATRIX_TYPE 79 #define PROTO_MATRIX_TYPE(T, ColDist, RowDist, Device) \ 80 if constexpr (std::is_same<T, TDT>::value) { \ 81 if (tgt_dist.colDist == ColDist && tgt_dist.rowDist == RowDist && \ 82 tgt_dist.device == Device) { \ 83 using TgtMatrixType = \ 84 El::DistMatrix<T, ColDist, RowDist, El::ELEMENT, Device>; \ 85 utils::details::do_tensor_copy_between_grids( \ 87 dynamic_cast<TgtMatrixType&>(tgt)); \ 91 #define PROTO_DEVICE(T, Device) \ 92 PROTO_MATRIX_TYPE(T, El::STAR, El::VC, Device) \ 93 PROTO_MATRIX_TYPE(T, El::MC, El::MR, Device) \ 94 PROTO_MATRIX_TYPE(T, El::STAR, El::STAR, Device) 97 #undef PROTO_MATRIX_TYPE 101 const auto& src_dist = src.DistData();
102 LBANN_ERROR(
"Failed to copy between two tensors on different grids ",
104 int(src_dist.colDist),
107 int(src_dist.rowDist),
110 int(src_dist.device),
113 int(tgt_dist.colDist),
116 int(tgt_dist.rowDist),
119 int(tgt_dist.device),
124 template <
typename TDT,
131 El::DistMatrix<TDT, ColDist, RowDist, Wrap, Device>& tgt)
135 using TgtMatrixType = El::DistMatrix<TDT, ColDist, RowDist, Wrap, Device>;
136 auto src_dist = src.DistData();
137 TgtMatrixType temp(*src_dist.grid, src_dist.root);
138 if (temp.DistData() == src_dist) {
139 El::LockedView(temp, dynamic_cast<const TgtMatrixType&>(src));
142 temp.Resize(src.Height(), src.Width());
143 if (temp.Participating()) {
149 tgt.Resize(src.Height(), src.Width());
150 El::copy::Translate(temp, tgt);
153 template <
typename TDT>
155 El::AbstractDistMatrix<TDT>& tgt,
159 if (src.DistData() == tgt.DistData()) {
161 El::LockedView(tgt,
dynamic_cast<const El::AbstractDistMatrix<TDT>&
>(src));
163 El::View(tgt,
dynamic_cast<El::AbstractDistMatrix<TDT>&
>(const_cast<BaseDistMat&>(src)));
173 #endif // LBANN_UTILS_TENSOR_IMPL_HPP
constexpr El::Device Device
void do_tensor_copy_between_grids(const BaseDistMat &src, El::AbstractDistMatrix< TDT > &tgt)
Copy between two tensors on different process grids.
void view_or_copy_tensor(const BaseDistMat &src, El::AbstractDistMatrix< TDT > &tgt, bool locked_view=true)
If the distributed tensors have the same distribution, set up the target to use a view of the source tensor...
El::BaseDistMatrix BaseDistMat
void do_tensor_copy(const BaseDistMat &src, El::AbstractDistMatrix< TDT > &tgt)
Function to efficiently select the best method for copying between two distributed tensors...
::distconv::tensor::Distribution Dist