LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
tensor_impl.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_UTILS_TENSOR_IMPL_HPP
28 #define LBANN_UTILS_TENSOR_IMPL_HPP
29 
30 #include "lbann/utils/tensor.hpp"
31 #include "lbann_config.hpp"
32 
33 #include <El/blas_like/level1/Copy/Translate.hpp>
34 #include <El/blas_like/level1/Copy/TranslateBetweenGrids.hpp>
35 
36 namespace lbann {
37 
38 template <typename TDT>
39 void do_tensor_copy(const BaseDistMat& src, El::AbstractDistMatrix<TDT>& tgt)
40 {
41  bool copy_async = false;
42 #if defined(LBANN_HAS_GPU)
43  auto src_dist_data = src.DistData();
44  auto tgt_dist_data = tgt.DistData();
45  // Asynchronously copy CPU data to GPU data if they are otherwise aligned
46  if ((src_dist_data.device == El::Device::CPU) &&
47  (tgt_dist_data.device == El::Device::GPU)) {
48  src_dist_data.device = El::Device::GPU;
49  copy_async = (src_dist_data == tgt_dist_data);
50  }
51 #endif // defined(LBANN_HAS_GPU)
52  if (copy_async) {
53  El::CopyAsync(src, tgt);
54  }
55  else {
56  if (src.DistData().grid == tgt.DistData().grid) {
57  El::Copy(src, tgt);
58  }
59  else {
61  }
62  }
63 }
64 
65 template <typename TDT>
67  const BaseDistMat& src,
68  El::AbstractDistMatrix<TDT>& tgt)
69 {
70 
71  // Determine matrix class and forward to template function
72  // Note: We use instantiate_device.hpp to deal with the annoyances
73  // of different FP16 types on CPU and GPU.
75  const auto& tgt_dist = tgt.DistData();
76  bool did_copy = false;
77 #undef PROTO_DEVICE
78 #undef PROTO_MATRIX_TYPE
79 #define PROTO_MATRIX_TYPE(T, ColDist, RowDist, Device) \
80  if constexpr (std::is_same<T, TDT>::value) { \
81  if (tgt_dist.colDist == ColDist && tgt_dist.rowDist == RowDist && \
82  tgt_dist.device == Device) { \
83  using TgtMatrixType = \
84  El::DistMatrix<T, ColDist, RowDist, El::ELEMENT, Device>; \
85  utils::details::do_tensor_copy_between_grids( \
86  src, \
87  dynamic_cast<TgtMatrixType&>(tgt)); \
88  did_copy = true; \
89  } \
90  }
91 #define PROTO_DEVICE(T, Device) \
92  PROTO_MATRIX_TYPE(T, El::STAR, El::VC, Device) \
93  PROTO_MATRIX_TYPE(T, El::MC, El::MR, Device) \
94  PROTO_MATRIX_TYPE(T, El::STAR, El::STAR, Device)
96 #undef PROTO_DEVICE
97 #undef PROTO_MATRIX_TYPE
98 
99  // Check if copy succeeded
100  if (!did_copy) {
101  const auto& src_dist = src.DistData();
102  LBANN_ERROR("Failed to copy between two tensors on different grids ",
103  "(src: colDist=",
104  int(src_dist.colDist),
105  ", ",
106  "rowDist=",
107  int(src_dist.rowDist),
108  ", ",
109  "device=",
110  int(src_dist.device),
111  "; "
112  "tgt: colDist=",
113  int(tgt_dist.colDist),
114  ", ",
115  "rowDist=",
116  int(tgt_dist.rowDist),
117  ", ",
118  "device=",
119  int(tgt_dist.device),
120  ")");
121  }
122 }
123 
124 template <typename TDT,
125  El::Dist ColDist,
126  El::Dist RowDist,
127  El::DistWrap Wrap,
130  const BaseDistMat& src,
131  El::DistMatrix<TDT, ColDist, RowDist, Wrap, Device>& tgt)
132 {
133 
134  // Make sure matrix layouts are identical
135  using TgtMatrixType = El::DistMatrix<TDT, ColDist, RowDist, Wrap, Device>;
136  auto src_dist = src.DistData();
137  TgtMatrixType temp(*src_dist.grid, src_dist.root);
138  if (temp.DistData() == src_dist) {
139  El::LockedView(temp, dynamic_cast<const TgtMatrixType&>(src));
140  }
141  else {
142  temp.Resize(src.Height(), src.Width());
143  if (temp.Participating()) {
144  El::Copy(src, temp);
145  }
146  }
147 
148  // Translate matrix between grids
149  tgt.Resize(src.Height(), src.Width());
150  El::copy::Translate(temp, tgt);
151 }
152 
153 template <typename TDT>
155  El::AbstractDistMatrix<TDT>& tgt,
156  bool locked_view)
157 {
158 
159  if (src.DistData() == tgt.DistData()) {
160  if (locked_view) {
161  El::LockedView(tgt, dynamic_cast<const El::AbstractDistMatrix<TDT>&>(src));
162  } else {
163  El::View(tgt, dynamic_cast<El::AbstractDistMatrix<TDT>&>(const_cast<BaseDistMat&>(src)));
164  }
165  }
166  else {
167  do_tensor_copy(src, tgt);
168  }
169 }
170 
171 } // namespace lbann
172 
173 #endif // LBANN_UTILS_TENSOR_IMPL_HPP
#define LBANN_ERROR(...)
Definition: exception.hpp:37
constexpr El::Device Device
void do_tensor_copy_between_grids(const BaseDistMat &src, El::AbstractDistMatrix< TDT > &tgt)
Copy between two tensors on different process grids.
Definition: tensor_impl.hpp:66
void view_or_copy_tensor(const BaseDistMat &src, El::AbstractDistMatrix< TDT > &tgt, bool locked_view=true)
If the distributed tensors have the same distribution, set up the target to use a view of the source tensor...
El::BaseDistMatrix BaseDistMat
Definition: base.hpp:121
void do_tensor_copy(const BaseDistMat &src, El::AbstractDistMatrix< TDT > &tgt)
Function to efficiently select the best method for copying between two distributed tensors...
Definition: tensor_impl.hpp:39
::distconv::tensor::Distribution Dist