LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
split.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYER_SPLIT_HPP_INCLUDED
28 #define LBANN_LAYER_SPLIT_HPP_INCLUDED
29 
32 #include "lbann/proto/lbann.pb.h"
33 #include "lbann/utils/distconv.hpp"
35 #include <vector>
36 
37 namespace lbann {
38 
39 #ifdef LBANN_HAS_DISTCONV
// Distconv adapter declared for split_layer. It derives from
// data_type_distconv_adapter<TensorDataType> and is only compiled when
// LBANN_HAS_DISTCONV is defined (see the surrounding #ifdef).
//
// NOTE(review): listing line 45 is absent from this extract, so the
// right-hand side of the TensorDevType alias below is not visible here.
// Confirm against the original header before editing this class.
40 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
41 class split_distconv_adapter : public data_type_distconv_adapter<TensorDataType>
42 {
43 public:
44  using TensorDevType =
    // Forwards the owning layer to the base adapter; no other state is set up.
46  split_distconv_adapter(Layer& layer)
47  : data_type_distconv_adapter<TensorDataType>(layer)
48  {}
49  virtual ~split_distconv_adapter() = default;
    // Defined later in this file: marks the input/output distributions and
    // the two error-signal distributions as equivalent.
50  void setup_distributions(tensor_overlap_constraints& constraints) override;
    // Defined later in this file; the visible definition passes the fixed
    // index 0 rather than `index`.
51  dc::Shape get_activations_local_shape(int index) const override;
    // Defined later in this file; builds the tensor from
    // get_prev_activations(0), again without using `index`.
52  std::unique_ptr<TensorDevType> setup_activations_i(int index) const override;
    // Called from split_layer::bp_compute() when distconv is enabled.
    // No definition is visible in this listing.
53  void bp_compute();
54 };
55 #endif // LBANN_HAS_DISTCONV
56 
// Split layer: presents its input tensor to multiple outputs.
//
// Forward prop makes every output a view of the single input (or of a
// per-branch copy when sub-graph parallelism is active); backward prop
// sums the error signals coming back from all children.
//
// NOTE(review): listing line 70 is absent from this extract. The class
// body uses a parameter named T_layout (see get_data_layout()), so a
// template parameter is missing between the two visible ones; its
// default, if any, cannot be determined from here.
69 template <typename TensorDataType,
71  El::Device Dev = El::Device::CPU>
72 class split_layer : public data_type_layer<TensorDataType>
73 {
74 public:
    // Forwards `comm` to the base layer and sets the expected child count
    // to -1.
75  split_layer(lbann_comm* comm) : data_type_layer<TensorDataType>(comm)
76  {
77  this->m_expected_num_child_layers = -1; // No limit on children
78  }
79 
    // Returns a heap-allocated copy of this layer (raw pointer from `new`).
80  split_layer* copy() const override { return new split_layer(*this); }
81 
83 
    // Serialization hook; only the declaration is visible in this listing.
85  template <typename ArchiveT>
86  void serialize(ArchiveT& ar);
87 
    // Fixed properties of this layer type.
89  std::string get_type() const override { return "split"; }
90  data_layout get_data_layout() const override { return T_layout; }
91  El::Device get_device_allocation() const override { return Dev; }
92  bool can_run_inplace() const override { return false; }
93  int get_backprop_requirements() const override { return ERROR_SIGNALS; }
94 
95 #ifdef LBANN_HAS_ONNX
    // Defined later in this file: emits one ONNX "Identity" node per child.
96  void fill_onnx_node(onnx::GraphProto& graph) const override;
97 #endif // LBANN_HAS_ONNX
98 
99 protected:
    // Defined later in this file.
101  void write_specific_proto(lbann_data::Layer& proto) const final;
102 
    // Sync object passed to the COLL_OPT broadcast/allreduce calls below.
103  El::SyncInfo<Dev> syncSubGridCommunication = El::SyncInfo<Dev>();
104 
    // Default constructor is reachable only through cereal::access (and
    // derived classes); it delegates with a null communicator.
105  friend class cereal::access;
106  split_layer() : split_layer(nullptr) {}
107 
    // Gives every output the same dimensions as the input.
    // NOTE(review): listing line 110 is absent from this extract, so the
    // first statement of this body is not visible here.
108  void setup_dims() override
109  {
111  for (int i = 0; i < this->get_num_children(); ++i) {
112  this->set_output_dims(this->get_input_dims(), i);
113  }
114  }
115 
    // Populates every output tensor from the single input tensor.
    //  - Without sub-graph parallelism, each output becomes an
    //    El::LockedView of the input.
    //  - With sub-graph parallelism, the input is first transferred into
    //    the per-branch tensors (get_branch_tag_input) using the path
    //    selected by get_communication_flag(); each output then becomes an
    //    El::LockedView of the branch tensor picked by its child's grid tag.
116  void fp_setup_outputs() override
117  {
118 
119  const auto& input = this->get_prev_activations();
    // Only consumed by the Resize() call in the point-to-point branch.
120  auto mini_batch_size =
121  this->infer_mini_batch_size_from_parents_or_default_to_current();
122 
123  if (this->subgraph_parallelism_execution()) {
124 
125  // if subgraph parallelism is enabled
    // NOTE(review): the dynamic_cast result is dereferenced in every branch
    // below without a null check; an input with a different distribution
    // would yield a null pointer here.
126  auto const* ptr_input = dynamic_cast<El::DistMatrix<TensorDataType,
127  El::STAR,
128  El::VC,
129  El::ELEMENT,
130  Dev> const*>(&input);
131  int tag = 0;
132  auto childs = this->get_child_layers();
133  if (this->get_communication_flag() == COLL_OPT) {
    // Broadcast that also receives the sub-grid communicator and the
    // sync object.
134  El::copy::TranslateBetweenGridsBroadcast<TensorDataType, Dev, Dev>(
135  *ptr_input,
136  this->get_branch_tag_input_vector(),
137  this->get_subgrid_comm(),
138  syncSubGridCommunication);
139  }
140  else if (this->get_communication_flag() == COLL) {
    // Broadcast overload without communicator/sync arguments.
141  El::copy::TranslateBetweenGridsBroadcast<TensorDataType, Dev, Dev>(
142  *ptr_input,
143  this->get_branch_tag_input_vector());
144  }
145  else {
    // Fallback: resize each branch tensor and copy the input into it.
    // NOTE(review): childs[0] is read without checking that any child
    // exists.
146  for (int i = 0; i < childs[0]->get_num_spliting_groups(); i++) {
147 
148  this->get_branch_tag_input(i).Resize(ptr_input->Height(),
149  mini_batch_size);
150  El::Copy(input, this->get_branch_tag_input(i));
151  }
152  }
    // The child's grid tag minus one selects the branch tensor to view.
153  for (int i = 0; i < this->get_num_children(); ++i) {
154  tag = childs[i]->get_grid_tag();
155 
156  El::LockedView(this->get_activations(i),
157  this->get_branch_tag_input(tag - 1));
158  }
159  }
160  else {
161  // If sub-graph parallelism is not enabled
162  for (int i = 0; i < this->get_num_children(); ++i) {
163  El::LockedView(this->get_activations(i), input);
164  }
165  }
166  }
167 
    // Intentionally empty: all forward work happens in fp_setup_outputs().
168  void fp_compute() override {}
169 
    // Writes the sum of all children's error signals into this layer's
    // error signal (get_error_signals()).
    //  - With distconv enabled, the work is delegated to the adapter.
    //  - With sub-graph parallelism, signals are first accumulated per
    //    branch tag into get_branch_tag_input(), then combined across
    //    branches using the path selected by get_communication_flag().
    //  - Otherwise the children's signals are summed directly; with no
    //    children the result is zeroed.
170  void bp_compute() override
171  {
172 
173 #ifdef LBANN_HAS_DISTCONV
174  if (this->distconv_enabled()) {
175  get_distconv_adapter().bp_compute();
176  return;
177  }
178 #endif // LBANN_HAS_DISTCONV
179 
180  auto& gradient_wrt_input = this->get_error_signals();
181  auto childs = this->get_child_layers();
182 
183  if (this->subgraph_parallelism_execution()) {
184  int tag = 0;
185 
    // One flag per splitting group: false until that branch tensor has
    // received its first error signal.
    // NOTE(review): childs[0] is read here before any check that a child
    // exists; the get_num_children() > 0 test further down comes too late
    // to protect this access.
186  std::vector<bool> is_initialized_tensor(
187  childs[0]->get_num_spliting_groups(),
188  false);
189 
190  // Copy data internally with same branch tag
191  for (int i = 0; i < this->get_num_children(); ++i) {
192  tag = childs[i]->get_grid_tag();
193 
194  if (is_initialized_tensor[tag - 1]) {
    // NOTE(review): the scale is DataType(1), not TensorDataType(1).
    // DataType is not declared in this listing; confirm the two agree for
    // every instantiated TensorDataType.
195  El::Axpy(DataType(1),
196  this->get_prev_error_signals(i),
197  this->get_branch_tag_input(tag - 1));
198  }
199  else {
    // First signal for this branch overwrites instead of accumulating.
200  El::Copy(this->get_prev_error_signals(i),
201  this->get_branch_tag_input(tag - 1));
202  is_initialized_tensor[tag - 1] = true;
203  }
204  }
205 
206  // copy and add data from reduced gradients from same branch
207 
208  if (this->get_communication_flag() == COLL_OPT)
209  // If vector is enabled copy data using allreduce operation from
210  // aggregated subgrids to the gradient_wrt_input
211  {
    // NOTE(review): dynamic_cast result is dereferenced without a null
    // check.
212  auto* ptr_gradient = dynamic_cast<
213  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>*>(
214  &gradient_wrt_input);
215 
216  El::copy::TranslateBetweenGridsAllreduce<TensorDataType, Dev, Dev>(
217  *ptr_gradient,
218  this->get_branch_tag_input_vector(),
219  this->get_subgrid_comm(),
220  syncSubGridCommunication);
221  }
222  else if (this->get_communication_flag() == COLL) {
    // NOTE(review): dynamic_cast result is dereferenced without a null
    // check.
223  auto* ptr_gradient = dynamic_cast<
224  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>*>(
225  &gradient_wrt_input);
226 
    // NOTE(review): the meaning of the literal 1 passed as the third
    // argument is not visible in this listing; confirm against the
    // TranslateBetweenGridsAllreduce declaration.
227  El::copy::TranslateBetweenGridsAllreduce<TensorDataType, Dev, Dev>(
228  *ptr_gradient,
229  this->get_branch_tag_input_vector(),
230  1);
231  }
232  else {
    // Fallback: start from branch 0, then add every remaining branch
    // through the temporary gradient buffer.
233  if (this->get_num_children() > 0) {
234  El::Copy(this->get_branch_tag_input(0), gradient_wrt_input);
235  }
236  else {
237  El::Zero(gradient_wrt_input);
238  }
239 
240  for (int i = 1; i < childs[0]->get_num_spliting_groups(); i++) {
241 
242  El::Copy(this->get_branch_tag_input(i), this->get_temp_grad());
243  El::Axpy(DataType(1), this->get_temp_grad(), gradient_wrt_input);
244  }
245  }
246  }
247 
248  else {
    // No sub-graph parallelism: the first child's signal initializes the
    // result and the rest are added to it.
249  if (this->get_num_children() > 0) {
250  El::Copy(this->get_prev_error_signals(0), gradient_wrt_input);
251  }
252  else {
253  El::Zero(gradient_wrt_input);
254  }
255  for (int i = 1; i < this->get_num_children(); ++i) {
256  El::Axpy(DataType(1),
257  this->get_prev_error_signals(i),
258  gradient_wrt_input);
259  }
260  }
261  }
262 
263 #ifdef LBANN_HAS_DISTCONV
    // NOTE(review): this access specifier repeats the `protected:` already
    // in effect above; it changes nothing.
264 protected:
    // Distconv is reported as supported only for the GPU device combined
    // with the DATA_PARALLEL layout.
265  bool is_distconv_supported() const override
266  {
267  return Dev == El::Device::GPU && T_layout == data_layout::DATA_PARALLEL;
268  }
    // Installs a split_distconv_adapter bound to this layer.
269  void setup_distconv_adapter() override
270  {
271  this->get_distconv_adapter_ptr() =
272  std::make_unique<split_distconv_adapter<TensorDataType, T_layout, Dev>>(
273  *this);
274  }
    // Typed accessors for the adapter; both are defined later in this file.
275  split_distconv_adapter<TensorDataType, T_layout, Dev>&
276  get_distconv_adapter() override;
277  const split_distconv_adapter<TensorDataType, T_layout, Dev>&
278  get_distconv_adapter() const override;
279 #endif // LBANN_HAS_DISTCONV
280 };
281 
// Writes the split-layer-specific part of the layer protobuf message:
// records the tensor data type for T and calls mutable_split() on `proto`.
// The value returned by mutable_split() is discarded.
// NOTE(review): presumably the call exists only to mark the `split`
// sub-message as present; confirm against the generated protobuf API.
282 template <typename T, data_layout L, El::Device D>
283 void split_layer<T, L, D>::write_specific_proto(lbann_data::Layer& proto) const
284 {
285  proto.set_datatype(proto::ProtoDataType<T>);
286  proto.mutable_split();
287 }
288 
289 #ifdef LBANN_HAS_ONNX
// Exports this layer to ONNX by adding one "Identity" node to `graph` for
// every child layer.
//
// Naming scheme used for each node:
//   input  : "<parent name>_<index of this layer among the parent's children>"
//   output : "<this layer's name>_<index of the child among this layer's children>"
//   name   : same string as the output
// The node's doc string is this layer's type name and its domain is the
// empty string.
290 template <typename T, data_layout L, El::Device D>
291 void split_layer<T, L, D>::fill_onnx_node(onnx::GraphProto& graph) const
292 {
293  const auto& parent = this->get_parent_layer();
    // Loop-invariant: every emitted node reads the same parent output.
294  const size_t idx_in_parent = parent.find_child_layer_index(*this);
295  for (auto const* child : this->get_child_layers()) {
296  auto* identity = graph.add_node();
297  identity->add_input(parent.get_name() + "_" +
298  std::to_string(idx_in_parent));
299  size_t idx = this->find_child_layer_index(*child);
300  identity->add_output(this->get_name() + "_" + std::to_string(idx));
301  identity->set_name(this->get_name() + "_" + std::to_string(idx));
302  identity->set_op_type("Identity");
303  identity->set_domain("");
304  identity->set_doc_string(this->get_type());
305  }
306 }
307 #endif // LBANN_HAS_ONNX
308 
309 #ifdef LBANN_HAS_DISTCONV
// Non-const adapter accessor: calls the const overload on *this and removes
// const from the returned reference.
//
// NOTE(review): listing line 312 is absent from this extract, so the
// function declarator between the return type and the body is not visible
// here. Presumably it is split_layer<...>::get_distconv_adapter(), matching
// the in-class declaration; confirm against the original header.
310 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
311 split_distconv_adapter<TensorDataType, T_layout, Dev>&
313 {
314  return const_cast<split_distconv_adapter<TensorDataType, T_layout, Dev>&>(
315  static_cast<const split_layer<TensorDataType, T_layout, Dev>&>(*this)
316  .get_distconv_adapter());
317 }
318 
// Const adapter accessor: returns a reference obtained through a
// dynamic_cast to const split_distconv_adapter&.
//
// NOTE(review): listing lines 321 and 325 are absent from this extract, so
// neither the function declarator nor the operand of the dynamic_cast is
// visible here. Presumably the operand is the base-class adapter accessor;
// confirm against the original header.
319 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
320 const split_distconv_adapter<TensorDataType, T_layout, Dev>&
322 {
323  return dynamic_cast<
324  const split_distconv_adapter<TensorDataType, T_layout, Dev>&>(
326 }
327 
// Registers distribution constraints for the split layer: the input and
// output activation distributions are marked equivalent, and so are the
// two error-signal distributions.
//
// NOTE(review): listing line 332 is absent from this extract, so the first
// statement of the body is not visible here.
328 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
329 void split_distconv_adapter<TensorDataType, T_layout, Dev>::setup_distributions(
330  tensor_overlap_constraints& constraints)
331 {
333 
    // x/y: previous activations and activations.
    // dx/dy: error signals and previous error signals.
334  auto& x = this->get_prev_activations_dist();
335  auto& y = this->get_activations_dist();
336  auto& dx = this->get_error_signals_dist();
337  auto& dy = this->get_prev_error_signals_dist();
338 
339  constraints.mark_equivalent(x, y);
340  constraints.mark_equivalent(dx, dy);
341 }
342 
// Returns the local activation shape. The visible fragment forwards to a
// qualified get_activations_local_shape(0), so the fixed index 0 is used
// and the `index` argument is ignored.
//
// NOTE(review): listing line 347 is absent from this extract, so the start
// of the return statement (and the class the call is qualified with) is not
// visible here.
343 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
344 dc::Shape split_distconv_adapter<TensorDataType, T_layout, Dev>::
345  get_activations_local_shape(int index) const
346 {
348  TensorDataType>::get_activations_local_shape(0);
349 }
350 
// Creates the activation tensor for an output by constructing a new
// TensorDevType from the first previous-activations tensor
// (get_prev_activations(0)). The `index` argument is not used, so every
// output is built from the same source tensor.
// NOTE(review): whether the new tensor shares storage with the source
// depends on the TensorDevType constructor, which is not visible here.
351 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
352 std::unique_ptr<
353  typename split_distconv_adapter<TensorDataType, T_layout, Dev>::TensorDevType>
354 split_distconv_adapter<TensorDataType, T_layout, Dev>::setup_activations_i(
355  int index) const
356 {
357  return std::make_unique<TensorDevType>(this->get_prev_activations(0));
358 }
359 #endif // LBANN_HAS_DISTCONV
360 
// Explicit-instantiation declarations. Unless LBANN_SPLIT_LAYER_INSTANTIATE
// is defined, split_layer (and, with distconv, split_distconv_adapter) is
// declared `extern template` for both the DATA_PARALLEL and MODEL_PARALLEL
// layouts.
//
// NOTE(review): listing lines 366 and 377 are absent from this extract.
// Presumably each is the include that expands PROTO_DEVICE over the
// supported type/device combinations; as shown, the macro is defined and
// undefined without a visible use. Confirm against the original header.
361 #ifndef LBANN_SPLIT_LAYER_INSTANTIATE
362 #define PROTO_DEVICE(T, Device) \
363  extern template class split_layer<T, data_layout::DATA_PARALLEL, Device>; \
364  extern template class split_layer<T, data_layout::MODEL_PARALLEL, Device>
365 
367 #undef PROTO_DEVICE
368 #ifdef LBANN_HAS_DISTCONV
369 #define PROTO_DEVICE(T, Device) \
370  extern template class split_distconv_adapter<T, \
371  data_layout::DATA_PARALLEL, \
372  Device>; \
373  extern template class split_distconv_adapter<T, \
374  data_layout::MODEL_PARALLEL, \
375  Device>
376 
378 #undef PROTO_DEVICE
379 #endif // LBANN_HAS_DISTCONV
380 #endif // LBANN_SPLIT_LAYER_INSTANTIATE
381 
382 } // namespace lbann
383 
384 #endif // LBANN_LAYER_SPLIT_HPP_INCLUDED
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Present input tensor to multiple outputs.
Definition: split.hpp:72
void write_specific_proto(lbann_data::Layer &proto) const final
Definition: split.hpp:283
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: split.hpp:90
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
split_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: split.hpp:80
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Definition: split.hpp:108
constexpr El::Device Device
virtual void setup_distributions(tensor_overlap_constraints &constraints)
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: split.hpp:91
void mark_equivalent(dc::Dist &d1, dc::Dist &d2)
std::string to_string(El::Device const &d)
std::string get_type() const override
Get the layer type's name.
Definition: split.hpp:89
void identity(El::Matrix< uint8_t > &mat, El::Int height, El::Int width, El::Int channels=1)
Definition: helper.hpp:48
::distconv::tensor::Shape Shape
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
Definition: split.hpp:170
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: split.hpp:93
void fp_setup_outputs() override
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
Definition: split.hpp:116
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
split_layer(lbann_comm *comm)
Definition: split.hpp:75
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: split.hpp:92
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
Definition: split.hpp:168
dc::TensorDev< OutputTensorDataType > TensorDevType