LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
concatenate.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYERS_TRANSFORM_CONCATENATE_HPP_INCLUDED
28 #define LBANN_LAYERS_TRANSFORM_CONCATENATE_HPP_INCLUDED
29 
31 
33 #include "lbann/utils/distconv.hpp"
35 
37 #include "lbann/proto/layers.pb.h"
39 
40 namespace lbann {
41 
42 #ifdef LBANN_HAS_DISTCONV
43 template <typename TensorDataType, data_layout Layout, El::Device Device>
44 class concatenate_distconv_adapter
45  : public data_type_distconv_adapter<TensorDataType>
46 {
47 public:
48  using TensorDevType =
50  concatenate_distconv_adapter(Layer& layer)
51  : data_type_distconv_adapter<TensorDataType>(layer)
52  {}
53  virtual ~concatenate_distconv_adapter() = default;
54  dc::Shape get_activations_local_shape(int index = 0) const override;
55  void fp_compute();
56  void bp_compute();
57 };
58 #endif // LBANN_HAS_DISTCONV
59 
65 template <typename TensorDataType,
67  El::Device Device = El::Device::CPU>
68 class concatenate_layer : public data_type_layer<TensorDataType>
69 {
70 public:
71  concatenate_layer(lbann_comm* comm, size_t concat_dim);
72  concatenate_layer(const concatenate_layer& other) = default;
73  concatenate_layer& operator=(const concatenate_layer& other) = default;
74 
75  concatenate_layer* copy() const override;
76 
78 
80  template <typename ArchiveT>
81  void serialize(ArchiveT& ar);
82 
84 
85  std::string get_type() const override;
86  data_layout get_data_layout() const override;
87  El::Device get_device_allocation() const override;
88  bool can_run_inplace() const override { return false; }
89  int get_backprop_requirements() const override { return ERROR_SIGNALS; }
90 
91  description get_description() const override;
92 
93 protected:
95  void write_specific_proto(lbann_data::Layer& proto) const final;
96 
97  El::SyncInfo<Device> syncSubGridCommunication = El::SyncInfo<Device>();
98 
99  friend class cereal::access;
101 
102  void setup_pointers() override;
103  void setup_dims() override;
104 
105  void fp_setup_outputs() override;
106  void bp_setup_gradient_wrt_inputs() override;
107  void fp_compute() override;
108  void bp_compute() override;
109 
110 private:
112  size_t m_concat_dim;
113 
114 #ifdef LBANN_HAS_GPU
115 
120  std::vector<unsigned char> m_workspace;
126  gpu_lib::event_wrapper m_workspace_event;
127 #endif // LBANN_HAS_GPU
128 
129  template <typename U>
130  friend void fp_compute_impl(concatenate_layer<U, Layout, Device>&, size_t);
131  template <typename U, El::Device D>
132  friend void
134  template <typename U>
135  friend void bp_compute_impl(concatenate_layer<U, Layout, Device>&, size_t);
136 
137  void fp_compute_subgrid();
138 
139  void bp_compute_subgrid();
140 
141 #ifdef LBANN_HAS_DISTCONV
142  friend class concatenate_distconv_adapter<TensorDataType, Layout, Device>;
143 
144 protected:
145  bool is_distconv_supported() const override
146  {
147  // Only supported for the channel dimension
148  return Device == El::Device::GPU && Layout == data_layout::DATA_PARALLEL &&
149  m_concat_dim == 0;
150  }
151  void setup_distconv_adapter() override
152  {
153  this->get_distconv_adapter_ptr() = std::make_unique<
154  concatenate_distconv_adapter<TensorDataType, Layout, Device>>(*this);
155  }
156  concatenate_distconv_adapter<TensorDataType, Layout, Device>&
157  get_distconv_adapter() override;
158  const concatenate_distconv_adapter<TensorDataType, Layout, Device>&
159  get_distconv_adapter() const override;
160 #endif // LBANN_HAS_DISTCONV
161 };
162 
163 // =========================================================
164 // Implementation
165 // =========================================================
166 
167 template <typename T, data_layout L, El::Device D>
169  lbann_data::Layer& proto) const
170 {
171  proto.set_datatype(proto::ProtoDataType<T>);
172  auto* msg = proto.mutable_concatenation();
173  msg->set_axis(m_concat_dim);
174 }
175 
176 template <typename TensorDataType, data_layout Layout, El::Device Device>
178  lbann_comm* comm,
179  size_t concat_dim)
180  : data_type_layer<TensorDataType>(comm), m_concat_dim{concat_dim}
181 {
182  this->m_expected_num_parent_layers = -1; // No limit on parents
183 }
184 
185 template <typename TensorDataType, data_layout Layout, El::Device Device>
188 {
189  return new concatenate_layer(*this);
190 }
191 
192 template <typename TensorDataType, data_layout Layout, El::Device Device>
194 {
195  return "concatenate";
196 }
197 
198 template <typename TensorDataType, data_layout Layout, El::Device Device>
201 {
202  return Layout;
203 }
204 
205 template <typename TensorDataType, data_layout Layout, El::Device Device>
208 {
209  return Device;
210 }
211 
212 template <typename TensorDataType, data_layout Layout, El::Device Device>
215 {
217  desc.add("Concatenation dimension", m_concat_dim);
218  return desc;
219 }
220 
221 template <typename TensorDataType, data_layout Layout, El::Device Device>
223 {
225  if (this->get_num_parents() < 1) {
227  " layer \"",
228  this->get_name(),
229  "\" ",
230  "has no parents");
231  }
232 }
233 
234 template <typename TensorDataType, data_layout Layout, El::Device Device>
236 {
238 
239  // Dimensions of first input tensor
240  auto output_dims = this->get_input_dims(0);
241  if (m_concat_dim >= output_dims.size()) {
242  std::ostringstream err;
243  err << get_type() << " layer \"" << this->get_name() << "\" "
244  << "is concatenating along dimension " << m_concat_dim << ", "
245  << "but it has a " << output_dims.size() << "-D input tensor "
246  << "(parent layer \"" << this->get_parent_layers()[0]->get_name()
247  << "\" "
248  << "outputs with dimensions ";
249  for (size_t d = 0; d < output_dims.size(); ++d) {
250  err << (d > 0 ? " x " : "") << output_dims[d];
251  }
252  err << ")";
253  LBANN_ERROR(err.str());
254  }
255 
256  // Dimensions of remaining input tensors
257  for (int j = 1; j < this->get_num_parents(); ++j) {
258  const auto& input_dims = this->get_input_dims(j);
259  if (input_dims.size() != output_dims.size() ||
260  !std::equal(input_dims.begin(),
261  input_dims.begin() + m_concat_dim,
262  output_dims.begin()) ||
263  !std::equal(input_dims.begin() + m_concat_dim + 1,
264  input_dims.end(),
265  output_dims.begin() + m_concat_dim + 1)) {
266  std::ostringstream err;
267  err << get_type() << " layer \"" << this->get_name() << "\" "
268  << "expects input tensors with dimensions ";
269  for (size_t d = 0; d < output_dims.size(); ++d) {
270  err << (d > 0 ? " x " : "");
271  if (d == m_concat_dim) {
272  err << "X";
273  }
274  else {
275  err << output_dims[d];
276  }
277  }
278  err << ", but parent layer "
279  << "\"" << this->get_parent_layers()[j]->get_name() << "\" "
280  << "outputs with dimensions ";
281  for (size_t d = 0; d < input_dims.size(); ++d) {
282  err << (d > 0 ? " x " : "") << input_dims[d];
283  }
284  LBANN_ERROR(err.str());
285  }
286  output_dims[m_concat_dim] += input_dims[m_concat_dim];
287  }
288 
289  // Model-parallel implementation only supports flat data
290  if (Layout == data_layout::MODEL_PARALLEL &&
291  get_linear_size(m_concat_dim, output_dims.data()) > 1) {
292  LBANN_ERROR(this->get_type(),
293  " layer \"",
294  this->get_name(),
295  "\" ",
296  "attempted to concatenate along dimension ",
297  m_concat_dim,
298  ", ",
299  "but model-parallel concatenate layer "
300  "only supports flat data");
301  }
302 
303  // Update output dimensions
304  this->set_output_dims(output_dims);
305 }
306 
307 template <typename TensorDataType, data_layout Layout, El::Device Device>
309 {
310 #ifdef LBANN_HAS_DISTCONV
311  if (!this->keep_original_outputs(0))
312  return;
313 #endif // LBANN_HAS_DISTCONV
314  const auto& input0 = this->get_prev_activations(0);
315  auto& output = this->get_activations();
316  output.Empty(false);
317  if (this->get_num_parents() == 1) {
318  El::LockedView(output, input0);
319  }
320  else {
321  if (this->subgraph_parallelism_execution() == false) {
322  output.AlignWith(input0);
323  }
324 
325  output.Resize(this->get_output_size(), input0.Width());
326  }
327 }
328 
329 template <typename TensorDataType, data_layout Layout, El::Device Device>
331 {
332  const auto& input_dims = this->get_input_dims(0);
333 
334  int split_dim = int(input_dims[m_concat_dim]);
335 
336  auto& input = this->get_activations();
337 
338  auto* ptr_input = dynamic_cast<
339  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Device>*>(
340  &input);
341 
342  El::copy::TranslateBetweenGridsGather<TensorDataType, Device, Device>(
343  *ptr_input,
344  this->get_all_prev_activations(),
345  split_dim,
346  this->get_subgrid_comm(),
348 }
349 
350 template <typename TensorDataType, data_layout Layout, El::Device Device>
352 {
353  const auto& input_dims = this->get_input_dims(0);
354 
355  int split_dim = int(input_dims[m_concat_dim] * this->get_num_parents());
356 
357  const auto& input_grad = this->get_prev_error_signals();
358 
359  auto const* ptr_input_grad = dynamic_cast<
360  El::
361  DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Device> const*>(
362  &input_grad);
363 
364  if (this->get_communication_flag() == COLL_OPT) {
365  El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
366  *ptr_input_grad,
367  this->get_all_error_signals(),
368  split_dim,
369  this->get_subgrid_comm(),
371  3);
372  }
373  else if (this->get_communication_flag() == COLL) {
374  El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
375  *ptr_input_grad,
376  this->get_all_error_signals(),
377  split_dim,
378  this->get_subgrid_comm(),
380  2);
381  }
382  else {
383  El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
384  *ptr_input_grad,
385  this->get_all_error_signals(),
386  split_dim,
387  this->get_subgrid_comm(),
389  1);
390  }
391 }
392 
393 template <typename TensorDataType, data_layout Layout, El::Device Device>
395 {
396  const auto& input_dims = this->get_input_dims();
397  const size_t num_dims = input_dims.size();
398 #ifdef LBANN_HAS_DISTCONV
399  if (this->distconv_enabled()) {
400  get_distconv_adapter().fp_compute();
401  return;
402  }
403 #endif
404 
405  // Just make a view if there is one input
406  if (this->get_num_parents() == 1) {
407  El::LockedView(this->get_activations(), this->get_prev_activations(0));
408  return;
409  }
410 
411  // Perform concatenation
412  if (m_concat_dim == num_dims - 1 && this->subgraph_parallelism_execution()) {
413  this->fp_compute_subgrid();
414  }
415  else {
417  }
418 }
419 
420 template <typename TensorDataType, El::Device Device>
423 {
424 #ifdef LBANN_HAS_DISTCONV
425  if (l.distconv_enabled()) {
426  LBANN_ERROR("Model-parallel LBANN matrix not supported in distconv");
427  }
428 #endif // LBANN_HAS_DISTCONV
429 
430  // Slice Elemental matrices
431  // Note: Assume each mini-batch sample is flat.
432  const size_t num_inputs = l.get_num_parents();
433  const auto& output_grad = l.get_prev_error_signals();
434  size_t offset = 0;
435  for (size_t j = 0; j < num_inputs; ++j) {
436  auto& input_grad = l.get_error_signals(j);
437  const auto& input_size = l.get_input_size(j);
438  El::LockedView(input_grad,
439  output_grad,
440  El::IR(offset, offset + input_size),
441  El::ALL);
442  offset += input_size;
443  }
444 }
445 
446 template <typename TensorDataType, El::Device Device>
449 {
450 
451  const size_t num_inputs = l.get_num_parents();
452  const auto& output_grad = l.get_prev_error_signals();
453  if (num_inputs == 1) {
454 #ifdef LBANN_HAS_DISTCONV
455  if (!l.keep_original_gradient_wrt_inputs(0))
456  return;
457 #endif
458  El::LockedView(l.get_error_signals(0), output_grad);
459  }
460  else {
461  for (size_t j = 0; j < num_inputs; ++j) {
462 #ifdef LBANN_HAS_DISTCONV
463  if (!l.keep_original_gradient_wrt_inputs(j))
464  continue;
465 #endif
466  auto& input_grad = l.get_error_signals(j);
467  if (l.subgraph_parallelism_execution() == false) {
468  input_grad.AlignWith(output_grad);
469  }
470  input_grad.Resize(l.get_input_size(j), output_grad.Width());
471  }
472  }
473 }
474 
475 template <typename TensorDataType, data_layout Layout, El::Device Device>
478 {
480 }
481 
482 template <typename TensorDataType, data_layout Layout, El::Device Device>
484 {
485 
486  const auto& input_dims = this->get_input_dims();
487  const size_t num_dims = input_dims.size();
488 
489 #ifdef LBANN_HAS_DISTCONV
490  if (this->distconv_enabled()) {
491  get_distconv_adapter().bp_compute();
492  return;
493  }
494 #endif
495 
496  // Just make a view if there is one input
497  if (this->get_num_parents() == 1) {
498  El::LockedView(this->get_error_signals(0), this->get_prev_error_signals());
499  return;
500  }
501 
502  // Perform slice
503  if (m_concat_dim == num_dims - 1 && this->subgraph_parallelism_execution()) {
504  this->bp_compute_subgrid();
505  }
506  else {
508  }
509 }
510 
511 #ifdef LBANN_HAS_DISTCONV
512 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
513 concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&
515 {
516  return const_cast<
517  concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&>(
518  static_cast<const concatenate_layer<TensorDataType, T_layout, Dev>&>(*this)
519  .get_distconv_adapter());
520 }
521 
522 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
523 const concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&
525 {
526  return dynamic_cast<
527  const concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&>(
529 }
530 
// Local shape of the adapter's output (activations) tensor.
//
// Only output index 0 is accepted (enforced by assert_eq). The result is
// the local shape of the tensor returned by get_prev_activations() (called
// with no index), with the entry at index -2 overwritten by the entry at
// index -2 of get_activations_shape().
// NOTE(review): index -2 is presumably the channel dimension, since
// is_distconv_supported() only allows concatenation along the channel
// dimension -- confirm against DistConv's negative-index convention.
531 template <typename TensorDataType, data_layout Layout, El::Device Device>
532 dc::Shape concatenate_distconv_adapter<TensorDataType, Layout, Device>::
533  get_activations_local_shape(int index) const
534 {
535  assert_eq(index, 0);
536  auto shape = this->get_prev_activations().get_local_shape();
537  shape[-2] = this->get_activations_shape()[-2];
538  return shape;
539 }
540 
// Forward pass of the distconv adapter.
//
// Requires the owning layer to have exactly two parents (assert_always),
// then calls dc::tensor::Concatenate with output tensor 0, the two
// previous-activation tensors (indices 0 and 1), and the stream returned
// by default_hydrogen_stream().
541 template <typename TensorDataType, data_layout Layout, El::Device Device>
542 void concatenate_distconv_adapter<TensorDataType, Layout, Device>::fp_compute()
543 {
544  assert_always(this->layer().get_num_parents() == 2);
545  dc::tensor::Concatenate(this->get_activations(0),
546  this->get_prev_activations(0),
547  this->get_prev_activations(1),
548  default_hydrogen_stream());
549 }
550 
// Backward pass of the distconv adapter.
//
// Calls dc::tensor::Slice with error-signal tensors 0 and 1, the previous
// error-signal tensor 0, and the stream returned by
// default_hydrogen_stream().
// NOTE(review): error signals 0 and 1 are accessed unconditionally, but
// unlike fp_compute() there is no check here that the layer has exactly two
// parents -- confirm whether an assert_always is wanted for symmetry.
551 template <typename TensorDataType, data_layout Layout, El::Device Device>
552 void concatenate_distconv_adapter<TensorDataType, Layout, Device>::bp_compute()
553 {
554  dc::tensor::Slice(this->get_error_signals(0),
555  this->get_error_signals(1),
556  this->get_prev_error_signals(0),
557  default_hydrogen_stream());
558 }
559 #endif // LBANN_HAS_DISTCONV
560 
561 #ifndef LBANN_CONCATENATE_LAYER_INSTANTIATE
562 
// PROTO_DEVICE(T, Device): expands to explicit instantiation declarations
// (extern template) of concatenate_layer<T, ..., Device> for both
// data_layout::DATA_PARALLEL and data_layout::MODEL_PARALLEL. Only defined
// when LBANN_CONCATENATE_LAYER_INSTANTIATE is not defined; the macro is
// #undef'd again before the guard closes.
// NOTE(review): the line that expands this macro is not visible in this
// listing -- confirm where the (T, Device) combinations come from.
563 #define PROTO_DEVICE(T, Device) \
564  extern template class concatenate_layer<T, \
565  data_layout::DATA_PARALLEL, \
566  Device>; \
567  extern template class concatenate_layer<T, \
568  data_layout::MODEL_PARALLEL, \
569  Device>
570 
572 #undef PROTO_DEVICE
573 
574 #endif // LBANN_CONCATENATE_LAYER_INSTANTIATE
575 
576 } // namespace lbann
577 
578 #endif // LBANN_LAYERS_TRANSFORM_CONCATENATE_HPP_INCLUDED
void fp_setup_outputs() override
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
bool distconv_enabled() const
Indicate whether distconv is enabled.
Definition: layer.hpp:1082
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
std::vector< std::unique_ptr< InputAbsDistMatrixType > > & get_all_error_signals()
void write_specific_proto(lbann_data::Layer &proto) const final
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
size_t m_concat_dim
Tensor dimension to concatenate along.
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
auto get_linear_size(std::vector< T > const &dims)
Definition: dim_helpers.hpp:59
friend void fp_compute_impl(concatenate_layer< U, Layout, Device > &, size_t)
El::SyncInfo< Device > syncSubGridCommunication
Definition: concatenate.hpp:97
#define LBANN_ERROR(...)
Definition: exception.hpp:37
int get_output_size(size_t output_index=0) const
Get output tensor size.
int get_num_parents() const noexcept
Get number of parent layers.
Definition: layer.hpp:574
void setup_pointers() override
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
Generates nicely formatted description messages.
Definition: description.hpp:49
std::vector< std::unique_ptr< InputAbsDistMatrixType > > & get_all_prev_activations()
virtual description get_description() const
Human-readable description.
constexpr El::Device Device
OutputAbsDistMatrixType & get_prev_error_signals(int child_index=0)
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: concatenate.hpp:89
Concatenate tensors along specified dimension.
Definition: concatenate.hpp:68
int get_input_size(size_t input_index=0) const
Get input tensor size.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
std::vector< const Layer * > get_parent_layers() const
::distconv::tensor::Shape Shape
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
concatenate_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
void bp_setup_gradient_wrt_inputs() override
Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w...
void bp_setup_gradient_wrt_inputs_impl(concatenate_layer< TensorDataType, data_layout::MODEL_PARALLEL, Device > &l)
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
friend void bp_compute_impl(concatenate_layer< U, Layout, Device > &, size_t)
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: concatenate.hpp:88
SubGraphCommunication get_communication_flag()
Definition: layer.hpp:487
virtual void setup_pointers()
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
std::string get_type() const override
Get the layer type's name.
friend void bp_setup_gradient_wrt_inputs_impl(concatenate_layer< U, Layout, D > &)
bool subgraph_parallelism_execution() const noexcept
Definition: layer.hpp:522
int m_expected_num_parent_layers
Definition: layer.hpp:838
description get_description() const override
Human-readable description.
const InputAbsDistMatrixType & get_error_signals(const Layer &parent) const override
dc::TensorDev< OutputTensorDataType > TensorDevType