LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
cross_grid_sum.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYER_CROSS_GRID_SUM_HPP_INCLUDED
28 #define LBANN_LAYER_CROSS_GRID_SUM_HPP_INCLUDED
29 
32 
33 namespace lbann {
34 
35 template <typename TensorDataType, El::Device Dev>
36 class cross_grid_sum_layer final : public data_type_layer<TensorDataType>
37 {
38 public:
// Constructor: forwards the communicator to data_type_layer and lifts the
// limits on the number of parent and child layers (a negative expected count
// means "no limit").
// NOTE(review): the listing numbers jump from 42 to 44, so one line of this
// body is not visible in this view.
39  cross_grid_sum_layer(lbann_comm* comm) : data_type_layer<TensorDataType>(comm)
40  {
41  this->m_expected_num_parent_layers = -1; // No limit on parents
42  this->m_expected_num_child_layers = -1; // No limit on children
44  }
45 
// Returns a new heap-allocated layer built with the copy constructor.
// The result is a raw pointer; presumably the caller takes ownership --
// confirm against the base-class contract.
46  cross_grid_sum_layer* copy() const final
47  {
48  return new cross_grid_sum_layer(*this);
49  }
// Name of this layer type: "cross_grid_sum".
50  std::string get_type() const final { return "cross_grid_sum"; }
// NOTE(review): the two brace lines below are the remains of a definition
// whose signature (listing line 51) and body (listing line 53) are missing
// from this view.
52  {
54  }
// Reports the Dev template parameter as the device allocation.
55  El::Device get_device_allocation() const final { return Dev; }
// Always false: this layer reports that it cannot run in place.
56  bool can_run_inplace() const override { return false; }
// Reports ERROR_SIGNALS as the backpropagation requirement.
57  int get_backprop_requirements() const override { return ERROR_SIGNALS; }
58 
59 protected:
// Declaration only; the definition is not part of this header listing.
// Receives the lbann_data::Layer message by mutable reference; presumably it
// fills in the fields specific to this layer -- confirm in the definition.
61  void write_specific_proto(lbann_data::Layer& proto) const final;
62 
63 private:
// Pointer setup hook. The visible branch handles a layer with fewer than one
// parent and assembles a message naming the layer and ending in
// "has no parent layers".
// NOTE(review): listing lines 66 and 68 are missing from this view, so the
// statement before the check and the call that receives the message
// arguments are not shown.
64  void setup_pointers() final
65  {
67  if (this->get_num_parents() < 1) {
69  " layer \"",
70  this->get_name(),
71  "\" has no parent layers");
72  }
73  }
74 
// Dimension setup hook.
//  - Sets the dimensions of output 0 to the dimensions of input 0.
//  - With LBANN_DEBUG defined, prints each dimension of input 0 to std::cout.
//  - Compares every parent's input dimensions with the dimensions of output 0
//    and, on the first mismatch, raises LBANN_ERROR with a message listing
//    each parent's name and its dimensions joined by " x ".
// NOTE(review): listing line 77 (the first statement of the body) is missing
// from this view.
75  void setup_dims() final
76  {
78  this->set_output_dims(this->get_input_dims());
79 
80  // print dims
81 #ifdef LBANN_DEBUG
82  {
83  const auto& dims_print = this->get_input_dims();
84  auto const dims_size = dims_print.size();
85  for (auto ii = 0UL; ii < dims_size; ++ii) {
86  std::cout << "Index:" << ii << " dim" << dims_print[ii] << "\n";
87  }
88  }
89 #endif // LBANN_DEBUG
90 
91  // Check that input dimensions match
92  const auto& output_dims = this->get_output_dims();
93  for (int i = 0; i < this->get_num_parents(); ++i) {
94  if (this->get_input_dims(i) != output_dims) {
// Mismatch found: build one message describing every parent, then raise.
95  const auto& parents = this->get_parent_layers();
96  std::stringstream err;
97  err << get_type() << " layer \"" << this->get_name() << "\" "
98  << "has input tensors with incompatible dimensions (";
99  for (int j = 0; j < this->get_num_parents(); ++j) {
100  const auto& dims = this->get_input_dims(j);
101  err << (j > 0 ? ", " : "") << "layer \"" << parents[j]->get_name()
102  << "\" outputs ";
103  for (size_t k = 0; k < dims.size(); ++k) {
104  err << (k > 0 ? " x " : "") << dims[k];
105  }
106  }
107  err << ")";
108  LBANN_ERROR(err.str());
109  }
110  }
111  }
112 
113  void fp_compute() final
114  {
115  auto parents = this->get_parent_layers();
116  auto childs = this->get_child_layers();
117 
118  int tag = -1;
119  for (int i = 0; i < El::To<int>(parents.size()); i++) {
120  if (this->get_activations(i).Grid().InGrid())
121  tag = i;
122  }
123 
124  auto& output = this->get_activations(tag);
125  auto& input = this->get_prev_activations(tag);
126  El::Copy(input, output);
127 
128  auto& output_cast = dynamic_cast<
129  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>&>(
130  output);
131 
132  auto const syncInfoOutput =
133  El::SyncInfoFromMatrix(output_cast.LockedMatrix());
134 
135  const El::Int mloc = output_cast.LocalHeight();
136  const El::Int nloc = output_cast.LocalWidth();
137 
138  El::Matrix<TensorDataType, Dev> temp_output(mloc, nloc);
139 
140  El::Copy(output_cast.LockedMatrix(), temp_output);
141 
142  El::mpi::AllReduce(temp_output.Buffer(),
143  output_cast.Buffer(),
144  mloc * nloc,
145  El::mpi::SUM,
146  this->get_subgrid_comm(),
147  syncInfoOutput);
148  }
149 
// Output setup hook for forward prop.
// Returns immediately when the layer has no children. Otherwise every output
// tensor i is emptied with Empty(false) and then resized with
// Resize(get_output_size(i), mini_batch_size).
// NOTE(review): listing line 157, the right-hand side of the
// mini_batch_size initializer, is missing from this view.
150  void fp_setup_outputs() final
151  {
152 
153  if (this->get_num_children() < 1) {
154  return;
155  }
156  auto mini_batch_size =
158 
159  // Initialize output tensors
160  for (int i = 0; i < this->get_num_children(); ++i) {
161 
162  auto& output = this->get_activations(i);
163  output.Empty(false);
164  output.Resize(this->get_output_size(i), mini_batch_size);
165  }
166  }
167 
// NOTE(review): the signature line of this method (listing line 168) is
// missing from this view; only the body is shown.
// Visible behaviour of the body:
//  - takes the grid tag of the parent whose error-signal tensor lives on a
//    process grid containing this rank (last match wins) and uses
//    "grid tag - 1" as the tensor index;
//  - resizes error signals 0 .. children.size()-1 to the height and width of
//    the selected upstream gradient;
//  - copies the upstream gradient into the selected error signal;
//  - sum-reduces that error signal over get_subgrid_comm().
169  {
170  auto parents = this->get_parent_layers();
171  auto children = this->get_child_layers();
172 
173  int tag_parent = -1;
174  for (int i = 0; i < El::To<int>(parents.size()); i++) {
175  if (this->get_error_signals(i).Grid().InGrid())
176  tag_parent = parents[i]->get_grid_tag();
177  }
// NOTE(review): fp_compute indexes tensors with the loop index, whereas this
// code uses the parent's grid tag minus one -- confirm the two always agree.
// NOTE(review): if no grid matched, tag_parent is still -1 and tag becomes -2.
178  int const tag = tag_parent - 1;
179 
180  const auto& gradient_wrt_output = this->get_prev_error_signals(tag);
181  auto& gradient_wrt_input = this->get_error_signals(tag);
182 
183  int gradient_wrt_output_Height = gradient_wrt_output.Height();
184  int gradient_wrt_output_Width = gradient_wrt_output.Width();
// NOTE(review): the loop bound is the child count but the index selects an
// error signal; presumably parent and child counts match here -- confirm.
185  for (int i = 0; i < El::To<int>(children.size()); i++) {
186  auto& gradient_wrt_input_cast = dynamic_cast<
187  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>&>(
188  this->get_error_signals(i));
189  gradient_wrt_input_cast.Resize(gradient_wrt_output_Height,
190  gradient_wrt_output_Width);
191  }
192 
193  El::Copy(gradient_wrt_output, gradient_wrt_input);
194 
195  auto& gradient_wrt_input_cast = dynamic_cast<
196  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>&>(
197  gradient_wrt_input);
198 
199  const El::Int mloc = gradient_wrt_input_cast.LocalHeight();
200  const El::Int nloc = gradient_wrt_input_cast.LocalWidth();
201 
// NOTE(review): temp_output is allocated and filled but not read again in
// the visible code; the reduction below operates on gradient_wrt_input.
202  El::Matrix<TensorDataType, Dev> temp_output(mloc, nloc);
203 
204  El::Copy(gradient_wrt_input_cast.LockedMatrix(), temp_output);
205 
206  El::AllReduce(gradient_wrt_input, this->get_subgrid_comm(), El::mpi::SUM);
207  }
208 
// Empty override: this method performs no work.
209  void bp_compute() final {}
210 };
211 
// Unless LBANN_CROSS_GRID_SUM_LAYER_INSTANTIATE is defined, PROTO_DEVICE
// expands to an extern template declaration of cross_grid_sum_layer<T, Device>
// and is undefined again afterwards.
// NOTE(review): listing line 216, between the #define and the #undef, is
// missing from this view; presumably it includes a header that invokes
// PROTO_DEVICE -- confirm.
212 #ifndef LBANN_CROSS_GRID_SUM_LAYER_INSTANTIATE
213 #define PROTO_DEVICE(T, Device) \
214  extern template class cross_grid_sum_layer<T, Device>
215 
217 #undef PROTO_DEVICE
218 
219 #endif // LBANN_CROSS_GRID_SUM_LAYER_INSTANTIATE
220 
221 } // namespace lbann
222 
223 #endif // LBANN_LAYER_CROSS_GRID_SUM_HPP_INCLUDED
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
void write_specific_proto(lbann_data::Layer &proto) const final
El::Device get_device_allocation() const final
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
cross_grid_sum_layer * copy() const final
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
void setup_pointers() final
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
#define LBANN_ERROR(...)
Definition: exception.hpp:37
int get_grid_tag() const noexcept
Identifying tag for process grid.
int get_output_size(size_t output_index=0) const
Get output tensor size.
int get_num_parents() const noexcept
Get number of parent layers.
Definition: layer.hpp:574
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
void setup_dims() final
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
constexpr El::Device Device
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
OutputAbsDistMatrixType & get_prev_error_signals(int child_index=0)
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
El::Int infer_mini_batch_size_from_parents_or_default_to_current() const override
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override
int m_expected_num_child_layers
Expected number of child layers. A negative value indicates no limit.
Definition: layer.hpp:842
void fp_compute() final
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
int get_num_children() const noexcept
Get number of child layers.
Definition: layer.hpp:576
std::vector< const Layer * > get_parent_layers() const
void set_subgraph_parallelism_execution()
Definition: layer.hpp:515
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
std::vector< const Layer * > get_child_layers() const
std::string get_type() const final
Get the layer type's name.
cross_grid_sum_layer(lbann_comm *comm)
void bp_compute() final
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
void fp_setup_outputs() final
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
virtual void setup_pointers()
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.
data_layout get_data_layout() const final
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
int m_expected_num_parent_layers
Definition: layer.hpp:838
const InputAbsDistMatrixType & get_error_signals(const Layer &parent) const override
void bp_setup_gradient_wrt_inputs() final
Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w...