LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
entrywise_batch_normalization.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED
28 #define LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED
29 
31 #include "lbann/layers/layer.hpp"
32 #include "lbann/models/model.hpp"
34 #include "lbann/proto/layers.pb.h"
35 #include "lbann/utils/memory.hpp"
36 
37 namespace lbann {
38 
51 template <typename TensorDataType, data_layout Layout, El::Device Device>
53  : public data_type_layer<TensorDataType>
54 {
55 public:
57 
60  using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
61 
64 
66 
67 public:
69  TensorDataType decay = El::To<TensorDataType>(0.9),
70  TensorDataType epsilon = El::To<TensorDataType>(1e-5))
71  : data_type_layer<TensorDataType>(nullptr),
72  m_decay(decay),
73  m_epsilon(epsilon)
74  {}
75 
78  : data_type_layer<TensorDataType>(other),
79  m_decay(other.m_decay),
80  m_epsilon(other.m_epsilon),
82  other.m_batch_statistics ? other.m_batch_statistics->Copy() : nullptr),
84  ? other.m_batch_statistics_gradient->Copy()
85  : nullptr)
86  {}
87 
90  {
92  m_decay = other.m_decay;
93  m_epsilon = other.m_epsilon;
94  m_batch_statistics.reset(
95  other.m_batch_statistics ? other.m_batch_statistics->Copy() : nullptr);
98  ? other.m_batch_statistics_gradient->Copy()
99  : nullptr);
100  return *this;
101  }
102 
104  {
105  return new entrywise_batch_normalization_layer(*this);
106  }
// Returns the human-readable name of this layer type as a fixed string.
107  std::string get_type() const override
108  {
109  return "entry-wise batch normalization";
110  }
// Returns the data layout this layer was instantiated with (the Layout template argument).
111  data_layout get_data_layout() const override { return Layout; }
// Returns the device this layer was instantiated for (the Device template argument).
112  El::Device get_device_allocation() const override { return Device; }
// Always reports false: this layer does not support in-place computation.
113  bool can_run_inplace() const override { return false; }
114  int get_backprop_requirements() const override
115  {
117  }
118 
119  description get_description() const override
120  {
122  desc.add("Decay", m_decay);
123  desc.add("Epsilon", m_epsilon);
124  return desc;
125  }
126 
128 
130  template <typename ArchiveT>
131  void serialize(ArchiveT& ar);
132 
134 
135 protected:
137  void write_specific_proto(lbann_data::Layer& proto) const final;
138 
// Set up the layer's output dimensions, weights, and statistics workspaces.
//
// Steps, in order:
//  1. Run the base-class setup_data with the same max_mini_batch_size.
//  2. Make the output dimensions equal to the input dimensions.
//  3. Ensure the layer has exactly two weights objects. Slot 0 is named
//     "<layer name>_running_mean" and uses a constant initializer of zero;
//     slot 1 is named "<layer name>_running_variance" and uses a constant
//     initializer of one. Slots already populated by the caller are kept.
//  4. Give every weights object the output dimensions and a matrix
//     distribution derived from the previous activations.
//  5. Instantiate m_batch_statistics and m_batch_statistics_gradient with
//     that same distribution.
//
// Calls LBANN_ERROR if more than two weights were attached to the layer.
139  void setup_data(size_t max_mini_batch_size) override
140  {
141  data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);
142 
143  // Initialize output dimensions (identical to the input dimensions)
144  this->set_output_dims(this->get_input_dims());
145  const auto output_dims_ = this->get_output_dims();
146  std::vector<size_t> output_dims(output_dims_.begin(), output_dims_.end()); // size_t copy, passed to set_dims below
147 
148  // Initialize default weights if none are provided
149  if (this->num_weights() > 2) { // only "too many" is rejected; missing weights are created below
150  LBANN_ERROR("attempted to setup layer \"",
151  this->get_name(),
152  "\" ",
153  "with an invalid number of weights ",
154  "(found ",
155  this->num_weights(),
156  ", expected 2)");
157  }
158  this->set_num_weights(2);
159  if (!this->has_weights(0)) { // slot 0: running mean, starts at zero
160  auto w = std::make_shared<WeightsType>(*this->get_comm());
161  auto init = std::make_unique<constant_initializer<TensorDataType>>(
162  El::TypeTraits<TensorDataType>::Zero());
163  w->set_name(this->get_name() + "_running_mean");
164  w->set_initializer(std::move(init));
165  this->set_weights(0, w); // attach to the layer before w is moved from
166  this->m_model->add_weights(std::move(w)); // hand the shared_ptr over to the model
167  }
168  if (!this->has_weights(1)) { // slot 1: running variance, starts at one
169  auto w = std::make_shared<WeightsType>(*this->get_comm());
170  auto init = std::make_unique<constant_initializer<TensorDataType>>(
171  El::TypeTraits<TensorDataType>::One());
172  w->set_name(this->get_name() + "_running_variance");
173  w->set_initializer(std::move(init));
174  this->set_weights(1, w); // attach to the layer before w is moved from
175  this->m_model->add_weights(std::move(w)); // hand the shared_ptr over to the model
176  }
177 
178  // Setup weights: same dimensions as the output, distribution taken from the previous activations
179  auto dist = this->get_prev_activations().DistData();
180  dist.rowDist = El::STAR; // NOTE(review): presumably so the per-entry values are not split along the mini-batch dimension - confirm
181  auto const num_weights = this->num_weights();
182  for (size_t ii = 0; ii < num_weights; ++ii) {
183  auto& w = this->get_weights(ii);
184  w.set_dims(output_dims);
185  w.set_matrix_distribution(dist);
186  }
187 
188  // Initialize matrices (batch statistics and their gradients share the weights' distribution)
189  m_batch_statistics.reset(AbsDistMatrixType::Instantiate(dist));
190  m_batch_statistics_gradient.reset(AbsDistMatrixType::Instantiate(dist));
191  }
192 
193  void fp_compute() override;
194  void bp_compute() override;
195 
196 private:
198  TensorDataType m_decay;
200  TensorDataType m_epsilon;
201 
206  std::unique_ptr<AbsDistMatrixType> m_batch_statistics;
211  std::unique_ptr<AbsDistMatrixType> m_batch_statistics_gradient;
212 };
213 
214 template <typename T, data_layout L, El::Device D>
216  lbann_data::Layer& proto) const
217 {
218  proto.set_datatype(proto::ProtoDataType<T>);
219  auto* msg = proto.mutable_entrywise_batch_normalization();
220  msg->set_decay(m_decay);
221  msg->set_epsilon(m_epsilon);
222 }
223 
224 LBANN_DEFINE_LAYER_BUILDER(entrywise_batch_normalization);
225 
226 #ifndef LBANN_ENTRYWISE_BATCH_NORMALIZATION_LAYER_INSTANTIATE
227 #define PROTO_DEVICE(T, Device) \
228  extern template class entrywise_batch_normalization_layer< \
229  T, \
230  data_layout::DATA_PARALLEL, \
231  Device>; \
232  extern template class entrywise_batch_normalization_layer< \
233  T, \
234  data_layout::MODEL_PARALLEL, \
235  Device>
236 
238 #undef PROTO_DEVICE
239 #endif // LBANN_ENTRYWISE_BATCH_NORMALIZATION_LAYER_INSTANTIATE
240 
241 } // namespace lbann
242 
243 #endif // LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED
lbann_comm * get_comm() const
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
Entry-wise batch normalization, including scale/bias.
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
#define LBANN_ERROR(...)
Definition: exception.hpp:37
entrywise_batch_normalization_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
description get_description() const override
Human-readable description.
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Generates nicely formatted description messages.
Definition: description.hpp:49
void write_specific_proto(lbann_data::Layer &proto) const final
entrywise_batch_normalization_layer(const entrywise_batch_normalization_layer &other)
void add_weights(OwningWeightsPtr &&w)
Add weights to model.
virtual description get_description() const
Human-readable description.
constexpr El::Device Device
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
weights const & get_weights(size_t idx) const
std::string get_type() const override
Get the layer type's name.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
size_t num_weights() const noexcept
Definition: layer.hpp:727
bool has_weights() const noexcept
Definition: layer.hpp:728
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
entrywise_batch_normalization_layer(TensorDataType decay=El::To< TensorDataType >(0.9), TensorDataType epsilon=El::To< TensorDataType >(1e-5))
std::unique_ptr< AbsDistMatrixType > m_batch_statistics_gradient
Gradients w.r.t. current mini-batch statistics.
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
void set_num_weights(size_t n)
Definition: layer.hpp:733
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
std::unique_ptr< AbsDistMatrixType > m_batch_statistics
Current mini-batch statistics.
void set_weights(size_t idx, ViewingWeightsPtr w)
Definition: layer.hpp:734
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
void setup_data(size_t max_mini_batch_size) override
LBANN_DEFINE_LAYER_BUILDER(elu)
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.
entrywise_batch_normalization_layer & operator=(const entrywise_batch_normalization_layer &other)
data_type_layer & operator=(data_type_layer &&other)=default
model * m_model
Reference to model managing this layer.
Definition: layer.hpp:845
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.