d7/d1b/entrywise__batch__normalization_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.

 #ifndef LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED
 #define LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED

 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/layers/layer.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/proto/datatype_helpers.hpp"
 #include "lbann/proto/layers.pb.h"
 #include "lbann/utils/memory.hpp"

 namespace lbann {

 /** @brief Entry-wise batch normalization layer.
  *
  *  The layer owns two weights objects, created with default
  *  initializers in @c setup_data when they are not supplied:
  *   - weights 0: "<layer name>_running_mean", initialized to zero;
  *   - weights 1: "<layer name>_running_variance", initialized to one.
  *
  *  Both weights are given the same dimensions as the layer output.
  *  The forward/backward computations (@c fp_compute / @c bp_compute)
  *  are declared here and defined elsewhere.
  *
  *  @tparam TensorDataType Element type of the tensors and weights.
  *  @tparam Layout         Value reported by @c get_data_layout.
  *  @tparam Device         Value reported by @c get_device_allocation.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 class entrywise_batch_normalization_layer
   : public data_type_layer<TensorDataType>
 {
 public:
   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;

   /** @brief Weights type used for the default-constructed weights. */
   using WeightsType = data_type_weights<TensorDataType>;

 public:
   /** @brief Construct a layer with the given hyperparameters.
    *
    *  @param decay   Stored in @c m_decay (default 0.9). Presumably the
    *                 decay rate of the running statistics -- confirm
    *                 against the fp_compute implementation.
    *  @param epsilon Stored in @c m_epsilon (default 1e-5). Presumably a
    *                 small constant guarding against division by zero --
    *                 confirm against the fp_compute implementation.
    */
   entrywise_batch_normalization_layer(
     TensorDataType decay = El::To<TensorDataType>(0.9),
     TensorDataType epsilon = El::To<TensorDataType>(1e-5))
     : data_type_layer<TensorDataType>(nullptr),
       m_decay(decay),
       m_epsilon(epsilon)
   {}

   /** @brief Copy constructor.
    *
    *  The workspace matrices are duplicated with @c Copy() when present
    *  in @p other; otherwise the new layer's pointers stay null.
    */
   entrywise_batch_normalization_layer(
     const entrywise_batch_normalization_layer& other)
     : data_type_layer<TensorDataType>(other),
       m_decay(other.m_decay),
       m_epsilon(other.m_epsilon),
       m_batch_statistics(
         other.m_batch_statistics ? other.m_batch_statistics->Copy() : nullptr),
       m_batch_statistics_gradient(other.m_batch_statistics_gradient
                                     ? other.m_batch_statistics_gradient->Copy()
                                     : nullptr)
   {}

   /** @brief Copy assignment.
    *
    *  Assigns the base class, copies the hyperparameters, and replaces
    *  the workspace matrices with copies of those held by @p other (or
    *  null when @p other has none).
    *
    *  @return Reference to this layer.
    */
   entrywise_batch_normalization_layer&
   operator=(const entrywise_batch_normalization_layer& other)
   {
     // Self-assignment would otherwise re-run the base assignment and
     // deep-copy both workspace matrices only to replace them with
     // identical contents.
     if (this == &other) {
       return *this;
     }
     data_type_layer<TensorDataType>::operator=(other);
     m_decay = other.m_decay;
     m_epsilon = other.m_epsilon;
     m_batch_statistics.reset(
       other.m_batch_statistics ? other.m_batch_statistics->Copy() : nullptr);
     m_batch_statistics_gradient.reset(
       other.m_batch_statistics_gradient
         ? other.m_batch_statistics_gradient->Copy()
         : nullptr);
     return *this;
   }

   /** @brief Allocate a new layer copy-constructed from this one.
    *
    *  @return Raw pointer to the new instance, created with @c new.
    */
   entrywise_batch_normalization_layer* copy() const override
   {
     return new entrywise_batch_normalization_layer(*this);
   }

   /** @brief Get the layer type's name. */
   std::string get_type() const override
   {
     return "entry-wise batch normalization";
   }

   /** @brief Data layout, as fixed by the @c Layout template argument. */
   data_layout get_data_layout() const override { return Layout; }

   /** @brief Device allocation, as fixed by the @c Device template argument. */
   El::Device get_device_allocation() const override { return Device; }

   /** @brief This layer never reports that it can run in-place. */
   bool can_run_inplace() const override { return false; }

   /** @brief Flags for the tensors needed by backpropagation.
    *
    *  @return Bitwise OR of ERROR_SIGNALS, PREV_ACTIVATIONS and WEIGHTS.
    */
   int get_backprop_requirements() const override
   {
     return ERROR_SIGNALS | PREV_ACTIVATIONS | WEIGHTS;
   }

   /** @brief Human-readable description.
    *
    *  Extends the base class description with the "Decay" and "Epsilon"
    *  hyperparameters.
    */
   description get_description() const override
   {
     auto desc = data_type_layer<TensorDataType>::get_description();
     desc.add("Decay", m_decay);
     desc.add("Epsilon", m_epsilon);
     return desc;
   }

   /** @brief Serialize the layer with the given archive.
    *
    *  Declared here; the definition is not part of this header.
    */
   template <typename ArchiveT>
   void serialize(ArchiveT& ar);

 protected:
   /** @brief Write the layer-specific fields into a protobuf message. */
   void write_specific_proto(lbann_data::Layer& proto) const final;

   /** @brief Set up output dimensions, weights and workspace matrices.
    *
    *  Calls the base class setup, copies the input dimensions to the
    *  output, creates default weights for any of the two slots that is
    *  unset, configures all weights, and instantiates the workspace
    *  matrices.
    *
    *  Raises an error through @c LBANN_ERROR if more than two weights
    *  were attached to the layer.
    *
    *  @param max_mini_batch_size Forwarded unchanged to the base class.
    */
   void setup_data(size_t max_mini_batch_size) override
   {
     data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);

     // Initialize output dimensions
     this->set_output_dims(this->get_input_dims());
     const auto output_dims_ = this->get_output_dims();
     // The weights take their dimensions as size_t, so convert.
     std::vector<size_t> output_dims(output_dims_.begin(), output_dims_.end());

     // Initialize default weights if none are provided
     if (this->num_weights() > 2) {
       LBANN_ERROR("attempted to setup layer \"",
                   this->get_name(),
                   "\" ",
                   "with an invalid number of weights ",
                   "(found ",
                   this->num_weights(),
                   ", expected 2)");
     }
     this->set_num_weights(2);
     if (!this->has_weights(0)) {
       // Running mean: starts at zero
       auto w = std::make_shared<WeightsType>(*this->get_comm());
       auto init = std::make_unique<constant_initializer<TensorDataType>>(
         El::TypeTraits<TensorDataType>::Zero());
       w->set_name(this->get_name() + "_running_mean");
       w->set_initializer(std::move(init));
       this->set_weights(0, w);
       this->m_model->add_weights(std::move(w));
     }
     if (!this->has_weights(1)) {
       // Running variance: starts at one
       auto w = std::make_shared<WeightsType>(*this->get_comm());
       auto init = std::make_unique<constant_initializer<TensorDataType>>(
         El::TypeTraits<TensorDataType>::One());
       w->set_name(this->get_name() + "_running_variance");
       w->set_initializer(std::move(init));
       this->set_weights(1, w);
       this->m_model->add_weights(std::move(w));
     }

     // Setup weights
     // Start from the distribution of the input activations and force
     // the row distribution to STAR.
     // NOTE(review): presumably so the per-entry statistics are not
     // split along the mini-batch dimension -- confirm.
     auto dist = this->get_prev_activations().DistData();
     dist.rowDist = El::STAR;
     auto const num_weights = this->num_weights();
     for (size_t ii = 0; ii < num_weights; ++ii) {
       auto& w = this->get_weights(ii);
       w.set_dims(output_dims);
       w.set_matrix_distribution(dist);
     }

     // Initialize matrices
     // The workspaces are only instantiated with the distribution here;
     // they are not resized in this function.
     m_batch_statistics.reset(AbsDistMatrixType::Instantiate(dist));
     m_batch_statistics_gradient.reset(AbsDistMatrixType::Instantiate(dist));
   }

   /** @brief Forward computation; defined outside this header. */
   void fp_compute() override;
   /** @brief Backward computation; defined outside this header. */
   void bp_compute() override;

 private:
   /** @brief Decay hyperparameter (reported as "Decay"). */
   TensorDataType m_decay;
   /** @brief Epsilon hyperparameter (reported as "Epsilon"). */
   TensorDataType m_epsilon;

   /** @brief Current mini-batch statistics. */
   std::unique_ptr<AbsDistMatrixType> m_batch_statistics;
   /** @brief Gradients w.r.t. current mini-batch statistics. */
   std::unique_ptr<AbsDistMatrixType> m_batch_statistics_gradient;
 };

 template <typename T, data_layout L, El::Device D>
 void entrywise_batch_normalization_layer<T, L, D>::write_specific_proto(
   lbann_data::Layer& proto) const
 {
   proto.set_datatype(proto::ProtoDataType<T>);
   auto* msg = proto.mutable_entrywise_batch_normalization();
   msg->set_decay(m_decay);
   msg->set_epsilon(m_epsilon);
 }

 // NOTE(review): the macro is defined elsewhere; presumably it declares
 // the builder function for this layer type -- confirm against the
 // macro definition.
 LBANN_DEFINE_LAYER_BUILDER(entrywise_batch_normalization);

 // Unless LBANN_ENTRYWISE_BATCH_NORMALIZATION_LAYER_INSTANTIATE is
 // defined, declare the class template specializations as `extern` so
 // that including this header does not instantiate them implicitly.
 // PROTO_DEVICE(T, Device) declares both the DATA_PARALLEL and the
 // MODEL_PARALLEL layout for one data type / device pair.
 #ifndef LBANN_ENTRYWISE_BATCH_NORMALIZATION_LAYER_INSTANTIATE
 #define PROTO_DEVICE(T, Device)                                                \
   extern template class entrywise_batch_normalization_layer<                   \
     T,                                                                         \
     data_layout::DATA_PARALLEL,                                                \
     Device>;                                                                   \
   extern template class entrywise_batch_normalization_layer<                   \
     T,                                                                         \
     data_layout::MODEL_PARALLEL,                                               \
     Device>

 // NOTE(review): presumably this header expands PROTO_DEVICE once per
 // supported data type / device combination -- confirm against the
 // included file.
 #include "lbann/macros/instantiate_device.hpp"
 #undef PROTO_DEVICE
 #endif // LBANN_ENTRYWISE_BATCH_NORMALIZATION_LAYER_INSTANTIATE

 } // namespace lbann

 #endif // LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED
lbann::WEIGHTS
Definition: base.hpp:210

lbann::ERROR_SIGNALS
Definition: base.hpp:207

lbann::data_type_weights
Definition: l2.hpp:41

lbann::Layer::get_comm
lbann_comm * get_comm() const

lbann::entrywise_batch_normalization_layer::bp_compute
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...

lbann::entrywise_batch_normalization_layer
Entry-wise batch normalization, including scale/bias.
Definition: entrywise_batch_normalization.hpp:52

lbann::PREV_ACTIVATIONS
Definition: base.hpp:208

lbann::entrywise_batch_normalization_layer::fp_compute
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.

LBANN_ERROR
#define LBANN_ERROR(...)
Definition: exception.hpp:37

lbann::entrywise_batch_normalization_layer::copy
entrywise_batch_normalization_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: entrywise_batch_normalization.hpp:103

lbann::entrywise_batch_normalization_layer::get_description
description get_description() const override
Human-readable description.
Definition: entrywise_batch_normalization.hpp:119

lbann::Layer::get_input_dims
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.

lbann::description
Generates nicely formatted description messages.
Definition: description.hpp:49

lbann::entrywise_batch_normalization_layer::write_specific_proto
void write_specific_proto(lbann_data::Layer &proto) const final
Definition: entrywise_batch_normalization.hpp:215

lbann::entrywise_batch_normalization_layer::entrywise_batch_normalization_layer
entrywise_batch_normalization_layer(const entrywise_batch_normalization_layer &other)
Definition: entrywise_batch_normalization.hpp:76

lbann::model::add_weights
void add_weights(OwningWeightsPtr &&w)
Add weights to model.

lbann::Layer::get_description
virtual description get_description() const
Human-readable description.

lbann::Device
constexpr El::Device Device
Definition: OperatorTraits.hpp:62

lbann::data_type_layer< TensorDataType >::get_prev_activations
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)

lbann::Layer::get_weights
weights const  & get_weights(size_t idx) const

lbann::entrywise_batch_normalization_layer::get_type
std::string get_type() const override
Get the layer type's name.
Definition: entrywise_batch_normalization.hpp:107

lbann::Layer::set_output_dims
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.

lbann::Layer::num_weights
size_t num_weights() const noexcept
Definition: layer.hpp:727

lbann::Layer::has_weights
bool has_weights() const noexcept
Definition: layer.hpp:728

lbann::data_type_layer
Definition: data_type_layer.hpp:69

lbann::entrywise_batch_normalization_layer::can_run_inplace
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: entrywise_batch_normalization.hpp:113

lbann::entrywise_batch_normalization_layer::entrywise_batch_normalization_layer
entrywise_batch_normalization_layer(TensorDataType decay=El::To< TensorDataType >(0.9), TensorDataType epsilon=El::To< TensorDataType >(1e-5))
Definition: entrywise_batch_normalization.hpp:68

layer.hpp

lbann::entrywise_batch_normalization_layer::m_batch_statistics_gradient
std::unique_ptr< AbsDistMatrixType > m_batch_statistics_gradient
Gradients w.r.t. current mini-batch statistics.
Definition: entrywise_batch_normalization.hpp:211

lbann::Layer::get_name
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332

lbann::Layer::set_num_weights
void set_num_weights(size_t n)
Definition: layer.hpp:733

lbann::entrywise_batch_normalization_layer::m_epsilon
TensorDataType m_epsilon
Definition: entrywise_batch_normalization.hpp:200

instantiate_device.hpp

lbann::entrywise_batch_normalization_layer::m_decay
TensorDataType m_decay
Definition: entrywise_batch_normalization.hpp:198

lbann::entrywise_batch_normalization_layer::get_device_allocation
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: entrywise_batch_normalization.hpp:112

lbann::entrywise_batch_normalization_layer::serialize
void serialize(ArchiveT &ar)

lbann::data_layout
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218

data_type_layer.hpp

lbann::entrywise_batch_normalization_layer::m_batch_statistics
std::unique_ptr< AbsDistMatrixType > m_batch_statistics
Current mini-batch statistics.
Definition: entrywise_batch_normalization.hpp:206

lbann::Layer::set_weights
void set_weights(size_t idx, ViewingWeightsPtr w)
Definition: layer.hpp:734

lbann::entrywise_batch_normalization_layer::get_data_layout
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: entrywise_batch_normalization.hpp:111

lbann::entrywise_batch_normalization_layer::AbsDistMatrixType
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
Definition: entrywise_batch_normalization.hpp:60

lbann::data_type_layer::setup_data
void setup_data(size_t max_mini_batch_size) override

lbann::LBANN_DEFINE_LAYER_BUILDER
LBANN_DEFINE_LAYER_BUILDER(elu)

lbann::entrywise_batch_normalization_layer::setup_data
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
Definition: entrywise_batch_normalization.hpp:139

lbann::Layer::get_output_dims
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.

lbann::entrywise_batch_normalization_layer::operator=
entrywise_batch_normalization_layer & operator=(const entrywise_batch_normalization_layer &other)
Definition: entrywise_batch_normalization.hpp:89

model.hpp

lbann::data_type_layer::operator=
data_type_layer & operator=(data_type_layer &&other)=default

memory.hpp

lbann::Layer::m_model
model * m_model
Reference to model managing this layer.
Definition: layer.hpp:845

lbann
Definition: callback_helpers.hpp:32

lbann::entrywise_batch_normalization_layer::get_backprop_requirements
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: entrywise_batch_normalization.hpp:114

datatype_helpers.hpp