d6/d0e/channelwise__scale__bias_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.

 #ifndef LBANN_LAYER_LEARNING_CHANNELWISE_SCALE_BIAS_HPP_INCLUDED
 #define LBANN_LAYER_LEARNING_CHANNELWISE_SCALE_BIAS_HPP_INCLUDED

 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/proto/datatype_helpers.hpp"
 #include "lbann/proto/layers.pb.h"
 #include "lbann/utils/exception.hpp"

 namespace lbann {

 /** @brief Apply per-channel scale and bias.
  *
  *  The number of channels is taken from the first output dimension
  *  (see @c setup_data). Scale and bias terms live in a single weights
  *  object with dimensions @c num_channels x 2. When no weights are
  *  provided, default weights are created with scale initialized to 1
  *  and bias initialized to 0.
  *
  *  Only the data-parallel data layout is supported; any other layout
  *  is rejected at compile time.
  *
  *  @tparam TensorDataType  Element type of the layer's tensors.
  *  @tparam Layout          Data layout; must be
  *                          @c data_layout::DATA_PARALLEL.
  *  @tparam Device          Device on which the tensors are allocated.
  */
 template <typename TensorDataType,
           data_layout Layout = data_layout::DATA_PARALLEL,
           El::Device Device = El::Device::CPU>
 class channelwise_scale_bias_layer : public data_type_layer<TensorDataType>
 {
   // The diagnostic names this class so a failed instantiation points
   // at the right layer.
   static_assert(Layout == data_layout::DATA_PARALLEL,
                 "channelwise_scale_bias_layer only supports "
                 "data-parallel data layout");

 public:
   /** @name Public Types */
   ///@{

   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;

   /** @brief The concrete weights type used by this object. */
   using WeightsType = data_type_weights<TensorDataType>;

   /** @brief The concrete optimizer type used by this object. */
   using OptimizerType = data_type_optimizer<TensorDataType>;

   ///@}

 public:
   /** @brief Construct the layer.
    *  @param comm  Communicator forwarded to the base layer; may be null.
    */
   channelwise_scale_bias_layer(lbann_comm* comm = nullptr);

   /** @brief Copy-construct; the weights gradient matrix, if any, is
    *  duplicated rather than shared.
    */
   channelwise_scale_bias_layer(const channelwise_scale_bias_layer& other);

   /** @brief Copy-assign; the weights gradient matrix, if any, is
    *  duplicated rather than shared.
    */
   channelwise_scale_bias_layer&
   operator=(const channelwise_scale_bias_layer& other);

   /** @brief Dynamically allocate a copy of this layer.
    *
    *  The caller receives a raw pointer to a heap-allocated object and
    *  is responsible for managing its lifetime.
    */
   channelwise_scale_bias_layer* copy() const override
   {
     return new channelwise_scale_bias_layer(*this);
   }

   /** @brief Human-readable name of the layer type. */
   std::string get_type() const override { return "channel-wise scale/bias"; }

   /** @brief Data layout of the layer (the @c Layout template argument). */
   data_layout get_data_layout() const override { return Layout; }

   /** @brief Device allocation of the layer (the @c Device template
    *  argument).
    */
   El::Device get_device_allocation() const override { return Device; }

   /** @brief This layer reports that it can run in-place. */
   bool can_run_inplace() const override { return true; }

   /** @brief Tensors required for backpropagation, as a bitwise OR of
    *  @c ERROR_SIGNALS, @c WEIGHTS and @c PREV_ACTIVATIONS.
    */
   int get_backprop_requirements() const override
   {
     return ERROR_SIGNALS | WEIGHTS | PREV_ACTIVATIONS;
   }

   /** @brief Set up weights and the weights gradient matrix.
    *  @param max_mini_batch_size  Forwarded to the base class setup.
    */
   void setup_data(size_t max_mini_batch_size) override;

   /** @name Serialization */
   ///@{

   /** @brief Serialize the layer with the given archive (defined
    *  outside this header section).
    */
   template <typename ArchiveT>
   void serialize(ArchiveT& ar);

   ///@}

 protected:
   /** @brief Write the layer-specific portion of the protobuf message. */
   void write_specific_proto(lbann_data::Layer& proto) const final;

   /** @brief Forward-prop computation (defined outside this header
    *  section).
    */
   void fp_compute() override;

   /** @brief Back-prop computation (defined outside this header
    *  section).
    */
   void bp_compute() override;

 private:
   /** @brief Objective function gradient w.r.t. weights.
    *
    *  Null until @c setup_data allocates it as a @c num_channels x 2
    *  matrix.
    */
   std::unique_ptr<AbsDistMatrixType> m_weights_gradient;
 };

 // Implementation

 template <typename T, data_layout L, El::Device D>
 void channelwise_scale_bias_layer<T, L, D>::write_specific_proto(
   lbann_data::Layer& proto) const
 {
   proto.set_datatype(proto::ProtoDataType<T>);
   proto.mutable_channelwise_scale_bias();
 }

 template <typename TensorDataType, data_layout Layout, El::Device Dev>
 channelwise_scale_bias_layer<TensorDataType, Layout, Dev>::
   channelwise_scale_bias_layer(lbann_comm* comm)
   : data_type_layer<TensorDataType>(comm)
 {}

 template <typename TensorDataType, data_layout Layout, El::Device Dev>
 channelwise_scale_bias_layer<TensorDataType, Layout, Dev>::
   channelwise_scale_bias_layer(const channelwise_scale_bias_layer& other)
   : data_type_layer<TensorDataType>(other),
     m_weights_gradient(
       other.m_weights_gradient ? other.m_weights_gradient->Copy() : nullptr)
 {}

 template <typename TensorDataType, data_layout Layout, El::Device Dev>
 auto channelwise_scale_bias_layer<TensorDataType, Layout, Dev>::operator=(
   const channelwise_scale_bias_layer& other) -> channelwise_scale_bias_layer&
 {
   data_type_layer<TensorDataType>::operator=(other);
   m_weights_gradient.reset(
     other.m_weights_gradient ? other.m_weights_gradient->Copy() : nullptr);
   return *this;
 }

 template <typename TensorDataType, data_layout Layout, El::Device Dev>
 void channelwise_scale_bias_layer<TensorDataType, Layout, Dev>::setup_data(
   size_t max_mini_batch_size)
 {
   data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);
   const El::Int num_channels = this->get_output_dims()[0];

   // Construct default weights if needed
   // Note: Scale is initialized to 1 and bias to 0
   if (!this->has_weights()) {
     auto w = std::make_shared<WeightsType>(*this->get_comm());
     std::vector<TensorDataType> vals(2 * num_channels,
                                      El::TypeTraits<TensorDataType>::Zero());
     std::fill(vals.begin(),
               vals.begin() + num_channels,
               El::TypeTraits<TensorDataType>::One());
     auto init = std::make_unique<value_initializer<TensorDataType>>(vals);
     auto opt = this->m_model->template create_optimizer<TensorDataType>();
     w->set_name(this->get_name() + "_weights");
     w->set_initializer(std::move(init));
     w->set_optimizer(std::move(opt));
     this->add_weights(w);
     this->m_model->add_weights(std::move(w));
   }
   if (this->num_weights() != 1) {
     LBANN_ERROR("attempted to setup ",
                 this->get_type(),
                 " layer \"",
                 this->get_name(),
                 "\" ",
                 "with an invalid number of weights ",
                 "(expected 1, found ",
                 this->num_weights(),
                 ")");
   }

   // Setup weights
   auto dist = this->get_prev_activations().DistData();
   dist.colDist = El::STAR;
   dist.rowDist = El::STAR;
   this->get_weights(0).set_dims({static_cast<size_t>(num_channels)}, {2});
   this->get_weights(0).set_matrix_distribution(dist);

   // Setup gradient w.r.t. weights
   m_weights_gradient.reset(AbsDistMatrixType::Instantiate(dist));
   m_weights_gradient->AlignWith(dist);
   m_weights_gradient->Resize(num_channels, 2);
 }

 // Layer-builder hook for this layer, produced by the project macro
 // (NOTE(review): the macro's expansion is defined elsewhere -- confirm
 // whether it declares or defines the builder).
 LBANN_DEFINE_LAYER_BUILDER(channelwise_scale_bias);

 // Unless the including translation unit defines
 // LBANN_CHANNELWISE_SCALE_BIAS_LAYER_INSTANTIATE, declare the
 // data-parallel instantiations of the layer as `extern template` so they
 // are not implicitly instantiated here.
 #ifndef LBANN_CHANNELWISE_SCALE_BIAS_LAYER_INSTANTIATE

 // PROTO_DEVICE(T, Device) emits one extern-template declaration for the
 // given element type and device.
 #define PROTO_DEVICE(T, Device)                                                \
   extern template class channelwise_scale_bias_layer<                          \
     T,                                                                         \
     data_layout::DATA_PARALLEL,                                                \
     Device>;

 // NOTE(review): presumably this header invokes PROTO_DEVICE for every
 // supported type/device combination -- confirm in the macros directory.
 #include "lbann/macros/instantiate_device.hpp"
 #undef PROTO_DEVICE

 #endif // LBANN_CHANNELWISE_SCALE_BIAS_LAYER_INSTANTIATE

 } // namespace lbann

 #endif // LBANN_LAYER_LEARNING_CHANNELWISE_SCALE_BIAS_HPP_INCLUDED
lbann::WEIGHTS
Definition: base.hpp:210

lbann::protobuf::fill
void fill(std::istream &is, google::protobuf::Message &msg)
Fill the protobuf message from a binary stream.

lbann::channelwise_scale_bias_layer::fp_compute
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.

lbann::ERROR_SIGNALS
Definition: base.hpp:207

lbann::data_type_weights
Definition: l2.hpp:41

lbann::channelwise_scale_bias_layer::can_run_inplace
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: channelwise_scale_bias.hpp:95

lbann::Layer::get_comm
lbann_comm * get_comm() const

lbann::PREV_ACTIVATIONS
Definition: base.hpp:208

lbann::channelwise_scale_bias_layer::get_device_allocation
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: channelwise_scale_bias.hpp:94

lbann::channelwise_scale_bias_layer::get_data_layout
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: channelwise_scale_bias.hpp:93

LBANN_ERROR
#define LBANN_ERROR(...)
Definition: exception.hpp:37

lbann::lbann_comm
Definition: comm.hpp:105

lbann::model::add_weights
void add_weights(OwningWeightsPtr &&w)
Add weights to model.

lbann::channelwise_scale_bias_layer::serialize
void serialize(ArchiveT &ar)

lbann::Device
constexpr El::Device Device
Definition: OperatorTraits.hpp:62

lbann::channelwise_scale_bias_layer::operator=
channelwise_scale_bias_layer & operator=(const channelwise_scale_bias_layer &other)
Definition: channelwise_scale_bias.hpp:148

lbann::data_type_layer< TensorDataType >::get_prev_activations
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)

lbann::Layer::get_weights
weights const  & get_weights(size_t idx) const

lbann::model::set_name
void set_name(std::string name)
Metadata Accessors.

lbann::Layer::num_weights
size_t num_weights() const noexcept
Definition: layer.hpp:727

lbann::Layer::has_weights
bool has_weights() const noexcept
Definition: layer.hpp:728

lbann::data_type_layer
Definition: data_type_layer.hpp:69

lbann::weights::set_matrix_distribution
void set_matrix_distribution(El::DistData dist)

lbann::channelwise_scale_bias_layer::AbsDistMatrixType
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
Definition: channelwise_scale_bias.hpp:71

lbann::data_layout::DATA_PARALLEL

lbann::Layer::get_name
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332

exception.hpp

instantiate_device.hpp

lbann::channelwise_scale_bias_layer::get_type
std::string get_type() const override
Get the layer type's name.
Definition: channelwise_scale_bias.hpp:92

lbann::channelwise_scale_bias_layer::m_weights_gradient
std::unique_ptr< AbsDistMatrixType > m_weights_gradient
Objective function gradient w.r.t. weights.
Definition: channelwise_scale_bias.hpp:120

lbann::channelwise_scale_bias_layer
Apply per-channel scale and bias.
Definition: channelwise_scale_bias.hpp:60

lbann::data_layout
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218

data_type_layer.hpp

lbann::channelwise_scale_bias_layer::channelwise_scale_bias_layer
channelwise_scale_bias_layer(lbann_comm *comm=nullptr)
Definition: channelwise_scale_bias.hpp:135

lbann::data_type_optimizer
Definition: l2.hpp:39

lbann::weights::set_dims
void set_dims(std::vector< size_t > matrix_height_dims, std::vector< size_t > matrix_width_dims={})

lbann::data_type_layer::setup_data
void setup_data(size_t max_mini_batch_size) override

lbann::LBANN_DEFINE_LAYER_BUILDER
LBANN_DEFINE_LAYER_BUILDER(elu)

lbann::Layer::get_output_dims
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.

lbann::channelwise_scale_bias_layer::write_specific_proto
void write_specific_proto(lbann_data::Layer &proto) const final
Definition: channelwise_scale_bias.hpp:126

lbann::channelwise_scale_bias_layer::copy
channelwise_scale_bias_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: channelwise_scale_bias.hpp:87

lbann::Layer::add_weights
void add_weights(ViewingWeightsPtr w)
Definition: layer.hpp:723

lbann::channelwise_scale_bias_layer::bp_compute
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...

model.hpp

lbann::data_type_layer::operator=
data_type_layer & operator=(data_type_layer &&other)=default

lbann::Layer::m_model
model * m_model
Reference to model managing this layer.
Definition: layer.hpp:845

lbann
Definition: callback_helpers.hpp:32

lbann::channelwise_scale_bias_layer::get_backprop_requirements
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: channelwise_scale_bias.hpp:96

lbann::channelwise_scale_bias_layer::setup_data
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
Definition: channelwise_scale_bias.hpp:158

datatype_helpers.hpp