dc/d7a/embedding_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.

 #ifndef LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED
 #define LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED

 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/models/model.hpp"
 #include "lbann/proto/datatype_helpers.hpp"
 #include "lbann/proto/layers.pb.h"
 #include "lbann/utils/memory.hpp"

 namespace lbann {

 /** @brief Lookup table to vectors of fixed size.
  *
  *  The layer is parameterized by the number of embedding vectors
  *  (@c num_embeddings), the length of each vector (@c embedding_dim)
  *  and an optional padding index. Only the data-parallel layout is
  *  supported; any other layout is rejected at compile time.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 class embedding_layer : public data_type_layer<TensorDataType>
 {
   static_assert(Layout == data_layout::DATA_PARALLEL,
                 "embedding layer only supports data parallel layout");

 public:
   /** @name Public types */
   ///@{

   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;

   /** @brief The concrete weights type used by this object. */
   using WeightsType = data_type_weights<TensorDataType>;

   /** @brief The concrete optimizer type used by this object. */
   using OptimizerType = data_type_optimizer<TensorDataType>;

   ///@}

   /** @brief Construct an embedding layer.
    *  @param num_embeddings Number of embedding vectors in the table.
    *  @param embedding_dim  Length of each embedding vector.
    *  @param padding_idx    Index whose embedding vector is zeroed out
    *                        during data setup. The default of -1 is
    *                        negative and therefore never selected.
    */
   embedding_layer(size_t num_embeddings,
                   size_t embedding_dim,
                   El::Int padding_idx = -1);

   /** @brief Copy constructor; duplicates the gradient matrix if present. */
   embedding_layer(const embedding_layer& other);
   /** @brief Copy assignment; duplicates the gradient matrix if present. */
   embedding_layer& operator=(const embedding_layer& other);
   ~embedding_layer() = default;

   /** @brief Dynamically allocate a copy of this layer. */
   embedding_layer* copy() const override;
   /** @brief Get the layer type's name ("embedding"). */
   std::string get_type() const override;
   /** @brief Get the data layout template parameter. */
   data_layout get_data_layout() const override;
   /** @brief Get the device template parameter. */
   El::Device get_device_allocation() const override;
   /** @brief This layer never runs in-place. */
   bool can_run_inplace() const override { return false; }
   /** @brief Returns the necessary tensors for computing backpropagation. */
   int get_backprop_requirements() const override
   {
     return ERROR_SIGNALS | WEIGHTS | PREV_ACTIVATIONS;
   }

   /** @brief Human-readable description. */
   description get_description() const override;

   /** @name Serialization */
   ///@{

   /** @brief Store or restore this layer with a cereal-style archive. */
   template <typename ArchiveT>
   void serialize(ArchiveT& ar);

   ///@}

 protected:
   /** @brief Write the embedding-specific fields to the protobuf message. */
   void write_specific_proto(lbann_data::Layer& proto) const final;

   // The default constructor is not public; cereal reaches it through
   // this friend declaration.
   friend class cereal::access;
   embedding_layer();

   /** @brief Set output dims to the input dims plus the embedding dim. */
   void setup_dims() override;
   /** @brief Set up the embedding weights and the gradient matrix. */
   void setup_data(size_t max_mini_batch_size) override;

   /** @brief Apply layer operation (forward prop). */
   void fp_compute() override;
   /** @brief Compute objective function gradients (back prop). */
   void bp_compute() override;

 private:
   /** @brief Number of embedding vectors (columns of the weights matrix). */
   size_t m_num_embeddings;
   /** @brief Length of each embedding vector (rows of the weights matrix). */
   size_t m_embedding_dim;
   /** @brief Index of the embedding vector that is zeroed during setup.
    *
    *  A negative value never passes the range check in @c setup_data.
    */
   El::Int m_padding_idx;

   /** @brief Gradient w.r.t. the embeddings; allocated in @c setup_data. */
   std::unique_ptr<AbsDistMatrixType> m_embeddings_grad;
 };

 // =========================================================
 // Implementation
 // =========================================================

 template <typename T, data_layout L, El::Device D>
 void embedding_layer<T, L, D>::write_specific_proto(
   lbann_data::Layer& proto) const
 {
   proto.set_datatype(proto::ProtoDataType<T>);
   auto* msg = proto.mutable_embedding();
   msg->set_num_embeddings(m_num_embeddings);
   msg->set_embedding_dim(m_embedding_dim);
   msg->mutable_padding_idx()->set_value(m_padding_idx);
 }

 /** @brief Construct an embedding layer from its three configuration values.
  *
  *  The base layer is constructed with a null argument; no gradient
  *  matrix is allocated here (that happens in @c setup_data).
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 embedding_layer<TensorDataType, Layout, Device>::embedding_layer(
   size_t num_embeddings,
   size_t embedding_dim,
   El::Int padding_idx)
   : data_type_layer<TensorDataType>(nullptr),
     m_num_embeddings(num_embeddings),
     m_embedding_dim(embedding_dim),
     m_padding_idx(padding_idx)
 {
   // Nothing else to do: m_embeddings_grad stays null until setup.
 }

 template <typename TensorDataType, data_layout Layout, El::Device Device>
 embedding_layer<TensorDataType, Layout, Device>::embedding_layer()
   : embedding_layer(0, 0, 0)
 {}

 /** @brief Copy constructor.
  *
  *  Copies the base layer and the three configuration values, and
  *  duplicates the gradient matrix of @c other when it has one;
  *  otherwise the new layer's gradient pointer is null.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 embedding_layer<TensorDataType, Layout, Device>::embedding_layer(
   const embedding_layer& other)
   : data_type_layer<TensorDataType>(other),
     m_num_embeddings(other.m_num_embeddings),
     m_embedding_dim(other.m_embedding_dim),
     m_padding_idx(other.m_padding_idx),
     m_embeddings_grad(!other.m_embeddings_grad
                         ? nullptr
                         : other.m_embeddings_grad->Copy())
 {}

 /** @brief Copy assignment.
  *
  *  Assigns the base layer first, then the configuration values, and
  *  finally replaces the gradient matrix with a copy of the one held
  *  by @c other (or clears it when @c other has none).
  *
  *  @return Reference to this layer.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 embedding_layer<TensorDataType, Layout, Device>&
 embedding_layer<TensorDataType, Layout, Device>::operator=(
   const embedding_layer& other)
 {
   data_type_layer<TensorDataType>::operator=(other);

   this->m_num_embeddings = other.m_num_embeddings;
   this->m_embedding_dim = other.m_embedding_dim;
   this->m_padding_idx = other.m_padding_idx;

   // The copy is created before the old matrix is released, so this
   // is safe even when other is *this.
   if (other.m_embeddings_grad) {
     this->m_embeddings_grad.reset(other.m_embeddings_grad->Copy());
   }
   else {
     this->m_embeddings_grad.reset();
   }
   return *this;
 }

 /** @brief Dynamically allocate a copy of this layer.
  *
  *  @return Pointer to a new layer built with the copy constructor.
  *          The raw pointer is returned as-is; nothing in this
  *          function retains or releases it.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 embedding_layer<TensorDataType, Layout, Device>*
 embedding_layer<TensorDataType, Layout, Device>::copy() const
 {
   auto* duplicate =
     new embedding_layer<TensorDataType, Layout, Device>(*this);
   return duplicate;
 }

 /** @brief Get the layer type's name.
  *  @return The string "embedding".
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 std::string embedding_layer<TensorDataType, Layout, Device>::get_type() const
 {
   return std::string("embedding");
 }

 /** @brief Get the data layout of this layer.
  *  @return The @c Layout template parameter.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 data_layout
 embedding_layer<TensorDataType, Layout, Device>::get_data_layout() const
 {
   constexpr data_layout layout = Layout;
   return layout;
 }

 /** @brief Get the device allocation of this layer.
  *  @return The @c Device template parameter.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 El::Device
 embedding_layer<TensorDataType, Layout, Device>::get_device_allocation() const
 {
   constexpr El::Device device = Device;
   return device;
 }

 /** @brief Human-readable description.
  *
  *  Starts from the base layer's description and appends the number
  *  of embeddings, the embedding dimension and the padding index.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 description
 embedding_layer<TensorDataType, Layout, Device>::get_description() const
 {
   description layer_desc = data_type_layer<TensorDataType>::get_description();

   layer_desc.add("Num embeddings", m_num_embeddings);
   layer_desc.add("Embedding dim", m_embedding_dim);
   layer_desc.add("Padding index", m_padding_idx);

   return layer_desc;
 }

 /** @brief Setup tensor dimensions.
  *
  *  After the base class setup, the output dimensions are the input
  *  dimensions with the embedding dimension appended as an extra
  *  trailing entry.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 void embedding_layer<TensorDataType, Layout, Device>::setup_dims()
 {
   data_type_layer<TensorDataType>::setup_dims();

   std::vector<int> output_dims = this->get_input_dims();
   const int embedding_dim = static_cast<int>(m_embedding_dim);
   output_dims.push_back(embedding_dim);

   this->set_output_dims(output_dims);
 }

 /** @brief Setup layer data.
  *
  *  Performs, in order:
  *   -# base class data setup;
  *   -# construction of a default weights object (normal initializer
  *      with mean 0 and standard deviation 1, optimizer obtained from
  *      the model) when the layer has no weights yet;
  *   -# validation that the layer has exactly one weights object
  *      (raises @c LBANN_ERROR otherwise);
  *   -# configuration of the weights as a fully replicated
  *      (STAR, STAR) matrix with @c m_embedding_dim rows and
  *      @c m_num_embeddings columns, i.e. one embedding vector per
  *      column;
  *   -# zeroing of the embedding vector selected by the padding index,
  *      when that index refers to an existing embedding;
  *   -# allocation of the gradient matrix with the same shape as the
  *      embeddings.
  *
  *  @param max_mini_batch_size Forwarded to the base class setup.
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 void embedding_layer<TensorDataType, Layout, Device>::setup_data(
   size_t max_mini_batch_size)
 {
   data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);

   // Construct default weights if needed
   // Note: Randomly drawn from normal distribution with mean 0 and
   // standard deviation 1.
   if (!this->has_weights()) {
     auto w = std::make_shared<WeightsType>(*this->get_comm());
     auto init = std::make_unique<normal_initializer<TensorDataType>>(
       El::TypeTraits<TensorDataType>::Zero(),
       El::TypeTraits<TensorDataType>::One());
     auto opt = this->m_model->template create_optimizer<TensorDataType>();
     w->set_name(this->get_name() + "_weights");
     w->set_initializer(std::move(init));
     w->set_optimizer(std::move(opt));
     this->add_weights(w);
     this->m_model->add_weights(std::move(w));
   }
   if (this->num_weights() != 1) {
     LBANN_ERROR("attempted to setup ",
                 this->get_type(),
                 " layer \"",
                 this->get_name(),
                 "\" ",
                 "with an invalid number of weights ",
                 "(expected 1, found ",
                 this->num_weights(),
                 ")");
   }

   // Initialize dictionary
   // Note: The embeddings are stored as an
   // (embedding_dim x num_embeddings) matrix that is fully replicated
   // across the grid of the input tensor.
   auto& embeddings = this->get_weights(0);
   auto matrix_dist = this->get_prev_activations().DistData();
   matrix_dist.colDist = El::STAR;
   matrix_dist.rowDist = El::STAR;
   embeddings.set_dims({m_embedding_dim}, {m_num_embeddings});
   embeddings.set_matrix_distribution(matrix_dist);
   embeddings.setup();

   // FIXME (trb 06/01/2020): Assuming embedding values have data
   // type that matches this layer. In future, we should abstract
   // this or dynamically dispatch it.
   auto& embedding_values =
     dynamic_cast<AbsDistMatrixType&>(embeddings.get_values());

   // Zero out embedding vector for padding index
   // Note: The padding index selects a column of the embeddings
   // matrix, so it must be checked against the number of embeddings
   // (the column count), not the embedding dimension (the row count).
   // A negative or otherwise out-of-range index leaves the embeddings
   // untouched.
   if (0 <= m_padding_idx &&
       m_padding_idx < static_cast<El::Int>(m_num_embeddings)) {
     std::unique_ptr<AbsDistMatrixType> pad_embedding(
       embedding_values.Construct(embedding_values.Grid(),
                                  embedding_values.Root()));
     El::View(*pad_embedding, embedding_values, El::ALL, El::IR(m_padding_idx));
     El::Zero(*pad_embedding);
   }

   // Initialize gradient w.r.t. embeddings
   // Note: Same matrix type, grid and root as the embedding values.
   this->m_embeddings_grad.reset(
     embedding_values.Construct(embedding_values.Grid(),
                                embedding_values.Root()));
   m_embeddings_grad->Resize(m_embedding_dim, m_num_embeddings);
 }

 // Builder hook for the embedding layer, produced by a project macro.
 // NOTE(review): the macro is defined elsewhere; presumably it declares
 // a build_embedding_layer_from_pbuf-style factory -- confirm.
 LBANN_DEFINE_LAYER_BUILDER(embedding);

 // Unless the including translation unit defines
 // LBANN_EMBEDDING_LAYER_INSTANTIATE, declare the data-parallel
 // specializations as extern templates so they are not implicitly
 // instantiated here.
 #ifndef LBANN_EMBEDDING_LAYER_INSTANTIATE

 // PROTO_DEVICE is consumed by the header included below.
 // NOTE(review): presumably instantiate_device.hpp expands it once per
 // supported (data type, device) pair -- confirm against that header.
 #define PROTO_DEVICE(T, Device)                                                \
   extern template class embedding_layer<T, data_layout::DATA_PARALLEL, Device>

 #include "lbann/macros/instantiate_device.hpp"
 // Do not leak the helper macro to code that includes this header.
 #undef PROTO_DEVICE

 #endif // LBANN_EMBEDDING_LAYER_INSTANTIATE

 } // namespace lbann

 #endif // LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED
lbann::WEIGHTS
Definition: base.hpp:210

lbann::embedding_layer::write_specific_proto
void write_specific_proto(lbann_data::Layer &proto) const final
Definition: embedding.hpp:143

lbann::embedding_layer::embedding_layer
embedding_layer()
Definition: embedding.hpp:165

lbann::embedding_layer::fp_compute
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.

lbann::Layer::setup_dims
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.

lbann::embedding_layer::copy
embedding_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: embedding.hpp:196

lbann::ERROR_SIGNALS
Definition: base.hpp:207

lbann::data_type_weights
Definition: l2.hpp:41

lbann::embedding_layer::can_run_inplace
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: embedding.hpp:94

lbann::Layer::get_comm
lbann_comm * get_comm() const

lbann::embedding_layer::bp_compute
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...

lbann::embedding_layer::m_num_embeddings
size_t m_num_embeddings
Definition: embedding.hpp:125

lbann::PREV_ACTIVATIONS
Definition: base.hpp:208

lbann::embedding_layer::serialize
void serialize(ArchiveT &ar)

LBANN_ERROR
#define LBANN_ERROR(...)
Definition: exception.hpp:37

lbann::Layer::get_input_dims
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.

lbann::embedding_layer::setup_data
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
Definition: embedding.hpp:242

lbann::description
Generates nicely formatted description messages.
Definition: description.hpp:49

lbann::embedding_layer::setup_dims
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Definition: embedding.hpp:233

lbann::embedding_layer::get_device_allocation
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: embedding.hpp:216

lbann::embedding_layer::AbsDistMatrixType
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
Definition: embedding.hpp:63

lbann::model::add_weights
void add_weights(OwningWeightsPtr &&w)
Add weights to model.

lbann::Layer::get_description
virtual description get_description() const
Human-readable description.

lbann::Device
constexpr El::Device Device
Definition: OperatorTraits.hpp:62

lbann::data_type_layer< TensorDataType >::get_prev_activations
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)

lbann::embedding_layer::m_embeddings_grad
std::unique_ptr< AbsDistMatrixType > m_embeddings_grad
Definition: embedding.hpp:135

lbann::Layer::get_weights
weights const  & get_weights(size_t idx) const

lbann::model::set_name
void set_name(std::string name)
Metadata Accessors.

lbann::Layer::set_output_dims
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.

lbann::Layer::num_weights
size_t num_weights() const noexcept
Definition: layer.hpp:727

lbann::Layer::has_weights
bool has_weights() const noexcept
Definition: layer.hpp:728

lbann::embedding_layer::get_backprop_requirements
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: embedding.hpp:95

lbann::embedding_layer::m_embedding_dim
size_t m_embedding_dim
Definition: embedding.hpp:127

lbann::embedding_layer::get_data_layout
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: embedding.hpp:209

lbann::data_type_layer
Definition: data_type_layer.hpp:69

lbann::embedding_layer::m_padding_idx
El::Int m_padding_idx
Definition: embedding.hpp:132

lbann::embedding_layer::get_description
description get_description() const override
Human-readable description.
Definition: embedding.hpp:223

lbann::data_layout::DATA_PARALLEL

lbann::Layer::get_name
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332

instantiate_device.hpp

lbann::embedding_layer::access
friend class cereal::access
Definition: embedding.hpp:114

lbann::data_layout
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218

data_type_layer.hpp

lbann::embedding_layer::~embedding_layer
~embedding_layer()=default

lbann::data_type_optimizer
Definition: l2.hpp:39

lbann::embedding_layer::get_type
std::string get_type() const override
Get the layer type's name.
Definition: embedding.hpp:202

lbann::data_type_layer::setup_data
void setup_data(size_t max_mini_batch_size) override

lbann::LBANN_DEFINE_LAYER_BUILDER
LBANN_DEFINE_LAYER_BUILDER(elu)

lbann::embedding_layer::operator=
embedding_layer & operator=(const embedding_layer &other)
Definition: embedding.hpp:182

lbann::embedding_layer
Lookup table to vectors of fixed size.
Definition: embedding.hpp:53

lbann::Layer::add_weights
void add_weights(ViewingWeightsPtr w)
Definition: layer.hpp:723

model.hpp

lbann::data_type_layer::operator=
data_type_layer & operator=(data_type_layer &&other)=default

memory.hpp

lbann::Layer::m_model
model * m_model
Reference to model managing this layer.
Definition: layer.hpp:845

lbann
Definition: callback_helpers.hpp:32

datatype_helpers.hpp