LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
embedding.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED
28 #define LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED
29 
31 #include "lbann/models/model.hpp"
33 #include "lbann/proto/layers.pb.h"
34 #include "lbann/utils/memory.hpp"
35 
36 namespace lbann {
37 
52 template <typename TensorDataType, data_layout Layout, El::Device Device>
53 class embedding_layer : public data_type_layer<TensorDataType>
54 {
55  static_assert(Layout == data_layout::DATA_PARALLEL,
56  "embedding layer only supports data parallel layout");
57 
58 public:
60 
63  using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
64 
67 
70 
72 
73 public:
82  embedding_layer(size_t num_embeddings,
83  size_t embedding_dim,
84  El::Int padding_idx = -1);
85 
86  embedding_layer(const embedding_layer& other);
88  ~embedding_layer() = default;
89 
90  embedding_layer* copy() const override;
91  std::string get_type() const override;
92  data_layout get_data_layout() const override;
93  El::Device get_device_allocation() const override;
94  bool can_run_inplace() const override { return false; }
95  int get_backprop_requirements() const override
96  {
98  }
99 
100  description get_description() const override;
101 
103 
105  template <typename ArchiveT>
106  void serialize(ArchiveT& ar);
107 
109 
110 protected:
112  void write_specific_proto(lbann_data::Layer& proto) const final;
113 
114  friend class cereal::access;
115  embedding_layer();
116 
117  void setup_dims() override;
118  void setup_data(size_t max_mini_batch_size) override;
119 
120  void fp_compute() override;
121  void bp_compute() override;
122 
123 private:
132  El::Int m_padding_idx;
133 
135  std::unique_ptr<AbsDistMatrixType> m_embeddings_grad;
136 };
137 
138 // =========================================================
139 // Implementation
140 // =========================================================
141 
142 template <typename T, data_layout L, El::Device D>
144  lbann_data::Layer& proto) const
145 {
146  proto.set_datatype(proto::ProtoDataType<T>);
147  auto* msg = proto.mutable_embedding();
148  msg->set_num_embeddings(m_num_embeddings);
149  msg->set_embedding_dim(m_embedding_dim);
150  msg->mutable_padding_idx()->set_value(m_padding_idx);
151 }
152 
153 template <typename TensorDataType, data_layout Layout, El::Device Device>
155  size_t num_embeddings,
156  size_t embedding_dim,
157  El::Int padding_idx)
158  : data_type_layer<TensorDataType>(nullptr),
159  m_num_embeddings{num_embeddings},
160  m_embedding_dim{embedding_dim},
161  m_padding_idx{padding_idx}
162 {}
163 
164 template <typename TensorDataType, data_layout Layout, El::Device Device>
166  : embedding_layer(0, 0, 0)
167 {}
168 
169 template <typename TensorDataType, data_layout Layout, El::Device Device>
172  : data_type_layer<TensorDataType>(other),
174  m_embedding_dim{other.m_embedding_dim},
175  m_padding_idx{other.m_padding_idx},
176  m_embeddings_grad(other.m_embeddings_grad ? other.m_embeddings_grad->Copy()
177  : nullptr)
178 {}
179 
180 template <typename TensorDataType, data_layout Layout, El::Device Device>
184 {
189  m_embeddings_grad.reset(
190  other.m_embeddings_grad ? other.m_embeddings_grad->Copy() : nullptr);
191  return *this;
192 }
193 
194 template <typename TensorDataType, data_layout Layout, El::Device Device>
197 {
198  return new embedding_layer(*this);
199 }
200 
201 template <typename TensorDataType, data_layout Layout, El::Device Device>
203 {
204  return "embedding";
205 }
206 
207 template <typename TensorDataType, data_layout Layout, El::Device Device>
210 {
211  return Layout;
212 }
213 
214 template <typename TensorDataType, data_layout Layout, El::Device Device>
217 {
218  return Device;
219 }
220 
221 template <typename TensorDataType, data_layout Layout, El::Device Device>
224 {
226  desc.add("Num embeddings", m_num_embeddings);
227  desc.add("Embedding dim", m_embedding_dim);
228  desc.add("Padding index", m_padding_idx);
229  return desc;
230 }
231 
232 template <typename TensorDataType, data_layout Layout, El::Device Device>
234 {
236  auto dims = this->get_input_dims();
237  dims.push_back(static_cast<int>(m_embedding_dim));
238  this->set_output_dims(dims);
239 }
240 
241 template <typename TensorDataType, data_layout Layout, El::Device Device>
243  size_t max_mini_batch_size)
244 {
245  data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);
246 
247  // Construct default weights if needed
248  // Note: Randomly drawn from normal distribution with mean 0 and
249  // standard deviation 1.
250  if (!this->has_weights()) {
251  auto w = std::make_shared<WeightsType>(*this->get_comm());
252  auto init = std::make_unique<normal_initializer<TensorDataType>>(
253  El::TypeTraits<TensorDataType>::Zero(),
254  El::TypeTraits<TensorDataType>::One());
255  auto opt = this->m_model->template create_optimizer<TensorDataType>();
256  w->set_name(this->get_name() + "_weights");
257  w->set_initializer(std::move(init));
258  w->set_optimizer(std::move(opt));
259  this->add_weights(w);
260  this->m_model->add_weights(std::move(w));
261  }
262  if (this->num_weights() != 1) {
263  LBANN_ERROR("attempted to setup ",
264  this->get_type(),
265  " layer \"",
266  this->get_name(),
267  "\" ",
268  "with an invalid number of weights ",
269  "(expected 1, found ",
270  this->num_weights(),
271  ")");
272  }
273 
274  // Initialize dictionary
275  auto& embeddings = this->get_weights(0);
276  auto matrix_dist = this->get_prev_activations().DistData();
277  matrix_dist.colDist = El::STAR;
278  matrix_dist.rowDist = El::STAR;
279  embeddings.set_dims({m_embedding_dim}, {m_num_embeddings});
280  embeddings.set_matrix_distribution(matrix_dist);
281  embeddings.setup();
282 
283  // Zero out embedding vector (a column of the weights matrix) for a valid padding index
284  if (0 <= m_padding_idx &&
285  m_padding_idx < static_cast<El::Int>(m_num_embeddings)) {
286  // FIXME (trb 06/01/2020): Assuming embedding values have data
287  // type that matches this layer. In future, we should abstract
288  // this or dynamically dispatch it.
289  auto& embedding_values =
290  dynamic_cast<AbsDistMatrixType&>(embeddings.get_values());
291  std::unique_ptr<AbsDistMatrixType> pad_embedding(
292  embedding_values.Construct(embedding_values.Grid(),
293  embedding_values.Root()));
294  El::View(*pad_embedding, embedding_values, El::ALL, El::IR(m_padding_idx));
295  El::Zero(*pad_embedding);
296  }
297 
298  // Initialize gradient w.r.t. embeddings
299  {
300  auto& embedding_values =
301  dynamic_cast<AbsDistMatrixType&>(embeddings.get_values());
302  this->m_embeddings_grad.reset(
303  embedding_values.Construct(embedding_values.Grid(),
304  embedding_values.Root()));
306  }
307 }
308 
309 LBANN_DEFINE_LAYER_BUILDER(embedding);
310 
311 #ifndef LBANN_EMBEDDING_LAYER_INSTANTIATE
312 
313 #define PROTO_DEVICE(T, Device) \
314  extern template class embedding_layer<T, data_layout::DATA_PARALLEL, Device>
315 
317 #undef PROTO_DEVICE
318 
319 #endif // LBANN_EMBEDDING_LAYER_INSTANTIATE
320 
321 } // namespace lbann
322 
323 #endif // LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED
void write_specific_proto(lbann_data::Layer &proto) const final
Definition: embedding.hpp:143
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
embedding_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: embedding.hpp:196
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: embedding.hpp:94
lbann_comm * get_comm() const
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
void serialize(ArchiveT &ar)
#define LBANN_ERROR(...)
Definition: exception.hpp:37
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
Definition: embedding.hpp:242
Generates nicely formatted description messages.
Definition: description.hpp:49
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Definition: embedding.hpp:233
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: embedding.hpp:216
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
Definition: embedding.hpp:63
void add_weights(OwningWeightsPtr &&w)
Add weights to model.
virtual description get_description() const
Human-readable description.
constexpr El::Device Device
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
std::unique_ptr< AbsDistMatrixType > m_embeddings_grad
Definition: embedding.hpp:135
weights const & get_weights(size_t idx) const
void set_name(std::string name)
Metadata Accessors.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
size_t num_weights() const noexcept
Definition: layer.hpp:727
bool has_weights() const noexcept
Definition: layer.hpp:728
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: embedding.hpp:95
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: embedding.hpp:209
description get_description() const override
Human-readable description.
Definition: embedding.hpp:223
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
friend class cereal::access
Definition: embedding.hpp:114
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
std::string get_type() const override
Get the layer type's name.
Definition: embedding.hpp:202
void setup_data(size_t max_mini_batch_size) override
LBANN_DEFINE_LAYER_BUILDER(elu)
embedding_layer & operator=(const embedding_layer &other)
Definition: embedding.hpp:182
Lookup table to vectors of fixed size.
Definition: embedding.hpp:53
void add_weights(ViewingWeightsPtr w)
Definition: layer.hpp:723
data_type_layer & operator=(data_type_layer &&other)=default
model * m_model
Reference to model managing this layer.
Definition: layer.hpp:845