27 #ifndef LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED 28 #define LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED 33 #include "lbann/proto/layers.pb.h" 52 template <
typename TensorDataType, data_layout Layout, El::Device Device>
56 "embedding layer only supports data parallel layout");
84 El::Int padding_idx = -1);
91 std::string
get_type()
const override;
105 template <
typename ArchiveT>
118 void setup_data(
size_t max_mini_batch_size)
override;
142 template <
typename T, data_layout L, El::Device D>
144 lbann_data::Layer& proto)
const 146 proto.set_datatype(proto::ProtoDataType<T>);
147 auto* msg = proto.mutable_embedding();
153 template <
typename TensorDataType, data_layout Layout, El::Device Device>
155 size_t num_embeddings,
156 size_t embedding_dim,
164 template <
typename TensorDataType, data_layout Layout, El::Device Device>
169 template <
typename TensorDataType, data_layout Layout, El::Device Device>
180 template <
typename TensorDataType, data_layout Layout, El::Device Device>
194 template <
typename TensorDataType, data_layout Layout, El::Device Device>
201 template <
typename TensorDataType, data_layout Layout, El::Device Device>
207 template <
typename TensorDataType, data_layout Layout, El::Device Device>
214 template <
typename TensorDataType, data_layout Layout, El::Device Device>
221 template <
typename TensorDataType, data_layout Layout, El::Device Device>
232 template <
typename TensorDataType, data_layout Layout, El::Device Device>
241 template <
typename TensorDataType, data_layout Layout, El::Device Device>
243 size_t max_mini_batch_size)
251 auto w = std::make_shared<WeightsType>(*this->
get_comm());
252 auto init = std::make_unique<normal_initializer<TensorDataType>>(
253 El::TypeTraits<TensorDataType>::Zero(),
254 El::TypeTraits<TensorDataType>::One());
255 auto opt = this->
m_model->template create_optimizer<TensorDataType>();
257 w->set_initializer(std::move(init));
258 w->set_optimizer(std::move(opt));
268 "with an invalid number of weights ",
269 "(expected 1, found ",
277 matrix_dist.colDist = El::STAR;
278 matrix_dist.rowDist = El::STAR;
280 embeddings.set_matrix_distribution(matrix_dist);
289 auto& embedding_values =
291 std::unique_ptr<AbsDistMatrixType> pad_embedding(
292 embedding_values.Construct(embedding_values.Grid(),
293 embedding_values.Root()));
294 El::View(*pad_embedding, embedding_values, El::ALL, El::IR(
m_padding_idx));
295 El::Zero(*pad_embedding);
300 auto& embedding_values =
303 embedding_values.Construct(embedding_values.Grid(),
304 embedding_values.Root()));
311 #ifndef LBANN_EMBEDDING_LAYER_INSTANTIATE 313 #define PROTO_DEVICE(T, Device) \ 314 extern template class embedding_layer<T, data_layout::DATA_PARALLEL, Device> 319 #endif // LBANN_EMBEDDING_LAYER_INSTANTIATE 323 #endif // LBANN_LAYERS_LEARNING_EMBEDDING_HPP_INCLUDED
void write_specific_proto(lbann_data::Layer &proto) const final
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
virtual void setup_dims()
Set up tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
embedding_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
lbann_comm * get_comm() const
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
void serialize(ArchiveT &ar)
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
void setup_data(size_t max_mini_batch_size) override
Set up layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
Generates nicely formatted description messages.
void setup_dims() override
Set up tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
void add_weights(OwningWeightsPtr &&w)
Add weights to model.
virtual description get_description() const
Human-readable description.
constexpr El::Device Device
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
std::unique_ptr< AbsDistMatrixType > m_embeddings_grad
weights const & get_weights(size_t idx) const
void set_name(std::string name)
Metadata Accessors.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
size_t num_weights() const noexcept
bool has_weights() const noexcept
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
description get_description() const override
Human-readable description.
std::string get_name() const
Get the layer instance's name.
friend class cereal::access
data_layout
Data layout that is optimized for different modes of parallelism.
~embedding_layer()=default
std::string get_type() const override
Get the layer type's name.
void setup_data(size_t max_mini_batch_size) override
LBANN_DEFINE_LAYER_BUILDER(elu)
embedding_layer & operator=(const embedding_layer &other)
Lookup table to vectors of fixed size.
void add_weights(ViewingWeightsPtr w)
data_type_layer & operator=(data_type_layer &&other)=default
model * m_model
Reference to model managing this layer.