27 #ifndef LBANN_LAYERS_REGULARIZERS_LAYER_NORM_HPP_INCLUDED 28 #define LBANN_LAYERS_REGULARIZERS_LAYER_NORM_HPP_INCLUDED 35 #include "lbann/proto/layers.pb.h" 53 template <
typename TensorDataType, data_layout Layout, El::Device Device>
77 std::string
get_type()
const override;
90 template <
typename ArchiveT>
100 void setup_data(
size_t max_mini_batch_size)
override;
139 template <
typename T, data_layout L, El::Device D>
141 lbann_data::Layer& proto)
const 143 proto.set_datatype(proto::ProtoDataType<T>);
144 auto* msg = proto.mutable_layer_norm();
145 msg->mutable_epsilon()->set_value(
m_epsilon);
150 template <
typename TensorDataType, data_layout Layout, El::Device Device>
152 TensorDataType epsilon,
161 template <
typename TensorDataType, data_layout Layout, El::Device Device>
178 template <
typename TensorDataType, data_layout Layout, El::Device Device>
198 template <
typename TensorDataType, data_layout Layout, El::Device Device>
205 template <
typename TensorDataType, data_layout Layout, El::Device Device>
211 template <
typename TensorDataType, data_layout Layout, El::Device Device>
218 template <
typename TensorDataType, data_layout Layout, El::Device Device>
225 template <
typename TensorDataType, data_layout Layout, El::Device Device>
231 desc.add(
"Affine Scale",
m_scale);
232 desc.add(
"Affine Bias",
m_bias);
236 template <
typename TensorDataType, data_layout Layout, El::Device Device>
243 template <
typename TensorDataType, data_layout Layout, El::Device Device>
245 size_t max_mini_batch_size)
249 std::vector<size_t> out_dims{output_dims.begin(), output_dims.end()};
251 dist.colDist = El::STAR;
252 m_statistics.reset(AbsDistMatrixType::Instantiate(dist));
266 "with an invalid number of weights ",
281 dist.rowDist = El::STAR;
285 auto w = std::make_shared<WeightsType>(*this->
get_comm());
286 auto init = std::make_unique<constant_initializer<TensorDataType>>(
287 El::TypeTraits<TensorDataType>::One());
288 auto opt = this->
m_model->template create_optimizer<TensorDataType>();
290 w->set_optimizer(std::move(opt));
291 w->set_initializer(std::move(init));
306 auto w = std::make_shared<WeightsType>(*this->
get_comm());
307 auto init = std::make_unique<constant_initializer<TensorDataType>>(
308 El::TypeTraits<TensorDataType>::Zero());
309 auto opt = this->
m_model->template create_optimizer<TensorDataType>();
311 w->set_optimizer(std::move(opt));
312 w->set_initializer(std::move(init));
332 #ifndef LBANN_LAYER_NORM_LAYER_INSTANTIATE 333 #define PROTO_DEVICE(T, Device) \ 334 extern template class layer_norm_layer<T, \ 335 data_layout::DATA_PARALLEL, \ 337 extern template class layer_norm_layer<T, data_layout::MODEL_PARALLEL, Device> 341 #endif // LBANN_LAYER_NORM_LAYER_INSTANTIATE 345 #endif // LBANN_LAYERS_REGULARIZERS_LAYER_NORM_HPP_INCLUDED virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
lbann_comm * get_comm() const
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
layer_norm_layer & operator=(const layer_norm_layer &other)
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
std::unique_ptr< AbsDistMatType > m_bias_gradient
Gradient w.r.t. bias.
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
size_t get_matrix_width() const
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Generates nicely formatted description messages.
std::unique_ptr< AbsDistMatType > m_scale_gradient
Gradient w.r.t. scale.
void add_weights(OwningWeightsPtr &&w)
Add weights to model.
virtual description get_description() const
Human-readable description.
constexpr El::Device Device
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
layer_norm_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
void serialize(ArchiveT &ar)
weights const & get_weights(size_t idx) const
void set_name(std::string name)
Metadata Accessors.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
size_t num_weights() const noexcept
bool m_bias
Apply elementwise bias after normalization (learned weights).
bool has_weights() const noexcept
bool m_scale
Apply elementwise scale after normalization (learned weights).
void set_matrix_distribution(El::DistData dist)
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
std::string get_name() const
Get the layer instance's name.
void set_num_weights(size_t n)
El::AbstractDistMatrix< TensorDataType > AbsDistMatType
Normalize over data samples.
void write_specific_proto(lbann_data::Layer &proto) const final
std::unique_ptr< AbsDistMatType > m_statistics_gradient
Gradients w.r.t. per-sample statistics.
description get_description() const override
Human-readable description.
layer_norm_layer(TensorDataType epsilon=El::To< TensorDataType >(1e-5), bool scale=false, bool bias=false)
data_layout
Data layout that is optimized for different modes of parallelism.
size_t get_matrix_height() const
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
std::string get_type() const override
Get the layer type's name.
void set_dims(std::vector< size_t > matrix_height_dims, std::vector< size_t > matrix_width_dims={})
void set_weights(size_t idx, ViewingWeightsPtr w)
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
void setup_data(size_t max_mini_batch_size) override
LBANN_DEFINE_LAYER_BUILDER(elu)
std::unique_ptr< AbsDistMatType > m_statistics
Per-sample statistics.
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.
data_type_layer & operator=(data_type_layer &&other)=default
model * m_model
Reference to model managing this layer.
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.