27 #ifndef LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED 28 #define LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED 34 #include "lbann/proto/layers.pb.h" 51 template <
typename TensorDataType, data_layout Layout, El::Device Device>
69 TensorDataType decay = El::To<TensorDataType>(0.9),
70 TensorDataType epsilon = El::To<TensorDataType>(1e-5))
109 return "entry-wise batch normalization";
130 template <
typename ArchiveT>
146 std::vector<size_t> output_dims(output_dims_.begin(), output_dims_.end());
153 "with an invalid number of weights ",
160 auto w = std::make_shared<WeightsType>(*this->
get_comm());
161 auto init = std::make_unique<constant_initializer<TensorDataType>>(
162 El::TypeTraits<TensorDataType>::Zero());
163 w->set_name(this->
get_name() +
"_running_mean");
164 w->set_initializer(std::move(init));
169 auto w = std::make_shared<WeightsType>(*this->
get_comm());
170 auto init = std::make_unique<constant_initializer<TensorDataType>>(
171 El::TypeTraits<TensorDataType>::One());
172 w->set_name(this->
get_name() +
"_running_variance");
173 w->set_initializer(std::move(init));
180 dist.rowDist = El::STAR;
184 w.set_dims(output_dims);
185 w.set_matrix_distribution(dist);
214 template <
typename T, data_layout L, El::Device D>
216 lbann_data::Layer& proto)
const 218 proto.set_datatype(proto::ProtoDataType<T>);
219 auto* msg = proto.mutable_entrywise_batch_normalization();
226 #ifndef LBANN_ENTRYWISE_BATCH_NORMALIZATION_LAYER_INSTANTIATE 227 #define PROTO_DEVICE(T, Device) \ 228 extern template class entrywise_batch_normalization_layer< \ 230 data_layout::DATA_PARALLEL, \ 232 extern template class entrywise_batch_normalization_layer< \ 234 data_layout::MODEL_PARALLEL, \ 239 #endif // LBANN_ENTRYWISE_BATCH_NORMALIZATION_LAYER_INSTANTIATE 243 #endif // LBANN_LAYERS_REGULARIZERS_ENTRYWISE_BATCH_NORMALIZATION_HPP_INCLUDED
lbann_comm * get_comm() const
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
Entry-wise batch normalization, including scale/bias.
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
entrywise_batch_normalization_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
description get_description() const override
Human-readable description.
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Generates nicely formatted description messages.
void write_specific_proto(lbann_data::Layer &proto) const final
entrywise_batch_normalization_layer(const entrywise_batch_normalization_layer &other)
void add_weights(OwningWeightsPtr &&w)
Add weights to model.
virtual description get_description() const
Human-readable description.
constexpr El::Device Device
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
weights const & get_weights(size_t idx) const
std::string get_type() const override
Get the layer type's name.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
size_t num_weights() const noexcept
bool has_weights() const noexcept
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
entrywise_batch_normalization_layer(TensorDataType decay=El::To< TensorDataType >(0.9), TensorDataType epsilon=El::To< TensorDataType >(1e-5))
std::unique_ptr< AbsDistMatrixType > m_batch_statistics_gradient
Gradients w.r.t. current mini-batch statistics.
std::string get_name() const
Get the layer instance's name.
void set_num_weights(size_t n)
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
void serialize(ArchiveT &ar)
data_layout
Data layout that is optimized for different modes of parallelism.
std::unique_ptr< AbsDistMatrixType > m_batch_statistics
Current mini-batch statistics.
void set_weights(size_t idx, ViewingWeightsPtr w)
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
void setup_data(size_t max_mini_batch_size) override
LBANN_DEFINE_LAYER_BUILDER(elu)
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.
entrywise_batch_normalization_layer & operator=(const entrywise_batch_normalization_layer &other)
data_type_layer & operator=(data_type_layer &&other)=default
model * m_model
Reference to model managing this layer.
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.