Channel-wise batch normalization, including scale/bias. More...

#include <batch_normalization.hpp>

Inheritance diagram for lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >:

Collaboration diagram for lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >:

Public Types
Public Types
using	AbsDistMatrixType = El::AbstractDistMatrix< TensorDataType >
	The tensor type expected in this object. More...

using	WeightsType = data_type_weights< TensorDataType >
	The concrete weights type used by this object. More...

using	OptimizerType = data_type_optimizer< TensorDataType >
	The concrete optimizer type used by this object. More...

Public Types inherited from lbann::data_type_layer< TensorDataType >
using	InputAbsDistMatrixType = El::AbstractDistMatrix< TensorDataType >
	The tensor type expected in this object. More...

using	OutputAbsDistMatrixType = El::AbstractDistMatrix< TensorDataType >

using	InputAbsDistMatReadProxyType = El::AbstractDistMatrixReadDeviceProxy< TensorDataType, D >
	The proxy tensor type expected in this object. More...

using	OutputAbsDistMatReadProxyType = El::AbstractDistMatrixReadDeviceProxy< TensorDataType, D >

using	InputAbsMatrixType = El::AbstractMatrix< TensorDataType >
	The local tensor type expected in this object. More...

using	OutputAbsMatrixType = El::AbstractMatrix< TensorDataType >

using	WeightsProxyType = weights_proxy< TensorDataType >
	The proxy type for weights used by this object. More...

Public Member Functions
	batch_normalization_layer (TensorDataType decay=0.9, TensorDataType epsilon=1e-5, int statistics_group_size=1, bool bessel_correction=true)
	Set up batch normalization. More...

	batch_normalization_layer (const batch_normalization_layer &other)

batch_normalization_layer &	operator= (const batch_normalization_layer &other)

batch_normalization_layer *	copy () const override
	Copy function. This function dynamically allocates memory for a layer instance and instantiates a copy. The caller is responsible for deallocating the instance. More...

std::string	get_type () const override
	Get the layer type's name. More...

data_layout	get_data_layout () const override
	Get data layout of the data tensors. We assume that the data layouts of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its data layout should override this function to return its template parameter. More...

El::Device	get_device_allocation () const override
	Get the device allocation for the data tensors. We assume that the device allocation of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its device allocation should override this function to return its template parameter. More...

bool	can_run_inplace () const override
	If true, the computation can run in-place (feeding each input activations tensor as the corresponding output activations). More...

int	get_backprop_requirements () const override
	Returns the necessary tensors for computing backpropagation. More...

description	get_description () const override
	Human-readable description. More...

Serialization
template<typename ArchiveT >
void	serialize (ArchiveT &ar)

Public Member Functions inherited from lbann::data_type_layer< TensorDataType >
	data_type_layer (lbann_comm *, bool persistent_error_signals=false)

virtual	~data_type_layer ()=default

std::string	get_datatype_name () const override

void	forward_prop () final

void	summarize_matrices (lbann_summary &summarizer, int step) override

void	check_setup () override

const OutputAbsDistMatrixType &	get_activations (const Layer &child) const override

OutputAbsDistMatrixType &	get_activations (int child_index=0)

const OutputAbsDistMatrixType &	get_activations (int child_index=0) const

const InputAbsDistMatrixType &	get_error_signals (const Layer &parent) const override

InputAbsDistMatrixType &	get_error_signals (int parent_index=0)

const InputAbsDistMatrixType &	get_error_signals (int parent_index=0) const

El::Int	current_output_mini_batch_size () const override

El::Int	infer_mini_batch_size_from_parents_or_default_to_current () const override

OutputAbsDistMatrixType &	get_temp_grad ()

InputAbsDistMatrixType &	get_branch_tag_input (int tag)

std::vector< std::unique_ptr< InputAbsDistMatrixType > > &	get_branch_tag_input_vector ()

std::vector< std::unique_ptr< OutputAbsDistMatrixType > > &	get_all_activations ()

std::vector< std::unique_ptr< InputAbsDistMatrixType > > &	get_all_prev_activations ()

std::vector< std::unique_ptr< OutputAbsDistMatrixType > > &	get_all_prev_error_signals ()

std::vector< std::unique_ptr< InputAbsDistMatrixType > > &	get_all_error_signals ()

OutputAbsMatrixType &	get_local_activations (int child_index=0)

const OutputAbsMatrixType &	get_local_activations (int child_index=0) const

InputAbsMatrixType &	get_local_error_signals (int parent_index=0)

const InputAbsMatrixType &	get_local_error_signals (int parent_index=0) const

void	set_keep_error_signals (bool) override
	Set whether to keep or dynamically reallocate error signals. More...

El::mpi::Comm &	get_subgrid_comm ()

void	serialize (ArchiveT &ar)

Public Member Functions inherited from lbann::Layer
void	write_proto (lbann_data::Layer &proto) const
	Write layer to proto file. More...

lbann_comm *	get_comm () const

int	get_grid_tag () const noexcept
	Identifying tag for process grid. More...

void	set_grid_tag (int tag)
	Set process grid. More...

bool	runs_inplace () const
	If true, the layer will run in-place (the input and output activations point to the same tensor). Value is set during graph setup (in setup_pointers) based on layer traits and neighboring layers. More...

bool	distconv_enabled () const
	Indicate whether distconv is enabled. More...

	Layer ()

virtual	~Layer ()=default

void	set_name (const std::string name)
	Set the layer instance's name. Each layer in a model should have a unique, preferably human-readable, name. More...

void	set_model (model *m)
	Set the model that manages this layer. More...

std::string	get_name () const
	Get the layer instance's name. More...

model *	get_model () const noexcept
	Get a reference to the model that manages this layer. More...

int	get_expected_num_parent_layers () const noexcept
	Get expected number of parent layers. A negative value indicates no limit. More...

int	get_expected_num_child_layers () const noexcept
	Get expected number of child layers. A negative value indicates no limit. More...

ParallelStrategy &	get_parallel_strategy () noexcept
	Get the parallel strategy for the layer. More...

ParallelStrategy const &	get_parallel_strategy () const noexcept
	Get the parallel strategy for the layer. More...

bool	using_gpus () const noexcept
	Whether the layer is using a GPU implementation. More...

void	back_prop ()
	Backward propagation step. Given the objective function gradients w.r.t. the output tensors, compute the gradients w.r.t. the input tensors and w.r.t. the weights. This is essentially an application of the chain rule. More...

bool	update ()
	Update step. Update the layer's internal members. Note that the optimization step for the weights happens elsewhere. More...

virtual void	setup (size_t max_mini_batch_size, const std::vector< El::Grid *> &grids)
	Setup layer members. More...

void	summarize_stats (lbann_summary &summarizer, int step)

void	reset_counters ()
	Reset layer stat counters. More...

void	set_communication_flag (SubGraphCommunication type)

SubGraphCommunication	get_communication_flag ()

void	set_num_spliting_groups (El::Int spliting_groups)

El::Int	get_num_spliting_groups () const

std::shared_ptr< El::Grid >	get_mygrid () const

void	reset_inter_subgrid_vc_comm (std::shared_ptr< El::mpi::Comm > mpi_comm)

void	set_subgraph_parallelism_execution ()

bool	subgraph_parallelism_execution () const noexcept

void	set_run_layer_in_subgraph ()

bool	get_run_layer_in_subgraph () const noexcept

const Layer &	get_parent_layer (size_t index=0) const

const Layer &	get_child_layer (size_t index=0) const

std::vector< const Layer * >	get_parent_layers () const

std::vector< const Layer * >	get_child_layers () const

size_t	find_parent_layer_index (const Layer &l) const

size_t	find_child_layer_index (const Layer &l) const

int	get_num_parents () const noexcept
	Get number of parent layers. More...

int	get_num_children () const noexcept
	Get number of child layers. More...

void	add_parent_layer (ViewingLayerPtr parent)
	Add a parent layer. More...

void	add_child_layer (ViewingLayerPtr child)
	Add a child layer. More...

void	replace_parent_layer (ViewingLayerPtr l, size_t index)

void	replace_child_layer (ViewingLayerPtr l, size_t index)

void	clear_parent_layers ()
	Remove pointers to parent layers. More...

void	clear_child_layers ()
	Remove pointers to child layers. More...

ViewingLayerPtr	get_parent_layer_pointer (size_t index) const

ViewingLayerPtr	get_child_layer_pointer (size_t index) const

virtual std::vector< ViewingLayerPtr >	get_layer_pointers ()
	List of pointers to other layers. More...

virtual void	set_layer_pointers (std::vector< ViewingLayerPtr > layers)
	Set list of pointers to other layers. More...

std::vector< ViewingWeightsPtr >	get_weights_pointers () const
	List of pointers to weights. More...

void	set_weights_pointers (std::vector< ViewingWeightsPtr > ptrs)
	Set list of pointers to weights. More...

void	replace_weights (Layer const &other_layer)
	Replace weights with another Layer's weights. More...

std::vector< int >	get_input_dims (size_t input_index=0) const
	Get input tensor dimensions. More...

int	get_input_size (size_t input_index=0) const
	Get input tensor size. More...

std::vector< int >	get_output_dims (size_t output_index=0) const
	Get output tensor dimensions. More...

int	get_output_size (size_t output_index=0) const
	Get output tensor size. More...

void	set_output_dims (std::vector< int > dims, size_t output_index=0)
	Set output tensor dimensions. More...

El::Int	infer_mini_batch_size_from_parents () const

void	set_hint_layer (ViewingLayerPtr l)
	Set hint layer. More...

const Layer *	get_hint_layer () const
	Get hint layer. More...

void	freeze ()

void	unfreeze ()

bool	is_frozen () const

template<typename ArchiveT >
void	serialize (ArchiveT &ar)

Protected Member Functions
void	write_specific_proto (lbann_data::Layer &proto) const final

void	setup_dims () override
	Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions. More...

void	setup_data (size_t max_mini_batch_size) override
	Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices. More...

void	fp_compute () override
	Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values. More...

void	bp_compute () override
	Compute objective function gradients. Called by the 'back_prop' function. Given the input, output, and gradient w.r.t. output tensors, the gradients w.r.t. input tensors are populated with the computed values and the gradients w.r.t. the weights are sent to the appropriate optimizers. More...

Protected Member Functions inherited from lbann::data_type_layer< TensorDataType >
InputAbsDistMatrixType &	get_prev_activations (int parent_index=0)

const InputAbsDistMatrixType &	get_prev_activations (int parent_index=0) const

OutputAbsDistMatrixType &	get_prev_error_signals (int child_index=0)

const OutputAbsDistMatrixType &	get_prev_error_signals (int child_index=0) const

const InputAbsMatrixType &	get_local_prev_activations (int parent_index=0) const

const OutputAbsMatrixType &	get_local_prev_error_signals (int child_index=0) const

void	setup_matrices (const std::vector< El::Grid * > &grids) override

void	setup_data (size_t max_mini_batch_size) override

void	fp_setup_inputs () override

void	fp_setup_outputs () override

void	bp_setup_gradient_wrt_inputs () override

void	bp_compute () override

InputAbsDistMatrixType const &	weights_values (size_t idx) const
	Get the values matrix for a specific weights object. More...

weights &	master_weights (size_t idx)
	Get a specific master weights object. More...

weights const &	master_weights (size_t idx) const

	data_type_layer (data_type_layer &&other)=default
	Protected lifecycle functions. More...

	data_type_layer (data_type_layer const &other)

data_type_layer &	operator= (data_type_layer &&other)=default

data_type_layer &	operator= (data_type_layer const &other)

Protected Member Functions inherited from lbann::Layer
void	setup_grid ()
	Setup process grid. More...

virtual void	setup_pointers ()
	Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to be already initialized. More...

virtual void	setup_gpu ()
	Setup GPU objects. Called by the 'setup' function if the layer is on GPUs. More...

virtual bool	update_compute ()
	Perform the computation for the update step. Returns false if the layer must reset for a new training epoch. More...

	Layer (Layer &&other)=default

	Layer (Layer const &other)

Layer &	operator= (Layer &&other)=default

Layer &	operator= (Layer const &other)

void	add_weights (ViewingWeightsPtr w)

size_t	num_weights () const noexcept

bool	has_weights () const noexcept

bool	has_weights (size_t idx) const noexcept

void	set_num_weights (size_t n)

void	set_weights (size_t idx, ViewingWeightsPtr w)

weights const &	get_weights (size_t idx) const

weights &	get_weights (size_t idx)

void	add_as_gradient_source ()

void	remove_as_gradient_source ()

Private Attributes
TensorDataType	m_decay
	Decay rate for running statistics. More...

TensorDataType	m_epsilon
	Small number for numerical stability. More...

int	m_statistics_group_size
	Size of process group for computing statistics. More...

bool	m_bessel_correction
	Add Bessel's correction to the batch normalization denominator. More...

std::unordered_map< El::Int, El::Int >	m_num_per_sum_cache

std::unique_ptr< AbsDistMatrixType >	m_mean_and_var
	Current minibatch means and standard deviations. More...

std::unique_ptr< AbsDistMatrixType >	m_mean_v

std::unique_ptr< AbsDistMatrixType >	m_var_v

std::unique_ptr< AbsDistMatrixType >	m_mean_and_var_gradient
	Gradients w.r.t. means and standard deviations. More...

std::unique_ptr< AbsDistMatrixType >	m_mean_gradient_v

std::unique_ptr< AbsDistMatrixType >	m_var_gradient_v

std::unique_ptr< AbsDistMatrixType >	m_scale_gradient

std::unique_ptr< AbsDistMatrixType >	m_bias_gradient

Additional Inherited Members
Protected Attributes inherited from lbann::Layer
int	m_expected_num_parent_layers = 1

int	m_expected_num_child_layers = 1
	Expected number of child layers. A negative value indicates no limit. More...

model *	m_model = nullptr
	Reference to model managing this layer. More...

bool	m_frozen
	Avoid back prop if frozen. More...

EvalType	m_fp_time
	Time spent in forward propagation. More...

EvalType	m_fp_compute_time
	Time spent in the forward propagation computation. More...

EvalType	m_bp_time
	Time spent in backward propagation. More...

EvalType	m_bp_compute_time
	Time spent in the backward propagation computation. More...

EvalType	m_update_time
	Time spent in updates. More...

std::string	m_name
	Layer instance's name. Each layer in a model should have a unique, preferably human-readable, name. More...

bool	m_runs_inplace = false
	If true, the layer will run in-place (the input and output activations point to the same tensor). Value is set during graph setup (in setup_pointers) based on layer traits and neighboring layers. More...

int	m_grid_tag = -1
	Identifying tag for process grid. More...

SubGraphCommunication	subgraph_communication_method = PT2PT

bool	m_subgraph_parallelism_execution = false

bool	run_layer_in_subgraph = false

std::unique_ptr< std::set< int > >	m_subgrid_ranks

El::Int	m_num_spliting_groups = 1

std::shared_ptr< El::mpi::Comm >	m_interSubGridVCComm

Detailed Description

template<typename TensorDataType, data_layout T_layout, El::Device Dev>
class lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >

Channel-wise batch normalization, including scale/bias.

Each input channel is normalized across the mini-batch to have zero mean and unit standard deviation. Learned scaling factors and biases are then applied. This uses the standard approach of maintaining the running mean and standard deviation (with exponential decay) for use at test time. See:

Sergey Ioffe and Christian Szegedy. "Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift." In International Conference on Machine Learning, pp. 448-456. 2015.

Definition at line 110 of file batch_normalization.hpp.

Member Typedef Documentation

◆ AbsDistMatrixType

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

using lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>

The tensor type expected in this object.

Definition at line 120 of file batch_normalization.hpp.

◆ OptimizerType

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

using lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::OptimizerType = data_type_optimizer<TensorDataType>

The concrete optimizer type used by this object.

Definition at line 126 of file batch_normalization.hpp.

◆ WeightsType

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

using lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::WeightsType = data_type_weights<TensorDataType>

The concrete weights type used by this object.

Definition at line 123 of file batch_normalization.hpp.

Constructor & Destructor Documentation

◆ batch_normalization_layer() [1/2]

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::batch_normalization_layer	(	TensorDataType	decay = `0.9`,
		TensorDataType	epsilon = `1e-5`,
		int	statistics_group_size = `1`,
		bool	bessel_correction = `true`
	)

inline

Set up batch normalization.

Parameters

decay	Controls the momentum of the running mean/standard deviation averages.
epsilon	A small number to avoid division by zero.
statistics_group_size	Number of processors to aggregate statistics over. Defaults to 1 (i.e. local aggregation).
bessel_correction	Add Bessel's correction to the denominator.

Definition at line 187 of file batch_normalization.hpp.

◆ batch_normalization_layer() [2/2]

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::batch_normalization_layer ( const batch_normalization_layer< TensorDataType, T_layout, Dev > & other )

inline

Definition at line 203 of file batch_normalization.hpp.

Member Function Documentation

◆ bp_compute()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

void lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::bp_compute ( )

override protected virtual

Compute objective function gradients. Called by the 'back_prop' function. Given the input, output, and gradient w.r.t. output tensors, the gradients w.r.t. input tensors are populated with the computed values and the gradients w.r.t. the weights are sent to the appropriate optimizers.

Reimplemented from lbann::Layer.

◆ can_run_inplace()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

bool lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::can_run_inplace ( ) const

inline override virtual

If true, the computation can run in-place (feeding each input activations tensor as the corresponding output activations).

Reimplemented from lbann::Layer.

Definition at line 263 of file batch_normalization.hpp.

◆ copy()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

batch_normalization_layer* lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::copy ( ) const

inline override virtual

Copy function. This function dynamically allocates memory for a layer instance and instantiates a copy. The caller is responsible for deallocating the instance.

Implements lbann::Layer.

Definition at line 256 of file batch_normalization.hpp.

◆ fp_compute()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

void lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::fp_compute ( )

override protected virtual

Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.

Implements lbann::Layer.

◆ get_backprop_requirements()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

int lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::get_backprop_requirements ( ) const

inline override virtual

Returns the necessary tensors for computing backpropagation.

Reimplemented from lbann::Layer.

Definition at line 264 of file batch_normalization.hpp.

◆ get_data_layout()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

data_layout lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::get_data_layout ( ) const

inline override virtual

Get data layout of the data tensors. We assume that the data layouts of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its data layout should override this function to return its template parameter.

Implements lbann::Layer.

Definition at line 261 of file batch_normalization.hpp.

◆ get_description()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

description lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::get_description ( ) const

inline override virtual

Human-readable description.

Reimplemented from lbann::Layer.

Definition at line 269 of file batch_normalization.hpp.

Here is the call graph for this function:

◆ get_device_allocation()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

El::Device lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::get_device_allocation ( ) const

inline override virtual

Get the device allocation for the data tensors. We assume that the device allocation of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its device allocation should override this function to return its template parameter.

Implements lbann::Layer.

Definition at line 262 of file batch_normalization.hpp.

◆ get_type()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::string lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::get_type ( ) const

inline override virtual

Get the layer type's name.

A layer type name should be a brief, unique, and human-readable description of the layer's mathematical operation that is recognizable to ML practitioners (e.g., "Convolution", "ReLU").

Implements lbann::Layer.

Definition at line 260 of file batch_normalization.hpp.

◆ operator=()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

batch_normalization_layer& lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::operator= ( const batch_normalization_layer< TensorDataType, T_layout, Dev > & other )

inline

Definition at line 227 of file batch_normalization.hpp.

Here is the call graph for this function:

◆ serialize()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

template<typename ArchiveT >

void lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::serialize ( ArchiveT & ar )

◆ setup_data()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

void lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::setup_data ( size_t max_mini_batch_size )

inline override protected virtual

Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices.

Reimplemented from lbann::Layer.

Definition at line 297 of file batch_normalization.hpp.

Here is the call graph for this function:

◆ setup_dims()

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

void lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::setup_dims ( )

inline override protected virtual

Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.

Reimplemented from lbann::Layer.

Definition at line 291 of file batch_normalization.hpp.

Here is the call graph for this function:

◆ write_specific_proto()

template<typename T , data_layout L, El::Device D>

void lbann::batch_normalization_layer< T, L, D >::write_specific_proto ( lbann_data::Layer & proto ) const

final protected virtual

Add layer-specific data to the prototext.

Implements lbann::Layer.

Definition at line 39 of file batch_normalization_impl.hpp.

Here is the call graph for this function:

Member Data Documentation

◆ m_bessel_correction

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

bool lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_bessel_correction

private

Add Bessel's correction to the batch normalization denominator.

Bessel's correction makes the layer more statistically robust; disabling it, however, makes the layer compatible with PyTorch's implementation.

Definition at line 147 of file batch_normalization.hpp.

◆ m_bias_gradient

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_bias_gradient

private

Gradient w.r.t. bias terms.

Definition at line 175 of file batch_normalization.hpp.

◆ m_decay

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

TensorDataType lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_decay

private

Decay rate for running statistics.

Definition at line 132 of file batch_normalization.hpp.

◆ m_epsilon

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

TensorDataType lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_epsilon

private

Small number for numerical stability.

Definition at line 134 of file batch_normalization.hpp.

◆ m_mean_and_var

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_mean_and_var

private

Current minibatch means and standard deviations.

These are fused for performance when doing non-local batchnorm.

Definition at line 158 of file batch_normalization.hpp.

◆ m_mean_and_var_gradient

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_mean_and_var_gradient

private

Gradients w.r.t. means and standard deviations.

These are fused for performance when doing non-local batchnorm.

Definition at line 167 of file batch_normalization.hpp.

◆ m_mean_gradient_v

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_mean_gradient_v

private

View of gradient w.r.t. means.

Definition at line 169 of file batch_normalization.hpp.

◆ m_mean_v

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_mean_v

private

View of current mini-batch means.

Definition at line 160 of file batch_normalization.hpp.

◆ m_num_per_sum_cache

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unordered_map<El::Int, El::Int> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_num_per_sum_cache

private

Cache of node-local num_per_sum results for node-local stats. Indexed by effective mini-batch size.

Definition at line 152 of file batch_normalization.hpp.

◆ m_scale_gradient

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_scale_gradient

private

Gradient w.r.t. scaling terms.

Definition at line 173 of file batch_normalization.hpp.

◆ m_statistics_group_size

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

int lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_statistics_group_size

private

Size of process group for computing statistics.

If this is 1, the group consists of one process and aggregation is local. If it is 0, statistics are aggregated globally.

Definition at line 140 of file batch_normalization.hpp.

◆ m_var_gradient_v

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_var_gradient_v

private

View of gradient w.r.t. standard deviations.

Definition at line 171 of file batch_normalization.hpp.

◆ m_var_v

template<typename TensorDataType, data_layout T_layout, El::Device Dev>

std::unique_ptr<AbsDistMatrixType> lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >::m_var_v

private

View of current mini-batch standard deviations.

Definition at line 162 of file batch_normalization.hpp.

The documentation for this class was generated from the following files:

Public Types

Public Member Functions

Protected Member Functions

Private Attributes

Additional Inherited Members

Detailed Description

template<typename TensorDataType, data_layout T_layout, El::Device Dev> class lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >

Member Typedef Documentation

◆ AbsDistMatrixType

◆ OptimizerType

◆ WeightsType

Constructor & Destructor Documentation

◆ batch_normalization_layer() [1/2]

◆ batch_normalization_layer() [2/2]

Member Function Documentation

◆ bp_compute()

◆ can_run_inplace()

◆ copy()

◆ fp_compute()

◆ get_backprop_requirements()

◆ get_data_layout()

◆ get_description()

◆ get_device_allocation()

◆ get_type()

◆ operator=()

◆ serialize()

◆ setup_data()

◆ setup_dims()

◆ write_specific_proto()

Member Data Documentation

◆ m_bessel_correction

◆ m_bias_gradient

◆ m_decay

◆ m_epsilon

◆ m_mean_and_var

◆ m_mean_and_var_gradient

◆ m_mean_gradient_v

◆ m_mean_v

◆ m_num_per_sum_cache

◆ m_scale_gradient

◆ m_statistics_group_size

◆ m_var_gradient_v

◆ m_var_v

template<typename TensorDataType, data_layout T_layout, El::Device Dev>
class lbann::batch_normalization_layer< TensorDataType, T_layout, Dev >