Neural network tensor operation. More...

#include <layer.hpp>

Inheritance diagram for lbann::Layer:

Collaboration diagram for lbann::Layer:

Public Member Functions
void	write_proto (lbann_data::Layer &proto) const
	Write layer to proto file. More...

lbann_comm *	get_comm () const

int	get_grid_tag () const noexcept
	Identifying tag for process grid. More...

void	set_grid_tag (int tag)
	Set process grid. More...

virtual void	set_keep_error_signals (bool)=0
	Set whether to keep or dynamically reallocate error signals. More...

bool	runs_inplace () const
	If true, the layer will run in-place (the input and output activations point to the same tensor). Value is set during graph setup (in setup_pointers) based on layer traits and neighboring layers. More...

bool	distconv_enabled () const
	Indicate whether distconv is enabled. More...

Lifecycle
	Layer ()

virtual	~Layer ()=default

virtual Layer *	copy () const =0
	Copy function. This function dynamically allocates memory for a layer instance and instantiates a copy. The caller is responsible for deallocating the instance. More...

Metadata modifiers
void	set_name (const std::string name)
	Set the layer instance's name. Each layer in a model should have a unique, preferably human-readable, name. More...

void	set_model (model *m)
	Set the model that manages this layer. More...

Metadata queries
std::string	get_name () const
	Get the layer instance's name. More...

model *	get_model () const noexcept
	Get a reference to the model that manages this layer. More...

virtual std::string	get_type () const =0
	Get the layer type's name. More...

virtual std::string	get_datatype_name () const =0
	Get a string representing the layer datatype. More...

virtual description	get_description () const
	Human-readable description. More...

virtual data_layout	get_data_layout () const =0
	Get data layout of the data tensors. We assume that the data layouts of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its data layout should override this function to return its template parameter. More...

virtual El::Device	get_device_allocation () const =0
	Get the device allocation for the data tensors. We assume that the device allocations of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its device allocation should override this function to return its template parameter. More...

int	get_expected_num_parent_layers () const noexcept
	Get expected number of parent layers. A negative value indicates no limit. More...

int	get_expected_num_child_layers () const noexcept
	Get expected number of child layers. A negative value indicates no limit. More...

virtual int	get_backprop_requirements () const
	Returns the necessary tensors for computing backpropagation. More...

ParallelStrategy &	get_parallel_strategy () noexcept
	Get the parallel strategy for the layer. More...

ParallelStrategy const &	get_parallel_strategy () const noexcept
	Get the parallel strategy for the layer. More...

Metadata predicates
virtual bool	can_run_inplace () const
	If true, the computation can run in-place (feeding each input activations tensor as the corresponding output activations). More...

bool	using_gpus () const noexcept
	Whether the layer is using a GPU implementation. More...

Training support
virtual void	forward_prop ()=0
	Forward propagation step. Apply a mathematical operation to input tensors to obtain output tensors. More...

void	back_prop ()
	Backward propagation step. Given the objective function gradients w.r.t. the output tensors, compute the gradients w.r.t. the input tensors and w.r.t. the weights. This is essentially an application of the chain rule. More...

bool	update ()
	Update step. Update the layer's internal members. Note that the optimization step for the weights happens elsewhere. More...

virtual void	setup (size_t max_mini_batch_size, const std::vector< El::Grid *> &grids)
	Setup layer members. More...

virtual void	check_setup ()
	Check that the setup is reasonable. More...

Summarizer support
void	summarize_stats (lbann_summary &summarizer, int step)

virtual void	summarize_matrices (lbann_summary &summarizer, int step)=0

void	reset_counters ()
	Reset layer stat counters. More...

Subgraph stuff
void	set_communication_flag (SubGraphCommunication type)

SubGraphCommunication	get_communication_flag ()

void	set_num_spliting_groups (El::Int spliting_groups)

El::Int	get_num_spliting_groups () const

std::shared_ptr< El::Grid >	get_mygrid () const

void	reset_inter_subgrid_vc_comm (std::shared_ptr< El::mpi::Comm > mpi_comm)

void	set_subgraph_parallelism_execution ()

bool	subgraph_parallelism_execution () const noexcept

void	set_run_layer_in_subgraph ()

bool	get_run_layer_in_subgraph () const noexcept

Parent/child accessors
const Layer &	get_parent_layer (size_t index=0) const

const Layer &	get_child_layer (size_t index=0) const

std::vector< const Layer * >	get_parent_layers () const

std::vector< const Layer * >	get_child_layers () const

size_t	find_parent_layer_index (const Layer &l) const

size_t	find_child_layer_index (const Layer &l) const

int	get_num_parents () const noexcept
	Get number of parent layers. More...

int	get_num_children () const noexcept
	Get number of child layers. More...

Layer pointer manipulation functions
void	add_parent_layer (ViewingLayerPtr parent)
	Add a parent layer. More...

void	add_child_layer (ViewingLayerPtr child)
	Add a child layer. More...

void	replace_parent_layer (ViewingLayerPtr l, size_t index)

void	replace_child_layer (ViewingLayerPtr l, size_t index)

void	clear_parent_layers ()
	Remove pointers to parent layers. More...

void	clear_child_layers ()
	Remove pointers to child layers. More...

ViewingLayerPtr	get_parent_layer_pointer (size_t index) const

ViewingLayerPtr	get_child_layer_pointer (size_t index) const

virtual std::vector< ViewingLayerPtr >	get_layer_pointers ()
	List of pointers to other layers. More...

virtual void	set_layer_pointers (std::vector< ViewingLayerPtr > layers)
	Set list of pointers to other layers. More...

Weights access functions
std::vector< ViewingWeightsPtr >	get_weights_pointers () const
	List of pointers to weights. More...

void	set_weights_pointers (std::vector< ViewingWeightsPtr > ptrs)
	Set list of pointers to weights. More...

void	replace_weights (Layer const &other_layer)
	Replace weights with another Layer's weights. More...

Tensor access functions
virtual const BaseDistMat &	get_activations (const Layer &child) const =0
	Get activation tensor corresponding to child layer. More...

virtual const BaseDistMat &	get_error_signals (const Layer &parent) const =0
	Get error signal tensor corresponding to parent layer. More...

Tensor dimension access functions
std::vector< int >	get_input_dims (size_t input_index=0) const
	Get input tensor dimensions. More...

int	get_input_size (size_t input_index=0) const
	Get input tensor size. More...

std::vector< int >	get_output_dims (size_t output_index=0) const
	Get output tensor dimensions. More...

int	get_output_size (size_t output_index=0) const
	Get output tensor size. More...

void	set_output_dims (std::vector< int > dims, size_t output_index=0)
	Set output tensor dimensions. More...

El::Int	infer_mini_batch_size_from_parents () const

virtual El::Int	current_output_mini_batch_size () const =0

virtual El::Int	infer_mini_batch_size_from_parents_or_default_to_current () const =0

Hint layer access functions
void	set_hint_layer (ViewingLayerPtr l)
	Set hint layer. More...

const Layer *	get_hint_layer () const
	Get hint layer. More...

Freeze management functions
void	freeze ()

void	unfreeze ()

bool	is_frozen () const

Serialization
template<typename ArchiveT >
void	serialize (ArchiveT &ar)

Protected Member Functions
void	setup_grid ()
	Setup process grid. More...

virtual void	setup_pointers ()
	Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to be already initialized. More...

virtual void	setup_dims ()
	Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions. More...

virtual void	setup_matrices (const std::vector< El::Grid *> &grids)=0
	Setup buffers for layer inputs and outputs. More...

virtual void	setup_data (size_t max_mini_batch_size)
	Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices. More...

virtual void	setup_gpu ()
	Setup GPU objects. Called by the 'setup' function if the layer is on GPUs. More...

virtual void	fp_setup_inputs ()=0
	Setup input tensors. Called by the 'forward_prop' function. Each input tensor is setup as a view or copy of the corresponding parent layer's output tensor. More...

virtual void	fp_setup_outputs ()=0
	Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match the mini-batch size. More...

virtual void	fp_compute ()=0
	Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values. More...

virtual void	bp_setup_gradient_wrt_inputs ()=0
	Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w.r.t. input tensor is resized to match the mini-batch size. More...

virtual void	bp_compute ()
	Compute objective function gradients. Called by the 'back_prop' function. Given the input, output, and gradient w.r.t. output tensors, the gradients w.r.t. input tensors are populated with the computed values and the gradients w.r.t. the weights are sent to the appropriate optimizers. More...

virtual bool	update_compute ()
	Perform the computation for the update step. Returns false if the layer must reset for a new training epoch. More...

Protected lifecycle functions
	Layer (Layer &&other)=default

	Layer (Layer const &other)

Layer &	operator= (Layer &&other)=default

Layer &	operator= (Layer const &other)

Weights-related accessors
void	add_weights (ViewingWeightsPtr w)

size_t	num_weights () const noexcept

bool	has_weights () const noexcept

bool	has_weights (size_t idx) const noexcept

void	set_num_weights (size_t n)

void	set_weights (size_t idx, ViewingWeightsPtr w)

weights const &	get_weights (size_t idx) const

weights &	get_weights (size_t idx)

void	add_as_gradient_source ()

void	remove_as_gradient_source ()

Protected Attributes
int	m_expected_num_parent_layers = 1

int	m_expected_num_child_layers = 1
	Expected number of child layers. A negative value indicates no limit. More...

model *	m_model = nullptr
	Reference to model managing this layer. More...

bool	m_frozen
	Avoid back prop if frozen. More...

EvalType	m_fp_time
	Time spent in forward propagation. More...

EvalType	m_fp_compute_time
	Time spent in the forward propagation computation. More...

EvalType	m_bp_time
	Time spent in backward propagation. More...

EvalType	m_bp_compute_time
	Time spent in the backward propagation computation. More...

EvalType	m_update_time
	Time spent in updates. More...

std::string	m_name
	Layer instance's name. Each layer in a model should have a unique, preferably human-readable, name. More...

bool	m_runs_inplace = false
	If true, the layer will run in-place (the input and output activations point to the same tensor). Value is set during graph setup (in setup_pointers) based on layer traits and neighboring layers. More...

int	m_grid_tag = -1
	Identifying tag for process grid. More...

SubGraphCommunication	subgraph_communication_method = PT2PT

bool	m_subgraph_parallelism_execution = false

bool	run_layer_in_subgraph = false

std::unique_ptr< std::set< int > >	m_subgrid_ranks

El::Int	m_num_spliting_groups = 1

std::shared_ptr< El::mpi::Comm >	m_interSubGridVCComm

Private Member Functions
virtual void	write_specific_proto (lbann_data::Layer &proto) const =0
	Add layer specific data to prototext. More...

Private Attributes
std::vector< ViewingLayerPtr >	m_parent_layers
	References to parent layers. More...

std::vector< ViewingLayerPtr >	m_child_layers
	References to child layers. More...

std::vector< ViewingWeightsPtr >	m_weights
	References to layer weights. More...

std::vector< std::vector< int > >	m_output_dims_list
	Dimensions of output tensors. More...

ViewingLayerPtr	m_hint_layer
	Hint layer. During setup, the output tensor dimensions are set to match the first output tensor of the hint layer. Derived classes may do more elaborate setup based on the hint layer. More...

ParallelStrategy	m_parallel_strategy
	Parallel strategy for the layer. More...

Friends
class	callback::sync_layers

class	KFAC

template<hydrogen::Device Device>
class	kfac_block_fc_conv

template<hydrogen::Device Device>
class	kfac_block_channelwise_fc

template<hydrogen::Device Device>
class	kfac_block_bn

template<hydrogen::Device Device>
class	kfac_block_gru

Implementation details of back-prop.
void	attempt_move_error_signal (Layer &parent, Layer const &child, std::unique_ptr< BaseDistMat > signal)
	Move error signals from a child to its parent. More...

void	attempt_view_error_signal (Layer &parent, Layer const &child, const BaseDistMat &signals)

void	deep_copy_error_signal (Layer &parent, Layer const &child, const BaseDistMat &signals)

virtual void	back_prop_impl_ ()=0
	Computes the core back-prop steps. More...

virtual void	allocate_new_gradients_ ()=0
	Allocates new storage for the gradients that this layer will compute. More...

virtual void	propagate_error_signals_to_parents_ ()=0
	Moves all error signals to their respective parents. More...

virtual void	clear_prev_error_signals_ ()=0
	Releases the error signals propagated from the child layers. More...

virtual void	move_or_copy_prev_error_signal_ (const Layer &child, std::unique_ptr< El::BaseDistMatrix > signal)=0
	Assumes ownership of the error signals from the specified child layer. More...

virtual void	view_or_copy_prev_error_signal_ (const Layer &child, const El::BaseDistMatrix &signal)=0
	Attempts to view the error signals from the specified child layer. More...

virtual void	deep_copy_prev_error_signal_ (const Layer &child, const El::BaseDistMatrix &signal)=0
	Deep-copy the error signals from the specified child layer. More...

Detailed Description

Neural network tensor operation.

A layer takes input tensors ("previous activations") and applies a mathematical operation to obtain output tensors ("activations"). This operation often has trainable parameters called "weights." The previous activations are received from "parent layers" and the activations are sent to "child layers," making each layer a node in a directed graph. The layer graph and the weights are managed by a neural network model class. A layer should also be able to take objective function gradients w.r.t. the outputs ("previous error signals") and compute the objective function gradients w.r.t. the inputs ("error signals") and w.r.t. the weights. This allows the model to perform automatic differentiation and to apply first-order optimization methods to the weights.

Definition at line 285 of file layer.hpp.

Constructor & Destructor Documentation

◆ Layer() [1/3]

lbann::Layer::Layer ( )

◆ ~Layer()

virtual lbann::Layer::~Layer ( )

virtual default

◆ Layer() [2/3]

lbann::Layer::Layer ( Layer && other )

protected default

◆ Layer() [3/3]

lbann::Layer::Layer ( Layer const & other )

protected

Member Function Documentation

◆ add_as_gradient_source()

void lbann::Layer::add_as_gradient_source ( )

protected

◆ add_child_layer()

void lbann::Layer::add_child_layer ( ViewingLayerPtr child )

Add a child layer.

Does nothing if child is a null pointer, the same layer, or already a child.

◆ add_parent_layer()

void lbann::Layer::add_parent_layer ( ViewingLayerPtr parent )

Add a parent layer.

Does nothing if parent is a null pointer, the same layer, or already a parent.

◆ add_weights()

void lbann::Layer::add_weights ( ViewingWeightsPtr w )

inline protected

Definition at line 723 of file layer.hpp.

Here is the caller graph for this function:

◆ allocate_new_gradients_()

virtual void lbann::Layer::allocate_new_gradients_ ( )

private pure virtual

Allocates new storage for the gradients that this layer will compute.

If the layer has persistent error signal information, this will simply clear the gradients.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ back_prop()

void lbann::Layer::back_prop ( )

Backward propagation step. Given the objective function gradients w.r.t. the output tensors, compute the gradients w.r.t. the input tensors and w.r.t. the weights. This is essentially an application of the chain rule.

◆ back_prop_impl_()

virtual void lbann::Layer::back_prop_impl_ ( )

private pure virtual

Computes the core back-prop steps.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ bp_compute()

virtual void lbann::Layer::bp_compute ( )

inline protected virtual

Compute objective function gradients. Called by the 'back_prop' function. Given the input, output, and gradient w.r.t. output tensors, the gradients w.r.t. input tensors are populated with the computed values and the gradients w.r.t. the weights are sent to the appropriate optimizers.

Definition at line 820 of file layer.hpp.

◆ bp_setup_gradient_wrt_inputs()

virtual void lbann::Layer::bp_setup_gradient_wrt_inputs ( )

protected pure virtual

Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w.r.t. input tensor is resized to match the mini-batch size.

◆ can_run_inplace()

virtual bool lbann::Layer::can_run_inplace ( ) const

inline virtual

If true, the computation can run in-place (feeding each input activations tensor as the corresponding output activations).

Definition at line 411 of file layer.hpp.

◆ check_setup()

virtual void lbann::Layer::check_setup ( )

virtual

Check that the setup is reasonable.

Reimplemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ clear_child_layers()

void lbann::Layer::clear_child_layers ( )

inline

Remove pointers to child layers.

Definition at line 601 of file layer.hpp.

◆ clear_parent_layers()

void lbann::Layer::clear_parent_layers ( )

inline

Remove pointers to parent layers.

Definition at line 599 of file layer.hpp.

◆ clear_prev_error_signals_()

virtual void lbann::Layer::clear_prev_error_signals_ ( )

private pure virtual

Releases the error signals propagated from the child layers.

At the conclusion of back-prop, the error signals propagated from the child layers are no longer needed. This ensures that the memory is released.

This function may do other work, but must respect the persistent error signal flag.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ copy()

virtual Layer* lbann::Layer::copy ( ) const

pure virtual

Copy function. This function dynamically allocates memory for a layer instance and instantiates a copy. The caller is responsible for deallocating the instance.

◆ current_output_mini_batch_size()

virtual El::Int lbann::Layer::current_output_mini_batch_size ( ) const

pure virtual

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ deep_copy_prev_error_signal_()

virtual void lbann::Layer::deep_copy_prev_error_signal_	(	const Layer &	child,
		const El::BaseDistMatrix &	signal
	)

private pure virtual

Deep-copy the error signals from the specified child layer.

Parameters

child	The layer whence the signal is coming.
signal	The error signals being sent to this layer.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ distconv_enabled()

bool lbann::Layer::distconv_enabled ( ) const

inline

Indicate whether distconv is enabled.

Definition at line 1082 of file layer.hpp.

Here is the caller graph for this function:

◆ find_child_layer_index()

size_t lbann::Layer::find_child_layer_index ( const Layer & l ) const

Here is the caller graph for this function:

◆ find_parent_layer_index()

size_t lbann::Layer::find_parent_layer_index ( const Layer & l ) const

Here is the caller graph for this function:

◆ forward_prop()

virtual void lbann::Layer::forward_prop ( )

pure virtual

Forward propagation step. Apply a mathematical operation to input tensors to obtain output tensors.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ fp_compute()

virtual void lbann::Layer::fp_compute ( )

protected pure virtual

Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.

◆ fp_setup_inputs()

virtual void lbann::Layer::fp_setup_inputs ( )

protected pure virtual

Setup input tensors. Called by the 'forward_prop' function. Each input tensor is setup as a view or copy of the corresponding parent layer's output tensor.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ fp_setup_outputs()

virtual void lbann::Layer::fp_setup_outputs ( )

protected pure virtual

Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match the mini-batch size.

◆ freeze()

void lbann::Layer::freeze ( )

◆ get_activations()

virtual const BaseDistMat& lbann::Layer::get_activations ( const Layer & child ) const

pure virtual

Get activation tensor corresponding to child layer.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ get_backprop_requirements()

virtual int lbann::Layer::get_backprop_requirements ( ) const

inline virtual

Returns the necessary tensors for computing backpropagation.

Definition at line 389 of file layer.hpp.

◆ get_child_layer()

const Layer& lbann::Layer::get_child_layer ( size_t index = 0 ) const

◆ get_child_layer_pointer()

ViewingLayerPtr lbann::Layer::get_child_layer_pointer ( size_t index ) const

◆ get_child_layers()

std::vector<const Layer*> lbann::Layer::get_child_layers ( ) const

Here is the caller graph for this function:

◆ get_comm()

lbann_comm* lbann::Layer::get_comm ( ) const

Get reference to LBANN communicator.

Here is the caller graph for this function:

◆ get_communication_flag()

SubGraphCommunication lbann::Layer::get_communication_flag ( )

inline

Definition at line 487 of file layer.hpp.

Here is the caller graph for this function:

◆ get_data_layout()

virtual data_layout lbann::Layer::get_data_layout ( ) const

pure virtual

Get data layout of the data tensors. We assume that the data layouts of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its data layout should override this function to return its template parameter.

◆ get_datatype_name()

virtual std::string lbann::Layer::get_datatype_name ( ) const

pure virtual

Get a string representing the layer datatype.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ get_description()

virtual description lbann::Layer::get_description ( ) const

virtual

Human-readable description.

Here is the caller graph for this function:

◆ get_device_allocation()

virtual El::Device lbann::Layer::get_device_allocation ( ) const

pure virtual

Get the device allocation for the data tensors. We assume that the device allocations of the previous activations, activations, previous error signals, and error signals are the same. Each concrete layer that is templated on its device allocation should override this function to return its template parameter.

◆ get_error_signals()

virtual const BaseDistMat& lbann::Layer::get_error_signals ( const Layer & parent ) const

pure virtual

Get error signal tensor corresponding to parent layer.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ get_expected_num_child_layers()

int lbann::Layer::get_expected_num_child_layers ( ) const

inline noexcept

Get expected number of child layers. A negative value indicates no limit.

Definition at line 381 of file layer.hpp.

◆ get_expected_num_parent_layers()

int lbann::Layer::get_expected_num_parent_layers ( ) const

inline noexcept

Get expected number of parent layers. A negative value indicates no limit.

Definition at line 373 of file layer.hpp.

◆ get_grid_tag()

int lbann::Layer::get_grid_tag ( ) const

noexcept

Identifying tag for process grid.

Here is the caller graph for this function:

◆ get_hint_layer()

const Layer* lbann::Layer::get_hint_layer ( ) const

Get hint layer.

Here is the caller graph for this function:

◆ get_input_dims()

std::vector<int> lbann::Layer::get_input_dims ( size_t input_index = 0 ) const

Get input tensor dimensions.

Here is the caller graph for this function:

◆ get_input_size()

int lbann::Layer::get_input_size ( size_t input_index = 0 ) const

Get input tensor size.

Here is the caller graph for this function:

◆ get_layer_pointers()

virtual std::vector<ViewingLayerPtr> lbann::Layer::get_layer_pointers ( )

virtual

List of pointers to other layers.

◆ get_model()

model* lbann::Layer::get_model ( ) const

inline noexcept

Get a reference to the model that manages this layer.

May be null if this layer is "free" (e.g., during model assembly or for testing). This will not be null in training applications.

Definition at line 339 of file layer.hpp.

◆ get_mygrid()

std::shared_ptr<El::Grid> lbann::Layer::get_mygrid ( ) const

inline

Definition at line 502 of file layer.hpp.

◆ get_name()

std::string lbann::Layer::get_name ( ) const

inline

Get the layer instance's name.

Each layer in a model should have a unique, preferably human-readable, name.

Definition at line 332 of file layer.hpp.

Here is the caller graph for this function:

◆ get_num_children()

int lbann::Layer::get_num_children ( ) const

inline noexcept

Get number of child layers.

Definition at line 576 of file layer.hpp.

Here is the caller graph for this function:

◆ get_num_parents()

int lbann::Layer::get_num_parents ( ) const

inline noexcept

Get number of parent layers.

Definition at line 574 of file layer.hpp.

Here is the caller graph for this function:

◆ get_num_spliting_groups()

El::Int lbann::Layer::get_num_spliting_groups ( ) const

inline

Definition at line 499 of file layer.hpp.

◆ get_output_dims()

std::vector<int> lbann::Layer::get_output_dims ( size_t output_index = 0 ) const

Get output tensor dimensions.

Here is the caller graph for this function:

◆ get_output_size()

int lbann::Layer::get_output_size ( size_t output_index = 0 ) const

Get output tensor size.

Here is the caller graph for this function:

◆ get_parallel_strategy() [1/2]

ParallelStrategy& lbann::Layer::get_parallel_strategy ( )

inline noexcept

Get the parallel strategy for the layer.

Definition at line 395 of file layer.hpp.

◆ get_parallel_strategy() [2/2]

ParallelStrategy const& lbann::Layer::get_parallel_strategy ( ) const

inline noexcept

Get the parallel strategy for the layer.

Definition at line 400 of file layer.hpp.

◆ get_parent_layer()

const Layer& lbann::Layer::get_parent_layer ( size_t index = 0 ) const

Here is the caller graph for this function:

◆ get_parent_layer_pointer()

ViewingLayerPtr lbann::Layer::get_parent_layer_pointer ( size_t index ) const

◆ get_parent_layers()

std::vector<const Layer*> lbann::Layer::get_parent_layers ( ) const

Here is the caller graph for this function:

◆ get_run_layer_in_subgraph()

bool lbann::Layer::get_run_layer_in_subgraph ( ) const

inline noexcept

Definition at line 531 of file layer.hpp.

◆ get_type()

virtual std::string lbann::Layer::get_type ( ) const

pure virtual

Get the layer type's name.

A layer type name should be a brief, unique, and human-readable description of the layer's mathematical operation that is recognizable to ML practitioners (e.g., "Convolution", "ReLU").

Here is the caller graph for this function:

◆ get_weights() [1/2]

weights const& lbann::Layer::get_weights ( size_t idx ) const

protected

Here is the caller graph for this function:

◆ get_weights() [2/2]

weights& lbann::Layer::get_weights ( size_t idx )

protected

◆ get_weights_pointers()

std::vector<ViewingWeightsPtr> lbann::Layer::get_weights_pointers ( ) const

List of pointers to weights.

◆ has_weights() [1/2]

bool lbann::Layer::has_weights ( ) const

inline protected noexcept

Definition at line 728 of file layer.hpp.

Here is the caller graph for this function:

◆ has_weights() [2/2]

bool lbann::Layer::has_weights ( size_t idx ) const

inline protected noexcept

Definition at line 729 of file layer.hpp.

◆ infer_mini_batch_size_from_parents()

El::Int lbann::Layer::infer_mini_batch_size_from_parents ( ) const

◆ infer_mini_batch_size_from_parents_or_default_to_current()

virtual El::Int lbann::Layer::infer_mini_batch_size_from_parents_or_default_to_current ( ) const

pure virtual

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ is_frozen()

bool lbann::Layer::is_frozen ( ) const

◆ move_or_copy_prev_error_signal_()

virtual void lbann::Layer::move_or_copy_prev_error_signal_	(	const Layer &	child,
		std::unique_ptr< El::BaseDistMatrix >	signal
	)

private pure virtual

Assumes ownership of the error signals from the specified child layer.

This is a simple pointer move when possible; otherwise it is a deep-copy of the signal data.

Parameters

child	The layer whence the signal is coming.
signal	The error signals being sent to this layer.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ num_weights()

size_t lbann::Layer::num_weights ( ) const

inline protected noexcept

Definition at line 727 of file layer.hpp.

Here is the caller graph for this function:

◆ operator=() [1/2]

Layer& lbann::Layer::operator= ( Layer && other )

protected default

◆ operator=() [2/2]

Layer& lbann::Layer::operator= ( Layer const & other )

protected

◆ propagate_error_signals_to_parents_()

virtual void lbann::Layer::propagate_error_signals_to_parents_ ( )

private pure virtual

Moves all error signals to their respective parents.

Error signals from this instance are either directly moved into the parent layer or, in cases in which a direct move is not possible, deep-copied into a new tensor in the parent layer (e.g., into a different data type or data distribution).

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ remove_as_gradient_source()

void lbann::Layer::remove_as_gradient_source ( )

protected

◆ replace_child_layer()

void lbann::Layer::replace_child_layer	(	ViewingLayerPtr	l,
		size_t	index
	)

◆ replace_parent_layer()

void lbann::Layer::replace_parent_layer	(	ViewingLayerPtr	l,
		size_t	index
	)

◆ replace_weights()

void lbann::Layer::replace_weights ( Layer const & other_layer )

Replace weights with another Layer's weights.

◆ reset_counters()

void lbann::Layer::reset_counters ( )

Reset layer stat counters.

◆ reset_inter_subgrid_vc_comm()

void lbann::Layer::reset_inter_subgrid_vc_comm ( std::shared_ptr< El::mpi::Comm > mpi_comm )

inline

Definition at line 509 of file layer.hpp.

◆ runs_inplace()

bool lbann::Layer::runs_inplace ( ) const

inline

If true, the layer will run in-place (the input and output activations point to the same tensor). Value is set during graph setup (in setup_pointers) based on layer traits and neighboring layers.

Definition at line 702 of file layer.hpp.

Here is the call graph for this function:

◆ serialize()

template<typename ArchiveT >

void lbann::Layer::serialize ( ArchiveT & ar )

◆ set_communication_flag()

void lbann::Layer::set_communication_flag ( SubGraphCommunication type )

inline

Definition at line 481 of file layer.hpp.

◆ set_grid_tag()

void lbann::Layer::set_grid_tag ( int tag )

Set process grid.

◆ set_hint_layer()

void lbann::Layer::set_hint_layer ( ViewingLayerPtr l )

Set hint layer.

Properties of the hint layer are used during the setup phase. For instance, the output tensor dimensions are set to match the hint layer's first output tensor.

◆ set_keep_error_signals()

virtual void lbann::Layer::set_keep_error_signals ( bool )

pure virtual

Set whether to keep or dynamically reallocate error signals.

Passing a value of true means to keep the error signals; false means to dynamically reallocate them.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ set_layer_pointers()

virtual void lbann::Layer::set_layer_pointers ( std::vector< ViewingLayerPtr > layers )

virtual

Set list of pointers to other layers.

Input should match output of get_layer_pointers.

◆ set_model()

void lbann::Layer::set_model ( model * m )

inline

Set the model that manages this layer.

Definition at line 321 of file layer.hpp.

◆ set_name()

void lbann::Layer::set_name ( const std::string name )

inline

Set the layer instance's name. Each layer in a model should have a unique, preferably human-readable, name.

Definition at line 319 of file layer.hpp.

Here is the caller graph for this function:

◆ set_num_spliting_groups()

void lbann::Layer::set_num_spliting_groups ( El::Int spliting_groups )

inline

Definition at line 493 of file layer.hpp.

◆ set_num_weights()

void lbann::Layer::set_num_weights ( size_t n )

inline, protected

Definition at line 733 of file layer.hpp.

Here is the caller graph for this function:

◆ set_output_dims()

void lbann::Layer::set_output_dims	(	std::vector< int >	dims,
		size_t	output_index = 0
	)

Set output tensor dimensions.

Here is the caller graph for this function:

◆ set_run_layer_in_subgraph()

void lbann::Layer::set_run_layer_in_subgraph ( )

inline

Definition at line 528 of file layer.hpp.

◆ set_subgraph_parallelism_execution()

void lbann::Layer::set_subgraph_parallelism_execution ( )

inline

Definition at line 515 of file layer.hpp.

Here is the caller graph for this function:

◆ set_weights()

void lbann::Layer::set_weights	(	size_t	idx,
		ViewingWeightsPtr	w
	)

inline, protected

Definition at line 734 of file layer.hpp.

Here is the caller graph for this function:

◆ set_weights_pointers()

void lbann::Layer::set_weights_pointers ( std::vector< ViewingWeightsPtr > ptrs )

Set list of pointers to weights.

◆ setup()

virtual void lbann::Layer::setup	(	size_t	max_mini_batch_size,
		const std::vector< El::Grid *> &	grids
	)

virtual

Setup layer members.

This calls the 'setup_pointers', 'setup_dims', 'setup_matrices', 'setup_data', and 'setup_gpu' (if needed) functions. It is assumed that pointers to parent/child layers have already been initialized.

Here is the caller graph for this function:

◆ setup_data()

virtual void lbann::Layer::setup_data ( size_t max_mini_batch_size )

inline, protected, virtual

Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices.

Definition at line 778 of file layer.hpp.

◆ setup_dims()

virtual void lbann::Layer::setup_dims ( )

protected, virtual

Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.

Here is the caller graph for this function:

◆ setup_gpu()

virtual void lbann::Layer::setup_gpu ( )

inline, protected, virtual

Setup GPU objects. Called by the 'setup' function if the layer is on GPUs.

Reimplemented in lbann::pooling_layer< TensorDataType, T_layout, Dev >, lbann::base_convolution_layer< TensorDataType, Device >, lbann::local_response_normalization_layer< TensorDataType, T_layout, Dev >, and lbann::dropout< TensorDataType, T_layout, Dev >.

Definition at line 782 of file layer.hpp.

Here is the caller graph for this function:

◆ setup_grid()

void lbann::Layer::setup_grid ( )

protected

Setup process grid.

◆ setup_matrices()

virtual void lbann::Layer::setup_matrices ( const std::vector< El::Grid *> & grids )

protected, pure virtual

Setup buffers for layer inputs and outputs.

Called by the 'setup' function. Each column of these distributed matrices is interpreted as the flattened tensor for a mini-batch sample. The matrices themselves are constructed by calling the 'construct_matrix' function. If any matrices have already been setup, they are destroyed and reinstantiated.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ setup_pointers()

virtual void lbann::Layer::setup_pointers ( )

protected, virtual

Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to be already initialized.

Reimplemented in lbann::concatenate_layer< TensorDataType, Layout, Device >, lbann::sum_layer< TensorDataType, T_layout, Dev >, lbann::weighted_sum_layer< TensorDataType, T_layout, Dev >, lbann::unpooling_layer< TensorDataType, T_layout, Dev >, lbann::hadamard_layer< TensorDataType, T_layout, Dev >, lbann::cross_grid_sum_slice_layer< TensorDataType, Dev >, and lbann::cross_grid_sum_layer< TensorDataType, Dev >.

Here is the caller graph for this function:

◆ subgraph_parallelism_execution()

bool lbann::Layer::subgraph_parallelism_execution ( ) const

inline, noexcept

Definition at line 522 of file layer.hpp.

Here is the caller graph for this function:

◆ summarize_matrices()

virtual void lbann::Layer::summarize_matrices	(	lbann_summary &	summarizer,
		int	step
	)

pure virtual

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ summarize_stats()

void lbann::Layer::summarize_stats	(	lbann_summary &	summarizer,
		int	step
	)

◆ unfreeze()

void lbann::Layer::unfreeze ( )

◆ update()

bool lbann::Layer::update ( )

Update step. Update the layer's internal members. Note that the optimization step for the weights happens elsewhere.

◆ update_compute()

virtual bool lbann::Layer::update_compute ( )

inline, protected, virtual

Perform the computation for the update step. Returns false if the layer must reset for a new training epoch.

Definition at line 829 of file layer.hpp.

◆ using_gpus()

bool lbann::Layer::using_gpus ( ) const

inline, noexcept

Whether the layer is using a GPU implementation.

Definition at line 417 of file layer.hpp.

Here is the caller graph for this function:

◆ view_or_copy_prev_error_signal_()

virtual void lbann::Layer::view_or_copy_prev_error_signal_	(	const Layer &	child,
		const El::BaseDistMatrix &	signal
	)

private, pure virtual

Attempts to view the error signals from the specified child layer.

This is a simple data view when possible; otherwise it is a deep-copy of the signal data.

Parameters

child	The layer whence the signal is coming.
signal	The error signals being sent to this layer.

Implemented in lbann::data_type_layer< InputTensorDataType, OutputTensorDataType >, lbann::data_type_layer< InputT, OutputT >, lbann::data_type_layer< T >, and lbann::data_type_layer< TensorDataType >.

◆ write_proto()

void lbann::Layer::write_proto ( lbann_data::Layer & proto ) const

Write layer to proto file.

◆ write_specific_proto()

virtual void lbann::Layer::write_specific_proto ( lbann_data::Layer & proto ) const

private, pure virtual

Add layer specific data to prototext.

Here is the caller graph for this function:

Friends And Related Function Documentation

◆ attempt_move_error_signal

void attempt_move_error_signal	(	Layer &	parent,
		Layer const &	child,
		std::unique_ptr< BaseDistMat >	signal
	)

friend

Move error signals from a child to its parent.

This is a hacky workaround to C++ rules for protected member functions. No error-checking is done, e.g., to assert that the two layers actually have a parent-child relationship because this is just an implementation detail. The symbol is never exposed to the public API.

Parameters

parent	The parent layer, into which the signal is moved
child	The child layer, from which the signal is moved
signal	The now-released error signal from the child layer

◆ attempt_view_error_signal

void attempt_view_error_signal	(	Layer &	parent,
		Layer const &	child,
		const BaseDistMat &	signals
	)

friend

◆ callback::sync_layers

friend class callback::sync_layers

friend

Definition at line 287 of file layer.hpp.

◆ deep_copy_error_signal

void deep_copy_error_signal	(	Layer &	parent,
		Layer const &	child,
		const BaseDistMat &	signals
	)

friend

◆ KFAC

friend class KFAC

friend

Definition at line 288 of file layer.hpp.

◆ kfac_block_bn

template<hydrogen::Device Device>

friend class kfac_block_bn

friend

Definition at line 294 of file layer.hpp.

◆ kfac_block_channelwise_fc

template<hydrogen::Device Device>

friend class kfac_block_channelwise_fc

friend

Definition at line 292 of file layer.hpp.

◆ kfac_block_fc_conv

template<hydrogen::Device Device>

friend class kfac_block_fc_conv

friend

Definition at line 290 of file layer.hpp.

◆ kfac_block_gru

template<hydrogen::Device Device>

friend class kfac_block_gru

friend

Definition at line 296 of file layer.hpp.

Member Data Documentation

◆ m_bp_compute_time

EvalType lbann::Layer::m_bp_compute_time

protected

Time spent in the backward propagation computation.

Definition at line 857 of file layer.hpp.

◆ m_bp_time

EvalType lbann::Layer::m_bp_time

protected

Time spent in backward propagation.

Definition at line 855 of file layer.hpp.

◆ m_child_layers

std::vector<ViewingLayerPtr> lbann::Layer::m_child_layers

private

References to child layers.

Definition at line 1013 of file layer.hpp.

◆ m_expected_num_child_layers

int lbann::Layer::m_expected_num_child_layers = 1

protected

Expected number of child layers. A negative value indicates no limit.

Definition at line 842 of file layer.hpp.

◆ m_expected_num_parent_layers

int lbann::Layer::m_expected_num_parent_layers = 1

protected

Expected number of parent layers. A negative value indicates no limit.

Definition at line 838 of file layer.hpp.

◆ m_fp_compute_time

EvalType lbann::Layer::m_fp_compute_time

protected

Time spent in the forward propagation computation.

Definition at line 853 of file layer.hpp.

◆ m_fp_time

EvalType lbann::Layer::m_fp_time

protected

Time spent in forward propagation.

Definition at line 851 of file layer.hpp.

◆ m_frozen

bool lbann::Layer::m_frozen

protected

Avoid back prop if frozen.

Definition at line 848 of file layer.hpp.

◆ m_grid_tag

int lbann::Layer::m_grid_tag = -1

protected

Identifying tag for process grid.

Todo: tym: Clean up and document

If the tag is negative, the process grid is chosen based on heuristics. In particular, the layer will attempt to use the same grid as its parent layers, reverting to the trainer grid if not possible.

Definition at line 886 of file layer.hpp.

◆ m_hint_layer

ViewingLayerPtr lbann::Layer::m_hint_layer

private

Hint layer. During setup, the output tensor dimensions are set to match the first output tensor of the hint layer. Derived classes may do more elaborate setup based on the hint layer.

Definition at line 1033 of file layer.hpp.

◆ m_interSubGridVCComm

std::shared_ptr<El::mpi::Comm> lbann::Layer::m_interSubGridVCComm

protected

Definition at line 908 of file layer.hpp.

◆ m_model

model* lbann::Layer::m_model = nullptr

protected

Reference to model managing this layer.

Definition at line 845 of file layer.hpp.

◆ m_name

std::string lbann::Layer::m_name

protected

Layer instance's name. Each layer in a model should have a unique, preferably human-readable, name.

Definition at line 865 of file layer.hpp.

◆ m_num_spliting_groups

El::Int lbann::Layer::m_num_spliting_groups = 1

protected

Definition at line 907 of file layer.hpp.

◆ m_output_dims_list

std::vector<std::vector<int> > lbann::Layer::m_output_dims_list

private

Dimensions of output tensors.

Definition at line 1026 of file layer.hpp.

◆ m_parallel_strategy

ParallelStrategy lbann::Layer::m_parallel_strategy

private

Parallel strategy for the layer.

Definition at line 1036 of file layer.hpp.

◆ m_parent_layers

std::vector<ViewingLayerPtr> lbann::Layer::m_parent_layers

private

References to parent layers.

Definition at line 1011 of file layer.hpp.

◆ m_runs_inplace

bool lbann::Layer::m_runs_inplace = false

protected

If true, the layer will run in-place (the input and output activations point to the same tensor). Value is set during graph setup (in setup_pointers) based on layer traits and neighboring layers.

Definition at line 872 of file layer.hpp.

◆ m_subgraph_parallelism_execution

bool lbann::Layer::m_subgraph_parallelism_execution = false

protected

Definition at line 900 of file layer.hpp.

◆ m_subgrid_ranks

std::unique_ptr<std::set<int> > lbann::Layer::m_subgrid_ranks

protected

Ranks in grid for the sub-graph.

Definition at line 905 of file layer.hpp.

◆ m_update_time

EvalType lbann::Layer::m_update_time

protected

Time spent in updates.

Definition at line 859 of file layer.hpp.

◆ m_weights

std::vector<ViewingWeightsPtr> lbann::Layer::m_weights

private

References to layer weights.

These are references to the base weights objects. The tensor data type for weights storage might differ from the tensor data type of this layer's tensors. To ensure consistency, we must only access weights values through the WeightsProxy class during training.

Definition at line 1023 of file layer.hpp.

◆ run_layer_in_subgraph

bool lbann::Layer::run_layer_in_subgraph = false

protected

Definition at line 902 of file layer.hpp.

◆ subgraph_communication_method

SubGraphCommunication lbann::Layer::subgraph_communication_method = PT2PT

protected

Todo: Remove

Definition at line 893 of file layer.hpp.

The documentation for this class was generated from the following file:

layer.hpp

Public Member Functions

Protected Member Functions

Protected Attributes

Private Member Functions

Private Attributes

Friends

Implementation details of back-prop.

Detailed Description

Constructor & Destructor Documentation

◆ Layer() [1/3]

◆ ~Layer()

◆ Layer() [2/3]

◆ Layer() [3/3]

Member Function Documentation

◆ add_as_gradient_source()

◆ add_child_layer()

◆ add_parent_layer()

◆ add_weights()

◆ allocate_new_gradients_()

◆ back_prop()

◆ back_prop_impl_()

◆ bp_compute()

◆ bp_setup_gradient_wrt_inputs()

◆ can_run_inplace()

◆ check_setup()

◆ clear_child_layers()

◆ clear_parent_layers()

◆ clear_prev_error_signals_()

◆ copy()

◆ current_output_mini_batch_size()

◆ deep_copy_prev_error_signal_()

◆ distconv_enabled()

◆ find_child_layer_index()

◆ find_parent_layer_index()

◆ forward_prop()

◆ fp_compute()

◆ fp_setup_inputs()

◆ fp_setup_outputs()

◆ freeze()

◆ get_activations()

◆ get_backprop_requirements()

◆ get_child_layer()

◆ get_child_layer_pointer()

◆ get_child_layers()

◆ get_comm()

◆ get_communication_flag()

◆ get_data_layout()

◆ get_datatype_name()

◆ get_description()

◆ get_device_allocation()

◆ get_error_signals()

◆ get_expected_num_child_layers()

◆ get_expected_num_parent_layers()

◆ get_grid_tag()

◆ get_hint_layer()

◆ get_input_dims()

◆ get_input_size()

◆ get_layer_pointers()

◆ get_model()

◆ get_mygrid()

◆ get_name()

◆ get_num_children()

◆ get_num_parents()

◆ get_num_spliting_groups()

◆ get_output_dims()

◆ get_output_size()

◆ get_parallel_strategy() [1/2]

◆ get_parallel_strategy() [2/2]

◆ get_parent_layer()

◆ get_parent_layer_pointer()

◆ get_parent_layers()

◆ get_run_layer_in_subgraph()

◆ get_type()

◆ get_weights() [1/2]

◆ get_weights() [2/2]

◆ get_weights_pointers()

◆ has_weights() [1/2]

◆ has_weights() [2/2]

◆ infer_mini_batch_size_from_parents()

◆ infer_mini_batch_size_from_parents_or_default_to_current()