LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
lbann::model Class Reference

Abstract base class for neural network models. More...

#include <model.hpp>

Collaboration diagram for lbann::model:
[legend]

Public Member Functions

 model (lbann_comm *comm, std::unique_ptr< objective_function > obj_fn, std::unique_ptr< lbann_data::Optimizer > default_optimizer_msg=nullptr)
 
 model (const model &other)
 
model & operator= (const model &other)
 
 ~model ()=default
 
void copy_trained_weights_from (std::vector< weights *> &w)
 Copy trained weights from input parameter w. More...
 
template<typename TensorDataType >
std::unique_ptr< optimizer > create_optimizer () const
 Construct an instance of the default optimizer. More...
 
void allow_background_io_activity (bool enable) noexcept
 Set a flag that can be used to enable / disable the background I/O activities. More...
 
bool background_io_activity_allowed () const noexcept
 Are background I/O activities enabled by the input layers. More...
 
void setup (size_t max_mini_batch_size, const std::vector< El::Grid *> &grids, bool force=false)
 
std::vector< observer_ptr< callback_base > > get_callbacks ()
 Get the list of callbacks for the model. More...
 
std::vector< std::shared_ptr< callback_base > > & get_callbacks_with_ownership () noexcept
 
bool has_valid_execution_context () const noexcept
 
ExecutionContext const & get_execution_context () const
 
ExecutionContext & get_execution_context ()
 
void reset_mode (ExecutionContext &context, execution_mode mode)
 Reset model pointer and execution mode. More...
 
void reset_epoch_statistics (execution_mode mode)
 Reset model statistics for an epoch. More...
 
void forward_prop (execution_mode mode)
 Forward propagation step. More...
 
void backward_prop (bool compute_weight_grads_only=true)
 Backward propagation step. More...
 
void evaluate_metrics (execution_mode mode, size_t current_mini_batch_size)
 
void clear_gradients ()
 Clear each optimizer's gradient. More...
 
void update_weights ()
 Update weights step. More...
 
bool update_layers ()
 Update layers step. More...
 
void reconcile_weight_values ()
 Reconcile weight values. More...
 
void do_setup_end_cbs ()
 Execute callbacks at end of setup. More...
 
void do_model_forward_prop_begin_cbs (execution_mode mode)
 Execute callbacks at start of model forward propagation. More...
 
void do_model_forward_prop_end_cbs (execution_mode mode)
 Execute callbacks at end of model forward propagation. More...
 
void do_layer_forward_prop_begin_cbs (execution_mode mode, Layer *l)
 Execute callbacks at start of layer forward propagation. More...
 
void do_layer_forward_prop_end_cbs (execution_mode mode, Layer *l)
 Execute callbacks at end of layer forward propagation. More...
 
void do_model_backward_prop_begin_cbs ()
 Execute callbacks at start of model backward propagation. More...
 
void do_model_backward_prop_end_cbs ()
 Execute callbacks at end of model backward propagation. More...
 
void do_layer_backward_prop_begin_cbs (Layer *l)
 Execute callbacks at start of layer backward propagation. More...
 
void do_layer_backward_prop_end_cbs (Layer *l)
 Execute callbacks at end of layer backward propagation. More...
 
void do_model_optimize_begin_cbs ()
 Execute callbacks at start of model optimization. More...
 
void do_model_optimize_end_cbs ()
 Execute callbacks at end of model optimization. More...
 
void do_weight_optimize_begin_cbs (weights *w)
 Execute callbacks at the start of weight optimization. More...
 
void do_weight_optimize_end_cbs (weights *w)
 Execute callbacks at the end of weight optimization. More...
 
El::Int get_max_mini_batch_size () const noexcept
 Return the maximum mini-batch size. More...
 
El::Int get_current_mini_batch_size () const noexcept
 Return the current mini-batch size. More...
 
void set_current_mini_batch_size (El::Int) noexcept
 Set the current mini-batch size. More...
 
void set_name (std::string name)
 Metadata Accessors. More...
 
std::string get_name () const noexcept
 Model instance name. More...
 
description get_description () const
 Human-readable description. More...
 
lbann_comm * get_comm () const noexcept
 Get the model's comm. More...
 
El::Int get_num_layers () const noexcept
 Machine-learning object accessors. More...
 
Layer & get_layer (El::Int pos)
 
Layer const & get_layer (El::Int pos) const
 
std::vector< Layer * > get_layers ()
 Return list of layers in model. More...
 
std::vector< Layer const * > get_layers () const
 Return list of layers in model. More...
 
std::vector< weights * > get_weights ()
 
std::vector< weights const * > get_weights () const
 
std::vector< ViewingWeightsPtr > get_weights_pointers () const
 
observer_ptr< objective_function const > get_objective_function () const noexcept
 Mathematical function to be minimized during training. More...
 
observer_ptr< objective_function > get_objective_function () noexcept
 
std::vector< metric * > get_metrics ()
 Return the model's metrics. More...
 
std::vector< metric const * > get_metrics () const
 
Model specification
void add_layer (OwningLayerPtr &&l)
 Add layer to model. More...
 
void add_weights (OwningWeightsPtr &&w)
 Add weights to model. More...
 
void remove_weights (std::string const &name)
 Remove weights from model. More...
 
void add_callback (std::shared_ptr< callback_base > cb)
 Register a new callback for the model. More...
 
void add_metric (std::unique_ptr< metric > m)
 Register a new metric for the model. More...
 
void insert_layer (OwningLayerPtr &&l, std::string const &parent_name)
 Insert layer in model. More...
 
void remove_layer (std::string const &name)
 Remove layer from model. More...
 
void replace_layer (OwningLayerPtr &&l, std::string const &name)
 Replace layer in model. More...
 
void swap_layers (model &other)
 
void swap_weights (model &other)
 
void swap_metrics (model &other)
 
void swap_objective_function (model &other)
 
Summarization
void summarize_stats (lbann_summary &summarizer)
 Summarize statistics (e.g. timers, counters). More...
 
void summarize_matrices (lbann_summary &summarizer)
 Summarize matrices (e.g. means). More...
 
Checkpointing and serialization.
template<class Archive >
void serialize (Archive &ar)
 Serialization for checkpoint and restart with Cereal. More...
 
bool save_to_checkpoint_shared (persist &p)
 Checkpoint model to given file descriptor, return number of bytes written. More...
 
bool load_from_checkpoint_shared (persist &p)
 Restore model by reading checkpoint from given file descriptor, return number of bytes read. More...
 
bool save_to_checkpoint_distributed (persist &p)
 
bool load_from_checkpoint_distributed (persist &p)
 
void write_proto (lbann_data::Model &proto)
 Write model to proto file. More...
 
void save_model ()
 Saves the model explicitly if the save_model callback is present. More...
 
void set_subgrid_communication_type (int type) noexcept
 Subgraph Parallelism Interface. More...
 
int get_subgrid_communication_type () const noexcept
 
void set_subgraph_num_parent_resources (int num_resources) noexcept
 
int get_subgraph_num_parent_resources () const noexcept
 
void set_subgrid_topology (bool type) noexcept
 
bool get_subgrid_topology () const noexcept
 
void enable_subgraph_parallelism () noexcept
 
bool is_subgraph_parallelism_enabled () const noexcept
 
int get_num_resources_non_branch_layers () const noexcept
 
int get_num_resources_branch_layers () const noexcept
 
void set_num_resources_non_branch_layers (int num) noexcept
 
void set_num_resources_branch_layers (int num) noexcept
 

Private Member Functions

 model ()
 
void add_evaluation_layers (std::unordered_set< Layer *> &layer_set, std::unordered_set< std::string > &layer_names)
 Insert evaluation layers where needed. More...
 
void add_dummy_layers (std::unordered_set< std::string > &layer_names)
 Insert dummy layers after layers with too few children. More...
 
void add_split_layers (std::unordered_set< std::string > &layer_names)
 Insert split layers after layers with too many children. More...
 
void ensure_input_layers_first ()
 
void reorder_layers (const std::vector< El::Int > &gather_indices)
 Setup-related implementation. More...
 
void remap_pointers (const std::unordered_map< Layer *, ViewingLayerPtr > &layer_map, const std::unordered_map< weights *, ViewingWeightsPtr > &weights_map)
 Remap pointers. More...
 
void setup_layer_topology ()
 Set up topology of layer graph. More...
 
void setup_layer_execution_order ()
 Set up layer execution order. More...
 
void setup_layer_grid_tags (const std::vector< El::Grid *> &grids)
 Set up grid tags for all layers. More...
 
void setup_layers (size_t max_mini_batch_size, const std::vector< El::Grid *> &grids)
 Set up layers. More...
 
void setup_weights ()
 Set up weights. More...
 
Subgraph parallelism implementation
void setup_subgrids ()
 Setup sub grids for the sub graph parallelism. More...
 
void get_subgrids_order (std::vector< int > &ranks_order, int num_branches)
 
int get_max_subgraph_branches ()
 
void check_subgraph_parallelism ()
 
void setup_subgrid_layers_run_condition ()
 
void get_parent_subgrid_tags (int layer_index)
 
void get_subgraph_subgrids_ranks (std::vector< int > &parent_ranks, std::vector< int > &subgrid_ranks, int layer_index, int number_ranks_in_grid)
 
void get_resources_for_spliting_point (std::vector< int > &parent_ranks, std::vector< int > &subgrid_ranks, int layer_index, int number_ranks_in_grid, int num_subgrids)
 
void get_resources_for_merge_layers (std::set< int > &pooled_set, int child_index, int num_subgrids)
 
void get_resources_for_input_layer (std::vector< int > &masterSubGrid, int num_subgrids)
 
void setup_subcommunicators (const std::vector< El::Grid *> &grids)
 

Private Attributes

std::unordered_map< std::string, std::shared_ptr< El::Grid > > grids
 
std::unordered_map< std::string, std::shared_ptr< El::mpi::Comm > > subCommunicatorsSubgrids
 
std::unordered_map< std::string, std::unique_ptr< El::mpi::Group > > grids_mpi_groups
 
observer_ptr< ExecutionContext > m_execution_context
 
lbann_comm * m_comm
 LBANN communicator. More...
 
int vector_communication_subgraph = 0
 
int subgraph_num_resources_parent = 0
 
bool enable_subgraph_topology = false
 
bool apply_subgraph_parallelism = false
 
int num_resources_branch_layers
 
int num_resources_non_branch_layers
 
std::string m_name
 Model instance's name. More...
 
std::vector< OwningLayerPtr > m_layers
 Tensor operations. More...
 
std::vector< OwningWeightsPtr > m_weights
 Trainable parameters. More...
 
std::unique_ptr< lbann_data::Optimizer > m_default_optimizer_msg
 
std::unique_ptr< objective_function > m_objective_function
 Mathematical function to be minimized during training. More...
 
std::vector< std::unique_ptr< metric > > m_metrics
 Numerical quantities to evaluate model performance. More...
 
std::vector< std::shared_ptr< callback_base > > m_callbacks
 Current callbacks to process. More...
 
bool m_background_io_allowed = true
 Flag that allows input layers to fetch data in the background. More...
 
bool m_model_is_setup = false
 Is the model setup. More...
 
El::Int m_max_mini_batch_size
 The maximum mini-batch size. More...
 
El::Int m_current_mini_batch_size
 The current mini-batch size. More...
 

Detailed Description

Abstract base class for neural network models.

Definition at line 83 of file model.hpp.

Constructor & Destructor Documentation

◆ model() [1/3]

lbann::model::model ( lbann_comm * comm,
std::unique_ptr< objective_function > obj_fn,
std::unique_ptr< lbann_data::Optimizer >  default_optimizer_msg = nullptr 
)

◆ model() [2/3]

lbann::model::model ( const model & other)

◆ ~model()

lbann::model::~model ( )
default

◆ model() [3/3]

lbann::model::model ( )
private

Member Function Documentation

◆ add_callback()

void lbann::model::add_callback ( std::shared_ptr< callback_base > cb)

Register a new callback for the model.

◆ add_dummy_layers()

void lbann::model::add_dummy_layers ( std::unordered_set< std::string > &  layer_names)
private

Insert dummy layers after layers with too few children.

If a layer expects more child layers than it has, add dummy layers until it has enough children.

Parameters
layer_names: Names of layers in model. Updated with any newly created layers.

◆ add_evaluation_layers()

void lbann::model::add_evaluation_layers ( std::unordered_set< Layer *> &  layer_set,
std::unordered_set< std::string > &  layer_names 
)
private

Insert evaluation layers where needed.

If a lbann::layer_term or lbann::layer_metric corresponds to a layer that is not an evaluation_layer, an evaluation layer is created and added to the model.

Parameters
layer_set: Layers in model. Updated with any newly created layers.
layer_names: Names of layers in model. Updated with any newly created layers.

◆ add_layer()

void lbann::model::add_layer ( OwningLayerPtr &&  l)

Add layer to model.

◆ add_metric()

void lbann::model::add_metric ( std::unique_ptr< metric > m)

Register a new metric for the model.

◆ add_split_layers()

void lbann::model::add_split_layers ( std::unordered_set< std::string > &  layer_names)
private

Insert split layers after layers with too many children.

If a layer expects one child layer but has multiple, add a split layer to the model.

Parameters
layer_names: Names of layers in model. Updated with any newly created layers.

◆ add_weights()

void lbann::model::add_weights ( OwningWeightsPtr &&  w)

Add weights to model.

Here is the caller graph for this function:

◆ allow_background_io_activity()

void lbann::model::allow_background_io_activity ( bool  enable)
inline noexcept

Set a flag that can be used to enable / disable the background I/O activities.

Definition at line 681 of file model.hpp.

◆ background_io_activity_allowed()

bool lbann::model::background_io_activity_allowed ( ) const
inline noexcept

Are background I/O activities enabled by the input layers.

Definition at line 686 of file model.hpp.

◆ backward_prop()

void lbann::model::backward_prop ( bool  compute_weight_grads_only = true)

Backward propagation step.

◆ check_subgraph_parallelism()

void lbann::model::check_subgraph_parallelism ( )
private

◆ clear_gradients()

void lbann::model::clear_gradients ( )

Clear each optimizer's gradient.

This must be called before training forward prop since layers set an optimizer flag during forward prop.

◆ copy_trained_weights_from()

void lbann::model::copy_trained_weights_from ( std::vector< weights *> &  w)

Copy trained weights from input parameter w.

Only weight values are copied; pointers and layer structure remain in place. Weights to be copied are those with the same name.

◆ create_optimizer()

template<typename TensorDataType >
std::unique_ptr< optimizer > lbann::model::create_optimizer ( ) const
inline

Construct an instance of the default optimizer.

If there is no default optimizer, a null pointer is returned.

Definition at line 674 of file model.hpp.

◆ do_layer_backward_prop_begin_cbs()

void lbann::model::do_layer_backward_prop_begin_cbs ( Layer * l)

Execute callbacks at start of layer backward propagation.

◆ do_layer_backward_prop_end_cbs()

void lbann::model::do_layer_backward_prop_end_cbs ( Layer * l)

Execute callbacks at end of layer backward propagation.

◆ do_layer_forward_prop_begin_cbs()

void lbann::model::do_layer_forward_prop_begin_cbs ( execution_mode  mode,
Layer * l
)

Execute callbacks at start of layer forward propagation.

◆ do_layer_forward_prop_end_cbs()

void lbann::model::do_layer_forward_prop_end_cbs ( execution_mode  mode,
Layer * l
)

Execute callbacks at end of layer forward propagation.

◆ do_model_backward_prop_begin_cbs()

void lbann::model::do_model_backward_prop_begin_cbs ( )

Execute callbacks at start of model backward propagation.

◆ do_model_backward_prop_end_cbs()

void lbann::model::do_model_backward_prop_end_cbs ( )

Execute callbacks at end of model backward propagation.

◆ do_model_forward_prop_begin_cbs()

void lbann::model::do_model_forward_prop_begin_cbs ( execution_mode  mode)

Execute callbacks at start of model forward propagation.

◆ do_model_forward_prop_end_cbs()

void lbann::model::do_model_forward_prop_end_cbs ( execution_mode  mode)

Execute callbacks at end of model forward propagation.

◆ do_model_optimize_begin_cbs()

void lbann::model::do_model_optimize_begin_cbs ( )

Execute callbacks at start of model optimization.

◆ do_model_optimize_end_cbs()

void lbann::model::do_model_optimize_end_cbs ( )

Execute callbacks at end of model optimization.

◆ do_setup_end_cbs()

void lbann::model::do_setup_end_cbs ( )

Execute callbacks at end of setup.

◆ do_weight_optimize_begin_cbs()

void lbann::model::do_weight_optimize_begin_cbs ( weights * w)

Execute callbacks at the start of weight optimization.

◆ do_weight_optimize_end_cbs()

void lbann::model::do_weight_optimize_end_cbs ( weights * w)

Execute callbacks at the end of weight optimization.

◆ enable_subgraph_parallelism()

void lbann::model::enable_subgraph_parallelism ( )
inline noexcept

Definition at line 721 of file model.hpp.

◆ ensure_input_layers_first()

void lbann::model::ensure_input_layers_first ( )
private

◆ evaluate_metrics()

void lbann::model::evaluate_metrics ( execution_mode  mode,
size_t  current_mini_batch_size 
)

Evaluate any metrics in the model

◆ forward_prop()

void lbann::model::forward_prop ( execution_mode  mode)

Forward propagation step.

Here is the caller graph for this function:

◆ get_callbacks()

std::vector< observer_ptr< callback_base > > lbann::model::get_callbacks ( )
inline

Get the list of callbacks for the model.

Definition at line 636 of file model.hpp.

◆ get_callbacks_with_ownership()

std::vector< std::shared_ptr< callback_base > > & lbann::model::get_callbacks_with_ownership ( )
inline noexcept

Definition at line 647 of file model.hpp.

◆ get_comm()

lbann_comm * lbann::model::get_comm ( ) const
inline noexcept

Get the model's comm.

Definition at line 652 of file model.hpp.

Here is the caller graph for this function:

◆ get_current_mini_batch_size()

El::Int lbann::model::get_current_mini_batch_size ( ) const
inline noexcept

Return the current mini-batch size.

Definition at line 756 of file model.hpp.

◆ get_description()

description lbann::model::get_description ( ) const

Human-readable description.

◆ get_execution_context() [1/2]

ExecutionContext const & lbann::model::get_execution_context ( ) const
inline

Grab the training context of the model

Definition at line 659 of file model.hpp.

Here is the caller graph for this function:

◆ get_execution_context() [2/2]

ExecutionContext & lbann::model::get_execution_context ( )
inline

Grab the training context of the model

Definition at line 667 of file model.hpp.

Here is the call graph for this function:

◆ get_layer() [1/2]

Layer& lbann::model::get_layer ( El::Int  pos)
Parameters
pos: Position in model's list of layers.
Here is the caller graph for this function:

◆ get_layer() [2/2]

Layer const& lbann::model::get_layer ( El::Int  pos) const
Parameters
pos: Position in model's list of layers.

◆ get_layers() [1/2]

std::vector<Layer*> lbann::model::get_layers ( )

Return list of layers in model.

The list is in execution order for forward propagation.

Here is the caller graph for this function:

◆ get_layers() [2/2]

std::vector<Layer const*> lbann::model::get_layers ( ) const

Return list of layers in model.

The list is in execution order for forward propagation.

◆ get_max_mini_batch_size()

El::Int lbann::model::get_max_mini_batch_size ( ) const
inline noexcept

Return the maximum mini-batch size.

Definition at line 751 of file model.hpp.

◆ get_max_subgraph_branches()

int lbann::model::get_max_subgraph_branches ( )
private

◆ get_metrics() [1/2]

std::vector<metric*> lbann::model::get_metrics ( )

Return the model's metrics.

◆ get_metrics() [2/2]

std::vector<metric const*> lbann::model::get_metrics ( ) const

◆ get_name()

std::string lbann::model::get_name ( ) const
inline noexcept

Model instance name.

Each model in a trainer should have a unique, and preferably human-readable, name.

Definition at line 623 of file model.hpp.

◆ get_num_layers()

El::Int lbann::model::get_num_layers ( ) const
noexcept

Machine-learning object accessors.

Size of model's list of layers.

Here is the caller graph for this function:

◆ get_num_resources_branch_layers()

int lbann::model::get_num_resources_branch_layers ( ) const
inline noexcept

Definition at line 736 of file model.hpp.

◆ get_num_resources_non_branch_layers()

int lbann::model::get_num_resources_non_branch_layers ( ) const
inline noexcept

Definition at line 731 of file model.hpp.

◆ get_objective_function() [1/2]

observer_ptr< objective_function const > lbann::model::get_objective_function ( ) const
inline noexcept

Mathematical function to be minimized during training.

Definition at line 631 of file model.hpp.

◆ get_objective_function() [2/2]

observer_ptr< objective_function > lbann::model::get_objective_function ( )
inline noexcept

Definition at line 625 of file model.hpp.

◆ get_parent_subgrid_tags()

void lbann::model::get_parent_subgrid_tags ( int  layer_index)
private

◆ get_resources_for_input_layer()

void lbann::model::get_resources_for_input_layer ( std::vector< int > &  masterSubGrid,
int  num_subgrids 
)
private

◆ get_resources_for_merge_layers()

void lbann::model::get_resources_for_merge_layers ( std::set< int > &  pooled_set,
int  child_index,
int  num_subgrids 
)
private

◆ get_resources_for_spliting_point()

void lbann::model::get_resources_for_spliting_point ( std::vector< int > &  parent_ranks,
std::vector< int > &  subgrid_ranks,
int  layer_index,
int  number_ranks_in_grid,
int  num_subgrids 
)
private

◆ get_subgraph_num_parent_resources()

int lbann::model::get_subgraph_num_parent_resources ( ) const
inline noexcept

Definition at line 706 of file model.hpp.

◆ get_subgraph_subgrids_ranks()

void lbann::model::get_subgraph_subgrids_ranks ( std::vector< int > &  parent_ranks,
std::vector< int > &  subgrid_ranks,
int  layer_index,
int  number_ranks_in_grid 
)
private

◆ get_subgrid_communication_type()

int lbann::model::get_subgrid_communication_type ( ) const
inline noexcept

Definition at line 696 of file model.hpp.

◆ get_subgrid_topology()

bool lbann::model::get_subgrid_topology ( ) const
inline noexcept

Definition at line 716 of file model.hpp.

◆ get_subgrids_order()

void lbann::model::get_subgrids_order ( std::vector< int > &  ranks_order,
int  num_branches 
)
private

◆ get_weights() [1/2]

std::vector<weights*> lbann::model::get_weights ( )
Here is the caller graph for this function:

◆ get_weights() [2/2]

std::vector<weights const*> lbann::model::get_weights ( ) const

◆ get_weights_pointers()

std::vector<ViewingWeightsPtr> lbann::model::get_weights_pointers ( ) const

◆ has_valid_execution_context()

bool lbann::model::has_valid_execution_context ( ) const
inline noexcept

Check to see if there is a valid training context for the model

Definition at line 654 of file model.hpp.

◆ insert_layer()

void lbann::model::insert_layer ( OwningLayerPtr &&  l,
std::string const &  parent_name 
)

Insert layer in model.

◆ is_subgraph_parallelism_enabled()

bool lbann::model::is_subgraph_parallelism_enabled ( ) const
inline noexcept

Definition at line 726 of file model.hpp.

◆ load_from_checkpoint_distributed()

bool lbann::model::load_from_checkpoint_distributed ( persist & p)

◆ load_from_checkpoint_shared()

bool lbann::model::load_from_checkpoint_shared ( persist & p)

Restore model by reading checkpoint from given file descriptor, return number of bytes read.

◆ operator=()

model& lbann::model::operator= ( const model & other)

◆ reconcile_weight_values()

void lbann::model::reconcile_weight_values ( )

Reconcile weight values.

If weight values are duplicated across multiple processes, they are set to the average across the processes.

◆ remap_pointers()

void lbann::model::remap_pointers ( const std::unordered_map< Layer *, ViewingLayerPtr > &  layer_map,
const std::unordered_map< weights *, ViewingWeightsPtr > &  weights_map 
)
private

Remap pointers.

Layer and weights pointers are remapped using the provided maps. If a pointer is not a key in the corresponding map, the pointer is not changed.

◆ remove_layer()

void lbann::model::remove_layer ( std::string const &  name)

Remove layer from model.

◆ remove_weights()

void lbann::model::remove_weights ( std::string const &  name)

Remove weights from model.

◆ reorder_layers()

void lbann::model::reorder_layers ( const std::vector< El::Int > &  gather_indices)
private

Setup-related implementation.

Reorder layer list with a gather.

The new layer list is the same length as gather_indices and its entries are given by

\[ \text{new\_list}[i] = \text{old\_list}[\text{gather\_indices}[i]] \]

Since entries in the layer list must be unique, this will fail if gather_indices has any repeated entries.

◆ replace_layer()

void lbann::model::replace_layer ( OwningLayerPtr &&  l,
std::string const &  name 
)

Replace layer in model.

◆ reset_epoch_statistics()

void lbann::model::reset_epoch_statistics ( execution_mode  mode)

Reset model statistics for an epoch.

◆ reset_mode()

void lbann::model::reset_mode ( ExecutionContext & context,
execution_mode  mode 
)

Reset model pointer and execution mode.

◆ save_model()

void lbann::model::save_model ( )

Saves the model explicitly if the save_model callback is present.

Deprecated:
This function both holds on to the notion that models support callbacks (the majority of those in the current iteration of callbacks should be thought of as extensions to training algorithms rather than extensions of models) and is only used by the "cycgan" and "aecycgan" drivers, which themselves are not well-supported.

◆ save_to_checkpoint_distributed()

bool lbann::model::save_to_checkpoint_distributed ( persist & p)

◆ save_to_checkpoint_shared()

bool lbann::model::save_to_checkpoint_shared ( persist & p)

Checkpoint model to given file descriptor, return number of bytes written.

◆ serialize()

template<class Archive >
void lbann::model::serialize ( Archive &  ar)

Serialization for checkpoint and restart with Cereal.

◆ set_current_mini_batch_size()

void lbann::model::set_current_mini_batch_size ( El::Int  mini_batch_size)
inline noexcept

Set the current mini-batch size.

Definition at line 761 of file model.hpp.

◆ set_name()

void lbann::model::set_name ( std::string  name)

Metadata Accessors.

Model instance name.

Each model in a trainer should have a unique, and preferably human-readable, name.

Here is the caller graph for this function:

◆ set_num_resources_branch_layers()

void lbann::model::set_num_resources_branch_layers ( int  num)
inline noexcept

Definition at line 746 of file model.hpp.

◆ set_num_resources_non_branch_layers()

void lbann::model::set_num_resources_non_branch_layers ( int  num)
inline noexcept

Definition at line 741 of file model.hpp.

◆ set_subgraph_num_parent_resources()

void lbann::model::set_subgraph_num_parent_resources ( int  num_resources)
inline noexcept

Definition at line 701 of file model.hpp.

◆ set_subgrid_communication_type()

void lbann::model::set_subgrid_communication_type ( int  type)
inline noexcept

Subgraph Parallelism Interface.

Definition at line 691 of file model.hpp.

◆ set_subgrid_topology()

void lbann::model::set_subgrid_topology ( bool  type)
inline noexcept

Definition at line 711 of file model.hpp.

◆ setup()

void lbann::model::setup ( size_t  max_mini_batch_size,
const std::vector< El::Grid *> &  grids,
bool  force = false 
)

Must be called after model specification and before execution.

◆ setup_layer_execution_order()

void lbann::model::setup_layer_execution_order ( )
private

Set up layer execution order.

Called in setup function. A topological sort is applied to the layer list so that we can traverse the directed acyclic graph without violating dependencies.

◆ setup_layer_grid_tags()

void lbann::model::setup_layer_grid_tags ( const std::vector< El::Grid *> &  grids)
private

Set up grid tags for all layers.

Called in setup function.

◆ setup_layer_topology()

void lbann::model::setup_layer_topology ( )
private

Set up topology of layer graph.

Called in setup function. All layers in connected component of layer graph are added to the model and all parent/child relationships between layers are reciprocated.

◆ setup_layers()

void lbann::model::setup_layers ( size_t  max_mini_batch_size,
const std::vector< El::Grid *> &  grids 
)
private

Set up layers.

Called in setup function.

◆ setup_subcommunicators()

void lbann::model::setup_subcommunicators ( const std::vector< El::Grid *> &  grids)
private

◆ setup_subgrid_layers_run_condition()

void lbann::model::setup_subgrid_layers_run_condition ( )
private

◆ setup_subgrids()

void lbann::model::setup_subgrids ( )
private

Setup sub grids for the sub graph parallelism.

◆ setup_weights()

void lbann::model::setup_weights ( )
private

Set up weights.

Called in setup function. All weights being used by layers or the objective function are added to the model and all unused weights are deleted.

◆ summarize_matrices()

void lbann::model::summarize_matrices ( lbann_summary & summarizer)

Summarize matrices (e.g. means).

These are called less frequently and can be more expensive.

◆ summarize_stats()

void lbann::model::summarize_stats ( lbann_summary & summarizer)

Summarize statistics (e.g. timers, counters).

These should be computable quickly.

◆ swap_layers()

void lbann::model::swap_layers ( model & other)

◆ swap_metrics()

void lbann::model::swap_metrics ( model & other)

◆ swap_objective_function()

void lbann::model::swap_objective_function ( model & other)

◆ swap_weights()

void lbann::model::swap_weights ( model & other)

◆ update_layers()

bool lbann::model::update_layers ( )

Update layers step.

◆ update_weights()

void lbann::model::update_weights ( )

Update weights step.

◆ write_proto()

void lbann::model::write_proto ( lbann_data::Model &  proto)

Write model to proto file.

Member Data Documentation

◆ apply_subgraph_parallelism

bool lbann::model::apply_subgraph_parallelism = false
private

Definition at line 511 of file model.hpp.

◆ enable_subgraph_topology

bool lbann::model::enable_subgraph_topology = false
private

Definition at line 508 of file model.hpp.

◆ grids

std::unordered_map<std::string, std::shared_ptr<El::Grid> > lbann::model::grids
private

Definition at line 474 of file model.hpp.

◆ grids_mpi_groups

std::unordered_map<std::string, std::unique_ptr<El::mpi::Group> > lbann::model::grids_mpi_groups
private

Definition at line 481 of file model.hpp.

◆ m_background_io_allowed

bool lbann::model::m_background_io_allowed = true
private

Flag that allows input layers to fetch data in the background.

Definition at line 552 of file model.hpp.

◆ m_callbacks

std::vector<std::shared_ptr<callback_base> > lbann::model::m_callbacks
private

Current callbacks to process.

Definition at line 549 of file model.hpp.

◆ m_comm

lbann_comm* lbann::model::m_comm
private

LBANN communicator.

Definition at line 489 of file model.hpp.

◆ m_current_mini_batch_size

El::Int lbann::model::m_current_mini_batch_size
private

The current mini-batch size.

This should be set on each step by the execution algorithm using the value that the data coordinator gets from the data readers.

Number of samples being processed in the current step (iteration), used for correctly averaging gradients.

Definition at line 613 of file model.hpp.

◆ m_default_optimizer_msg

std::unique_ptr<lbann_data::Optimizer> lbann::model::m_default_optimizer_msg
private

If a layer needs to construct an optimizer during setup, it will make a copy of the default optimizer. This object is just used to create copies and is not actually used for optimization.

Definition at line 538 of file model.hpp.

◆ m_execution_context

observer_ptr<ExecutionContext> lbann::model::m_execution_context
private

Pointer to the execution context object used for training or evaluating this model

Definition at line 486 of file model.hpp.

◆ m_layers

std::vector<OwningLayerPtr> lbann::model::m_layers
private

Tensor operations.

The list is in execution order for forward propagation.

Definition at line 528 of file model.hpp.

◆ m_max_mini_batch_size

El::Int lbann::model::m_max_mini_batch_size
private

The maximum mini-batch size.

This should be set before setup_distconv() is called.

Definition at line 602 of file model.hpp.

◆ m_metrics

std::vector<std::unique_ptr<metric> > lbann::model::m_metrics
private

Numerical quantities to evaluate model performance.

Does not affect training.

Definition at line 546 of file model.hpp.

◆ m_model_is_setup

bool lbann::model::m_model_is_setup = false
private

Is the model setup.

Flag to indicate if the setup function has been called

Definition at line 557 of file model.hpp.

◆ m_name

std::string lbann::model::m_name
private

Model instance's name.

Each model in a trainer should have a unique, preferably human-readable, name.

Definition at line 523 of file model.hpp.

◆ m_objective_function

std::unique_ptr<objective_function> lbann::model::m_objective_function
private

Mathematical function to be minimized during training.

Definition at line 541 of file model.hpp.

◆ m_weights

std::vector<OwningWeightsPtr> lbann::model::m_weights
private

Trainable parameters.

Definition at line 531 of file model.hpp.

◆ num_resources_branch_layers

int lbann::model::num_resources_branch_layers
private

Definition at line 514 of file model.hpp.

◆ num_resources_non_branch_layers

int lbann::model::num_resources_non_branch_layers
private

Definition at line 517 of file model.hpp.

◆ subCommunicatorsSubgrids

std::unordered_map<std::string, std::shared_ptr<El::mpi::Comm> > lbann::model::subCommunicatorsSubgrids
private

Definition at line 477 of file model.hpp.

◆ subgraph_num_resources_parent

int lbann::model::subgraph_num_resources_parent = 0
private

Definition at line 503 of file model.hpp.

◆ vector_communication_subgraph

int lbann::model::vector_communication_subgraph = 0
private

Enable vector communication for the subgraph parallelism

Definition at line 499 of file model.hpp.


The documentation for this class was generated from the following file: