|
LBANN
0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
|
Abstract base class for neural network models. More...
#include <model.hpp>
Public Member Functions | |
| model (lbann_comm *comm, std::unique_ptr< objective_function > obj_fn, std::unique_ptr< lbann_data::Optimizer > default_optimizer_msg=nullptr) | |
| model (const model &other) | |
| model & | operator= (const model &other) |
| ~model ()=default | |
| void | copy_trained_weights_from (std::vector< weights *> &w) |
| Copy trained weights from input parameter w. More... | |
| template<typename TensorDataType > | |
| std::unique_ptr< optimizer > | create_optimizer () const |
| Construct an instance of the default optimizer. More... | |
| void | allow_background_io_activity (bool enable) noexcept |
| Set a flag that can be used to enable / disable the background I/O activities. More... | |
| bool | background_io_activity_allowed () const noexcept |
| Are background I/O activities enabled by the input layers. More... | |
| void | setup (size_t max_mini_batch_size, const std::vector< El::Grid *> &grids, bool force=false) |
| std::vector< observer_ptr< callback_base > > | get_callbacks () |
| Get the list of callbacks for the model. More... | |
| std::vector< std::shared_ptr< callback_base > > & | get_callbacks_with_ownership () noexcept |
| bool | has_valid_execution_context () const noexcept |
| ExecutionContext const & | get_execution_context () const |
| ExecutionContext & | get_execution_context () |
| void | reset_mode (ExecutionContext &context, execution_mode mode) |
| Reset model pointer and execution mode. More... | |
| void | reset_epoch_statistics (execution_mode mode) |
| Reset model statistics for an epoch. More... | |
| void | forward_prop (execution_mode mode) |
| Forward propagation step. More... | |
| void | backward_prop (bool compute_weight_grads_only=true) |
| Backward propagation step. More... | |
| void | evaluate_metrics (execution_mode mode, size_t current_mini_batch_size) |
| void | clear_gradients () |
| Clear each optimizer's gradient. More... | |
| void | update_weights () |
| Update weights step. More... | |
| bool | update_layers () |
| Update layers step. More... | |
| void | reconcile_weight_values () |
| Reconcile weight values. More... | |
| void | do_setup_end_cbs () |
| Execute callbacks at end of setup. More... | |
| void | do_model_forward_prop_begin_cbs (execution_mode mode) |
| Execute callbacks at start of model forward propagation. More... | |
| void | do_model_forward_prop_end_cbs (execution_mode mode) |
| Execute callbacks at end of model forward propagation. More... | |
| void | do_layer_forward_prop_begin_cbs (execution_mode mode, Layer *l) |
| Execute callbacks at start of layer forward propagation. More... | |
| void | do_layer_forward_prop_end_cbs (execution_mode mode, Layer *l) |
| Execute callbacks at end of layer forward propagation. More... | |
| void | do_model_backward_prop_begin_cbs () |
| Execute callbacks at start of model backward propagation. More... | |
| void | do_model_backward_prop_end_cbs () |
| Execute callbacks at end of model backward propagation. More... | |
| void | do_layer_backward_prop_begin_cbs (Layer *l) |
| Execute callbacks at start of layer backward propagation. More... | |
| void | do_layer_backward_prop_end_cbs (Layer *l) |
| Execute callbacks at end of layer backward propagation. More... | |
| void | do_model_optimize_begin_cbs () |
| Execute callbacks at start of model optimization. More... | |
| void | do_model_optimize_end_cbs () |
| Execute callbacks at end of model optimization. More... | |
| void | do_weight_optimize_begin_cbs (weights *w) |
| Execute callbacks at the start of weight optimization. More... | |
| void | do_weight_optimize_end_cbs (weights *w) |
| Execute callbacks at the end of weight optimization. More... | |
| El::Int | get_max_mini_batch_size () const noexcept |
| Return the maximum mini-batch size. More... | |
| El::Int | get_current_mini_batch_size () const noexcept |
| Return the current mini-batch size. More... | |
| void | set_current_mini_batch_size (El::Int) noexcept |
| Set the current mini-batch size. More... | |
| void | set_name (std::string name) |
| Metadata Accessors. More... | |
| std::string | get_name () const noexcept |
| Model instance name. More... | |
| description | get_description () const |
| Human-readable description. More... | |
| lbann_comm * | get_comm () const noexcept |
| Get the model's comm. More... | |
| El::Int | get_num_layers () const noexcept |
| Machine-learning object accessors. More... | |
| Layer & | get_layer (El::Int pos) |
| Layer const & | get_layer (El::Int pos) const |
| std::vector< Layer * > | get_layers () |
| Return list of layers in model. More... | |
| std::vector< Layer const * > | get_layers () const |
| Return list of layers in model. More... | |
| std::vector< weights * > | get_weights () |
| std::vector< weights const * > | get_weights () const |
| std::vector< ViewingWeightsPtr > | get_weights_pointers () const |
| observer_ptr< objective_function const > | get_objective_function () const noexcept |
| Mathematical function to be minimized during training. More... | |
| observer_ptr< objective_function > | get_objective_function () noexcept |
| std::vector< metric * > | get_metrics () |
| Return the model's metrics. More... | |
| std::vector< metric const * > | get_metrics () const |
Model specification | |
| void | add_layer (OwningLayerPtr &&l) |
| Add layer to model. More... | |
| void | add_weights (OwningWeightsPtr &&w) |
| Add weights to model. More... | |
| void | remove_weights (std::string const &name) |
| Remove weights from model. More... | |
| void | add_callback (std::shared_ptr< callback_base > cb) |
| Register a new callback for the model. More... | |
| void | add_metric (std::unique_ptr< metric > m) |
| Register a new metric for the model. More... | |
| void | insert_layer (OwningLayerPtr &&l, std::string const &parent_name) |
| Insert layer in model. More... | |
| void | remove_layer (std::string const &name) |
| Remove layer from model. More... | |
| void | replace_layer (OwningLayerPtr &&l, std::string const &name) |
| Replace layer in model. More... | |
| void | swap_layers (model &other) |
| void | swap_weights (model &other) |
| void | swap_metrics (model &other) |
| void | swap_objective_function (model &other) |
Summarization | |
| void | summarize_stats (lbann_summary &summarizer) |
| Summarize statistics (e.g. timers, counters). More... | |
| void | summarize_matrices (lbann_summary &summarizer) |
| Summarize matrices (e.g. means). More... | |
Checkpointing and serialization. | |
| template<class Archive > | |
| void | serialize (Archive &ar) |
| Serialization for checkpoint and restart with Cereal. More... | |
| bool | save_to_checkpoint_shared (persist &p) |
| Checkpoint model to given file descriptor, return number of bytes written. More... | |
| bool | load_from_checkpoint_shared (persist &p) |
| Restore model by reading checkpoint from given file descriptor, return number of bytes read. More... | |
| bool | save_to_checkpoint_distributed (persist &p) |
| bool | load_from_checkpoint_distributed (persist &p) |
| void | write_proto (lbann_data::Model &proto) |
| Write model to proto file. More... | |
| void | save_model () |
| Saves the model explicitly if the save_model callback is present. More... | |
| void | set_subgrid_communication_type (int type) noexcept |
| Subgraph Parallelism Interface. More... | |
| int | get_subgrid_communication_type () const noexcept |
| void | set_subgraph_num_parent_resources (int num_resources) noexcept |
| int | get_subgraph_num_parent_resources () const noexcept |
| void | set_subgrid_topology (bool type) noexcept |
| bool | get_subgrid_topology () const noexcept |
| void | enable_subgraph_parallelism () noexcept |
| bool | is_subgraph_parallelism_enabled () const noexcept |
| int | get_num_resources_non_branch_layers () const noexcept |
| int | get_num_resources_branch_layers () const noexcept |
| void | set_num_resources_non_branch_layers (int num) noexcept |
| void | set_num_resources_branch_layers (int num) noexcept |
Private Member Functions | |
| model () | |
| void | add_evaluation_layers (std::unordered_set< Layer *> &layer_set, std::unordered_set< std::string > &layer_names) |
| Insert evaluation layers where needed. More... | |
| void | add_dummy_layers (std::unordered_set< std::string > &layer_names) |
| Insert dummy layers after layers with too few children. More... | |
| void | add_split_layers (std::unordered_set< std::string > &layer_names) |
| Insert split layers after layers with too many children. More... | |
| void | ensure_input_layers_first () |
| void | reorder_layers (const std::vector< El::Int > &gather_indices) |
| Setup-related implementation. More... | |
| void | remap_pointers (const std::unordered_map< Layer *, ViewingLayerPtr > &layer_map, const std::unordered_map< weights *, ViewingWeightsPtr > &weights_map) |
| Remap pointers. More... | |
| void | setup_layer_topology () |
| Set up topology of layer graph. More... | |
| void | setup_layer_execution_order () |
| Set up layer execution order. More... | |
| void | setup_layer_grid_tags (const std::vector< El::Grid *> &grids) |
| Set up grid tags for all layers. More... | |
| void | setup_layers (size_t max_mini_batch_size, const std::vector< El::Grid *> &grids) |
| Set up layers. More... | |
| void | setup_weights () |
| Set up weights. More... | |
Subgraph parallelism implementation | |
| void | setup_subgrids () |
| Set up subgrids for subgraph parallelism. More... | |
| void | get_subgrids_order (std::vector< int > &ranks_order, int num_branches) |
| int | get_max_subgraph_branches () |
| void | check_subgraph_parallelism () |
| void | setup_subgrid_layers_run_condition () |
| void | get_parent_subgrid_tags (int layer_index) |
| void | get_subgraph_subgrids_ranks (std::vector< int > &parent_ranks, std::vector< int > &subgrid_ranks, int layer_index, int number_ranks_in_grid) |
| void | get_resources_for_spliting_point (std::vector< int > &parent_ranks, std::vector< int > &subgrid_ranks, int layer_index, int number_ranks_in_grid, int num_subgrids) |
| void | get_resources_for_merge_layers (std::set< int > &pooled_set, int child_index, int num_subgrids) |
| void | get_resources_for_input_layer (std::vector< int > &masterSubGrid, int num_subgrids) |
| void | setup_subcommunicators (const std::vector< El::Grid *> &grids) |
Private Attributes | |
| std::unordered_map< std::string, std::shared_ptr< El::Grid > > | grids |
| std::unordered_map< std::string, std::shared_ptr< El::mpi::Comm > > | subCommunicatorsSubgrids |
| std::unordered_map< std::string, std::unique_ptr< El::mpi::Group > > | grids_mpi_groups |
| observer_ptr< ExecutionContext > | m_execution_context |
| lbann_comm * | m_comm |
| LBANN communicator. More... | |
| int | vector_communication_subgraph = 0 |
| int | subgraph_num_resources_parent = 0 |
| bool | enable_subgraph_topology = false |
| bool | apply_subgraph_parallelism = false |
| int | num_resources_branch_layers |
| int | num_resources_non_branch_layers |
| std::string | m_name |
| Model instance's name. More... | |
| std::vector< OwningLayerPtr > | m_layers |
| Tensor operations. More... | |
| std::vector< OwningWeightsPtr > | m_weights |
| Trainable parameters. More... | |
| std::unique_ptr< lbann_data::Optimizer > | m_default_optimizer_msg |
| std::unique_ptr< objective_function > | m_objective_function |
| Mathematical function to be minimized during training. More... | |
| std::vector< std::unique_ptr< metric > > | m_metrics |
| Numerical quantities to evaluate model performance. More... | |
| std::vector< std::shared_ptr< callback_base > > | m_callbacks |
| Current callbacks to process. More... | |
| bool | m_background_io_allowed = true |
| Flag that allows input layers to fetch data in the background. More... | |
| bool | m_model_is_setup = false |
| Is the model setup. More... | |
| El::Int | m_max_mini_batch_size |
| The maximum mini-batch size. More... | |
| El::Int | m_current_mini_batch_size |
| The current mini-batch size. More... | |
| lbann::model::model | ( | lbann_comm * | comm, |
| std::unique_ptr< objective_function > | obj_fn, | ||
| std::unique_ptr< lbann_data::Optimizer > | default_optimizer_msg = nullptr |
||
| ) |
| lbann::model::model | ( | const model & | other | ) |
|
default |
|
private |
| void lbann::model::add_callback | ( | std::shared_ptr< callback_base > | cb | ) |
Register a new callback for the model.
|
private |
Insert dummy layers after layers with too few children.
If a layer expects more child layers than it has, add dummy layers until it has enough children.
| layer_names | Names of layers in model. Updated with any newly created layers. |
|
private |
Insert evaluation layers where needed.
If a lbann::layer_term or lbann::layer_metric corresponds to a layer that is not an evaluation_layer, an evaluation layer is created and added to the model.
| layer_set | Layers in model. Updated with any newly created layers. |
| layer_names | Names of layers in model. Updated with any newly created layers. |
| void lbann::model::add_layer | ( | OwningLayerPtr && | l | ) |
Add layer to model.
| void lbann::model::add_metric | ( | std::unique_ptr< metric > | m | ) |
Register a new metric for the model.
|
private |
Insert split layers after layers with too many children.
If a layer expects one child layer but has multiple, add a split layer to the model.
| layer_names | Names of layers in model. Updated with any newly created layers. |
| void lbann::model::add_weights | ( | OwningWeightsPtr && | w | ) |
Add weights to model.
|
inlinenoexcept |
|
inlinenoexcept |
| void lbann::model::backward_prop | ( | bool | compute_weight_grads_only = true | ) |
Backward propagation step.
|
private |
| void lbann::model::clear_gradients | ( | ) |
Clear each optimizer's gradient.
This must be called before training forward prop since layers set an optimizer flag during forward prop.
| void lbann::model::copy_trained_weights_from | ( | std::vector< weights *> & | w | ) |
Copy trained weights from input parameter w.
Only the weight values are copied; pointers and layer structure are left in place. Values are copied between weights that have the same name.
|
inline |
| void lbann::model::do_layer_backward_prop_begin_cbs | ( | Layer * | l | ) |
Execute callbacks at start of layer backward propagation.
| void lbann::model::do_layer_backward_prop_end_cbs | ( | Layer * | l | ) |
Execute callbacks at end of layer backward propagation.
| void lbann::model::do_layer_forward_prop_begin_cbs | ( | execution_mode | mode, |
| Layer * | l | ||
| ) |
Execute callbacks at start of layer forward propagation.
| void lbann::model::do_layer_forward_prop_end_cbs | ( | execution_mode | mode, |
| Layer * | l | ||
| ) |
Execute callbacks at end of layer forward propagation.
| void lbann::model::do_model_backward_prop_begin_cbs | ( | ) |
Execute callbacks at start of model backward propagation.
| void lbann::model::do_model_backward_prop_end_cbs | ( | ) |
Execute callbacks at end of model backward propagation.
| void lbann::model::do_model_forward_prop_begin_cbs | ( | execution_mode | mode | ) |
Execute callbacks at start of model forward propagation.
| void lbann::model::do_model_forward_prop_end_cbs | ( | execution_mode | mode | ) |
Execute callbacks at end of model forward propagation.
| void lbann::model::do_model_optimize_begin_cbs | ( | ) |
Execute callbacks at start of model optimization.
| void lbann::model::do_model_optimize_end_cbs | ( | ) |
Execute callbacks at end of model optimization.
| void lbann::model::do_setup_end_cbs | ( | ) |
Execute callbacks at end of setup.
| void lbann::model::do_weight_optimize_begin_cbs | ( | weights * | w | ) |
Execute callbacks at the start of weight optimization.
| void lbann::model::do_weight_optimize_end_cbs | ( | weights * | w | ) |
Execute callbacks at the end of weight optimization.
|
inlinenoexcept |
|
private |
| void lbann::model::evaluate_metrics | ( | execution_mode | mode, |
| size_t | current_mini_batch_size | ||
| ) |
Evaluate any metrics in the model.
| void lbann::model::forward_prop | ( | execution_mode | mode | ) |
Forward propagation step.
|
inline |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
| description lbann::model::get_description | ( | ) | const |
Human-readable description.
|
inline |
|
inline |
| Layer& lbann::model::get_layer | ( | El::Int | pos | ) |
| pos | Position in model's list of layers. |
| Layer const& lbann::model::get_layer | ( | El::Int | pos | ) | const |
| pos | Position in model's list of layers. |
| std::vector<Layer*> lbann::model::get_layers | ( | ) |
Return list of layers in model.
The list is in execution order for forward propagation.
| std::vector<Layer const*> lbann::model::get_layers | ( | ) | const |
Return list of layers in model.
The list is in execution order for forward propagation.
|
inlinenoexcept |
|
private |
| std::vector<metric*> lbann::model::get_metrics | ( | ) |
Return the model's metrics.
| std::vector<metric const*> lbann::model::get_metrics | ( | ) | const |
|
inlinenoexcept |
|
noexcept |
Machine-learning object accessors.
Size of model's list of layers.
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
private |
|
private |
|
private |
|
private |
|
inlinenoexcept |
|
private |
|
inlinenoexcept |
|
inlinenoexcept |
|
private |
| std::vector<weights*> lbann::model::get_weights | ( | ) |
| std::vector<weights const*> lbann::model::get_weights | ( | ) | const |
| std::vector<ViewingWeightsPtr> lbann::model::get_weights_pointers | ( | ) | const |
|
inlinenoexcept |
| void lbann::model::insert_layer | ( | OwningLayerPtr && | l, |
| std::string const & | parent_name | ||
| ) |
Insert layer in model.
|
inlinenoexcept |
| bool lbann::model::load_from_checkpoint_distributed | ( | persist & | p | ) |
| bool lbann::model::load_from_checkpoint_shared | ( | persist & | p | ) |
Restore model by reading checkpoint from given file descriptor, return number of bytes read.
| void lbann::model::reconcile_weight_values | ( | ) |
Reconcile weight values.
If weight values are duplicated across multiple processes, they are set to the average across the processes.
|
private |
Remap pointers.
Layer and weights pointers are remapped using the provided maps. If a pointer is not a key in the corresponding map, the pointer is not changed.
| void lbann::model::remove_layer | ( | std::string const & | name | ) |
Remove layer from model.
| void lbann::model::remove_weights | ( | std::string const & | name | ) |
Remove weights from model.
|
private |
Setup-related implementation.
Reorder layer list with a gather.
The new layer list is the same length as gather_indices and its entries are given by \( \text{new\_list}[i] = \text{old\_list}[\text{gather\_indices}[i]] \).
Since entries in the layer list must be unique, this will fail if gather_indices has any repeated entries.
| void lbann::model::replace_layer | ( | OwningLayerPtr && | l, |
| std::string const & | name | ||
| ) |
Replace layer in model.
| void lbann::model::reset_epoch_statistics | ( | execution_mode | mode | ) |
Reset model statistics for an epoch.
| void lbann::model::reset_mode | ( | ExecutionContext & | context, |
| execution_mode | mode | ||
| ) |
Reset model pointer and execution mode.
| void lbann::model::save_model | ( | ) |
Saves the model explicitly if the save_model callback is present.
| bool lbann::model::save_to_checkpoint_distributed | ( | persist & | p | ) |
| bool lbann::model::save_to_checkpoint_shared | ( | persist & | p | ) |
Checkpoint model to given file descriptor, return number of bytes written.
| void lbann::model::serialize | ( | Archive & | ar | ) |
Serialization for checkpoint and restart with Cereal.
|
inlinenoexcept |
| void lbann::model::set_name | ( | std::string | name | ) |
Metadata Accessors.
Model instance name.
Each model in a trainer should have a unique, and preferably human-readable, name.
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
| void lbann::model::setup | ( | size_t | max_mini_batch_size, |
| const std::vector< El::Grid *> & | grids, | ||
| bool | force = false |
||
| ) |
Must be called after model specification and before execution.
|
private |
Set up layer execution order.
Called in setup function. A topological sort is applied to the layer list so that we can traverse the directed acyclic graph without violating dependencies.
|
private |
Set up grid tags for all layers.
Called in setup function.
|
private |
Set up topology of layer graph.
Called in setup function. All layers in the connected component of the layer graph are added to the model and all parent/child relationships between layers are reciprocated.
|
private |
Set up layers.
Called in setup function.
|
private |
|
private |
|
private |
Set up subgrids for subgraph parallelism.
|
private |
Set up weights.
Called in setup function. All weights being used by layers or the objective function are added to the model and all unused weights are deleted.
| void lbann::model::summarize_matrices | ( | lbann_summary & | summarizer | ) |
Summarize matrices (e.g. means).
These are called less frequently and can be more expensive.
| void lbann::model::summarize_stats | ( | lbann_summary & | summarizer | ) |
Summarize statistics (e.g. timers, counters).
These should be computable quickly.
| void lbann::model::swap_layers | ( | model & | other | ) |
| void lbann::model::swap_metrics | ( | model & | other | ) |
| void lbann::model::swap_objective_function | ( | model & | other | ) |
| void lbann::model::swap_weights | ( | model & | other | ) |
| bool lbann::model::update_layers | ( | ) |
Update layers step.
| void lbann::model::update_weights | ( | ) |
Update weights step.
| void lbann::model::write_proto | ( | lbann_data::Model & | proto | ) |
Write model to proto file.
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
The current mini-batch size.
This should be set on each step by the execution algorithm using the value that the data coordinator gets from the data readers.
Number of samples being processed in the current step (iteration), used for correctly averaging gradients.
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |