|
LBANN
0.103.0
Livermore Big Artificial Neural Network Toolkit
|
An implementation of the KFAC second-order optimization algorithm. More...
#include <kfac.hpp>
Public Types | |
| using | TermCriteriaType = SGDTerminationCriteria |
| using | ExeContextType = kfac::KFACExecutionContext |
Public Member Functions | |
Life-cycle management | |
| KFAC (std::string name, std::unique_ptr< TermCriteriaType > stop, std::vector< double > damping_act_params, std::vector< double > damping_err_params, std::vector< double > damping_bn_act_params, std::vector< double > damping_bn_err_params, std::vector< bool > kfac_use_interval, size_t damping_warmup_steps, double kronecker_decay, bool print_time, bool print_matrix, bool print_matrix_summary, bool use_pi, std::vector< size_t > update_intervals, size_t update_interval_steps, kfac::kfac_inverse_strategy inverse_strategy, std::vector< std::string > disable_layers, double learning_rate_factor, double learning_rate_factor_gru, size_t compute_interval, bool distribute_precondition_compute, bool use_eigen_decomposition, bool enable_copy_errors, bool enable_copy_activations) | |
| Construct KFAC from its component pieces. More... | |
| ~KFAC () noexcept=default | |
| KFAC (KFAC const &other)=delete | |
| KFAC & | operator= (const KFAC &other)=delete |
| KFAC (KFAC &&other)=default | |
| KFAC & | operator= (KFAC &&other)=default |
| std::string | get_type () const final |
| Queries. More... | |
Apply interface | |
| void | apply (ExecutionContext &context, model &m, data_coordinator &dc, execution_mode mode) final |
| Apply the training algorithm to refine model weights. More... | |
| void | train (ExeContextType &c, model &model, data_coordinator &dc, TermCriteriaType const &term) |
| Train a model using KFAC. More... | |
Public Member Functions inherited from lbann::TrainingAlgorithm | |
| TrainingAlgorithm (std::string name) | |
| Constructor. More... | |
| virtual | ~TrainingAlgorithm ()=default |
| std::string const & | get_name () const noexcept |
| A user-defined string identifying the algorithm object. More... | |
| void | apply (model &model, data_coordinator &dc) |
| Apply the algorithm to the given model. More... | |
| void | setup_models (std::vector< observer_ptr< model >> const &models, size_t max_mini_batch_size, const std::vector< El::Grid *> &grids) |
| Setup a collection of models. More... | |
| std::unique_ptr< ExecutionContext > | get_new_execution_context () const |
| Get a default-initialized execution context that fits this training algorithm. More... | |
Static Public Attributes | |
| static constexpr const El::Device | Device = El::Device::CPU |
| static constexpr const double | damping_0_default = 3e-2 |
| The default parameters of a Tikhonov damping technique. More... | |
| static constexpr const size_t | damping_warmup_steps_default = 100 |
| static constexpr const double | kronecker_decay_default = 0.99 |
| The default parameters of the decay factor. More... | |
| static constexpr const bool | prof_sync = true |
| Parameters for prof_region_*. More... | |
| static constexpr const int | prof_color = 0 |
Private Member Functions | |
| void | on_forward_prop_end (ExeContextType &context, model &model) |
| void | on_backward_prop_end (ExeContextType &context, model &model) |
| void | sync_weights_model (model &model, lbann_comm *comm) |
| Data exchange functions to synchronize model and weights. More... | |
| void | start_sync_weights_async (model &model, lbann_comm *comm) |
| void | end_sync_weights_async (model &model, lbann_comm *comm) |
| void | start_old_async_weights_model (model &model, lbann_comm *comm, ExeContextType &context) |
| void | end_old_async_weights_model (model &model, lbann_comm *comm, ExeContextType &context) |
| void | allgather_precondition_gradient (lbann_comm &comm, ExeContextType &context) |
Private Attributes | |
| std::unique_ptr< TermCriteriaType > | m_stopping_criteria |
| The KFAC stopping criteria. More... | |
| std::vector< double > | m_damping_act_params |
| Pairs of the initial and the target damping value. If only one value is specified, it will be used throughout training. More... | |
| std::vector< double > | m_damping_err_params |
| std::vector< double > | m_damping_bn_act_params |
| std::vector< double > | m_damping_bn_err_params |
| size_t | m_damping_warmup_steps |
| The number of warmup steps of the Tikhonov damping technique. More... | |
| double | m_kronecker_decay |
| The decay factor of the Kronecker factors. More... | |
| bool | m_print_time |
| Knobs to print information for debugging. More... | |
| bool | m_print_matrix |
| bool | m_print_matrix_summary |
| bool | m_use_pi |
| Whether to use the pi constant to adjust the damping constant. More... | |
| std::vector< size_t > | m_update_intervals |
| Space-separated pairs of the initial and the target update intervals. If only one value is specified, it will be used throughout training. More... | |
| size_t | m_update_interval_steps |
| The number of steps for changing the update interval. More... | |
| kfac::kfac_inverse_strategy | m_inverse_strategy |
| Assignment strategy for the model-parallel part. More... | |
| std::vector< std::string > | m_disable_layers |
| List of layers to be ignored by the callback. More... | |
| double | m_learning_rate_factor |
| Factors to be multiplied to the learning rate. More... | |
| double | m_learning_rate_factor_gru |
| bool | m_has_kronecker_inverse = false |
| Whether the inverses of the Kronecker factors are available. More... | |
| size_t | m_compute_interval |
| KFAC Compute interval. More... | |
| bool | m_distribute_precondition_compute |
| distribute precondition gradient compute. More... | |
| bool | m_enable_copy_errors |
| copy errors to a temporary matrix to increase overlap of compute and communication. More... | |
| bool | m_enable_copy_activations |
| copy activations to a temporary matrix to increase overlap of compute and communication. More... | |
| bool | m_use_eigen_decomposition |
| Use eigenvalue decomposition for inverting the matrix. More... | |
| El::Matrix< double, El::Device::CPU > | m_inverse_matrices_size |
| int | m_global_inverse_buffer_size = 0 |
| int | m_weight_matrices_buffer_size = 0 |
| std::vector< kfac::ReqT > | m_inverse_matrix_communication_reqs |
| vector for async communication reqs. More... | |
| std::vector< kfac::ReqT > | m_weights_communication_reqs |
| int | m_time_span_inverse_comm = 0 |
| Profiling variables. More... | |
| int | m_time_span_inverse_send_recv = 0 |
| int | m_time_span_forward_comm = 0 |
| int | m_time_span_forward_comm_end = 0 |
| int | m_time_span_backward_comm = 0 |
| int | m_time_span_backward_comm_end = 0 |
| int | m_time_span_precond_comm = 0 |
| int | m_time_forward_pass = 0 |
| int | m_time_backward_pass = 0 |
| int | m_time_kfac = 0 |
| std::vector< bool > | m_use_KFAC_epoch |
An implementation of the KFAC second-order optimization algorithm.
Martens, James and Roger Grosse. "Optimizing neural networks with kronecker-factored approximate curvature." International conference on machine learning. 2015.
Grosse, Roger, and James Martens. "A kronecker-factored approximate fisher matrix for convolution layers." International Conference on Machine Learning. 2016.
Osawa, Kazuki, et al. "Large-scale distributed second-order optimization using kronecker-factored approximate curvature for deep convolutional neural networks." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019.
| lbann::KFAC::KFAC | ( | std::string | name, |
| std::unique_ptr< TermCriteriaType > | stop, | ||
| std::vector< double > | damping_act_params, | ||
| std::vector< double > | damping_err_params, | ||
| std::vector< double > | damping_bn_act_params, | ||
| std::vector< double > | damping_bn_err_params, | ||
| std::vector< bool > | kfac_use_interval, | ||
| size_t | damping_warmup_steps, | ||
| double | kronecker_decay, | ||
| bool | print_time, | ||
| bool | print_matrix, | ||
| bool | print_matrix_summary, | ||
| bool | use_pi, | ||
| std::vector< size_t > | update_intervals, | ||
| size_t | update_interval_steps, | ||
| kfac::kfac_inverse_strategy | inverse_strategy, | ||
| std::vector< std::string > | disable_layers, | ||
| double | learning_rate_factor, | ||
| double | learning_rate_factor_gru, | ||
| size_t | compute_interval, | ||
| bool | distribute_precondition_compute, | ||
| bool | use_eigen_decomposition, | ||
| bool | enable_copy_errors, | ||
| bool | enable_copy_activations | ||
| ) |
Construct KFAC from its component pieces.
|
default noexcept |
|
delete |
|
default |
|
private |
|
final virtual |
Apply the training algorithm to refine model weights.
| [in,out] | context | The persistent execution context for this algorithm. |
| [in,out] | m | The model to be trained. |
| [in,out] | dc | The data source for training. |
| [in] | mode | Completely superfluous. |
Implements lbann::TrainingAlgorithm.
|
protected |
Execute callbacks at start of mini-batch.
|
protected |
Execute callbacks at end of mini-batch.
|
protected |
Execute callbacks at start of epoch.
|
protected |
Execute callbacks at end of epoch.
|
final protected virtual |
Covariant return-friendly implementation of get_new_execution_context().
Implements lbann::TrainingAlgorithm.
|
protected |
Execute callbacks at start of training.
|
protected |
Execute callbacks at end of training.
|
private |
|
protected |
|
private |
|
final virtual |
Queries.
Implements lbann::TrainingAlgorithm.
|
private |
|
private |
|
private |
|
protected |
|
private |
|
private |
Data exchange functions to synchronize model and weights.
| void lbann::KFAC::train | ( | ExeContextType & | c, |
| model & | model, | ||
| data_coordinator & | dc, | ||
| TermCriteriaType const & | term | ||
| ) |
Train a model using KFAC.
|
protected |
Train model on one step / mini-batch of an SGD forward pass.
|
static |
|
static |
|
static |
|
static |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
static |