d4/d7d/kfac__block_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.

 #ifndef LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_HPP_INCLUDED
 #define LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_HPP_INCLUDED

 #include "lbann/execution_algorithms/kfac/execution_context.hpp"
 #include "lbann/layers/layer.hpp"

 namespace lbann {

 // Forward declaration
 namespace kfac {
 class KFACExecutionContext;
 }

 namespace kfac {
 #if defined AL_HAS_NCCL
 using BackendT = ::Al::NCCLBackend;
 #elif defined AL_HAS_HOST_TRANSFER
 using BackendT = ::Al::HostTransferBackend;
 #else
 using BackendT = ::Al::MPIBackend;
 #endif

 using ReqT = typename BackendT::req_type;
 } // namespace kfac

 template <El::Device Device>
 class kfac_block
 {
 public:
   kfac_block(Layer* layer,
              kfac::KFACExecutionContext* context,
              size_t layer_id,
              size_t inverse_proc_rank,
              bool enable_copy_errors,
              bool enable_copy_activations,
              int input_size,
              int output_size)
     : m_layer(layer),
       m_layer_id(layer_id),
       m_inverse_proc_rank(inverse_proc_rank),
       m_input_size(input_size),
       m_output_size(output_size),
       m_enable_copy_errors(enable_copy_errors),
       m_enable_copy_activations(enable_copy_activations),
       m_context(context)
   {
     m_has_kronecker_inverse = false;
   }
   virtual ~kfac_block() = default;

   virtual void on_forward_prop_end(lbann_comm* comm) {}

   virtual int get_local_memory_consumption() = 0;

   virtual void compute_local_kronecker_factors(lbann_comm* comm,
                                                bool print_matrix,
                                                bool print_matrix_summary);

   virtual const std::vector<El::AbstractMatrix<DataType>*>
   get_local_kronecker_buffers();

   virtual void update_kronecker_average(lbann_comm* comm,
                                         DataType kronecker_decay,
                                         bool print_matrix,
                                         bool print_matrix_summary);

   virtual void update_kronecker_inverse(lbann_comm* comm,
                                         bool use_pi,
                                         DataType damping_act,
                                         DataType damping_err,
                                         DataType learning_rate_factor,
                                         bool use_eigen_decomposition,
                                         bool print_matrix,
                                         bool print_matrix_summary,
                                         bool print_time);

   virtual void compute_preconditioned_gradients(lbann_comm* comm,
                                                 DataType learning_rate_factor,
                                                 bool print_matrix,
                                                 bool print_matrix_summary,
                                                 bool print_time);

   virtual void initialize_activations_and_errors(lbann_comm* comm,
                                                  int num_local_activations,
                                                  int num_local_errors,
                                                  int num_weights);

   virtual void start_communication_forward_end(lbann_comm* comm) = 0;
   virtual void end_communication_forward_end(lbann_comm* comm) = 0;
   virtual void start_communication_backward_end(lbann_comm* comm) = 0;
   virtual void end_communication_backward_end(lbann_comm* comm) = 0;

   virtual const std::vector<El::AbstractMatrix<DataType>*>
   get_preconditioned_grad_buffers();

   virtual int get_inverse_matrices(El::Matrix<DataType, Device>& output,
                                    int offset) = 0;

   virtual int get_inverse_matrices_size(lbann_comm* comm) = 0;

   virtual std::vector<int>
   get_inverse_matrices_size_vector(lbann_comm* comm) = 0;

   virtual void resize_inverse_matrices_size(
     El::Matrix<double, El::Device::CPU>& inverse_matrices_size,
     int block_number) = 0;

   virtual int set_inverse_matrices(El::Matrix<DataType, Device>& workspace,
                                    int offset,
                                    lbann_comm* comm) = 0;

   void set_current_batch_size(El::Int batch_size) { m_batch_size = batch_size; }

   virtual std::string get_info() const
   {
     std::ostringstream oss;
     oss << "name=" << m_layer->get_name() << ", id=" << m_layer_id
         << ", type=" << m_layer->get_type()
         << ", inverse_proc_rank=" << m_inverse_proc_rank;
     return oss.str();
   }

   std::string get_name() const { return m_layer->get_name(); }

   size_t get_inverse_proc_rank() const { return m_inverse_proc_rank; }

   DataType* get_local_activation_buffer(int index)
   {
     return m_parent_local_activations[index]->Buffer();
   }

   DataType* get_local_error_buffer(int index)
   {
     return m_child_local_errors[index]->Buffer();
   }

   DataType* get_weight_buffer(int index)
   {
     return m_weight_values[index]->Buffer();
   }

   DataType* get_gradient_wrt_weight_buffer(int index)
   {
     return m_weight_gradients[index]->Buffer();
   }

   El::Int get_current_batch_size() { return m_batch_size; }

   El::Int get_input_size() { return m_input_size; }

   El::Int get_output_size() { return m_output_size; }

   virtual std::vector<std::tuple<std::string, size_t, size_t>>
   get_internal_matrix_info() const;

 protected:
   El::Matrix<DataType, Device>&
   get_workspace_matrix(const std::string& key, size_t height, size_t width);

   El::SyncInfo<Device> get_sync_info();

   Layer* m_layer;

   const size_t m_layer_id;

   const int m_inverse_proc_rank;

   std::vector<std::unique_ptr<AbsDistMat>> m_parent_local_activations,
     m_child_local_errors, m_weight_gradients, m_subset_matrix, m_errors_copy,
     m_activations_copy;

   std::vector<std::unique_ptr<AbsDistMat>> m_weight_values;

   std::vector<kfac::ReqT> m_requests_forward_end, m_requests_backward_end;

   int m_input_size, m_output_size, m_batch_size;

   bool m_enable_copy_errors;

   bool m_enable_copy_activations;

   bool m_has_kronecker_inverse;

 private:
   kfac::KFACExecutionContext* m_context;
 };

 } // namespace lbann

 #endif // LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_HPP_INCLUDED
lbann::kfac_block::m_enable_copy_errors
bool m_enable_copy_errors
Enable copying of errors to enhance async communication.
Definition: kfac_block.hpp:240

lbann::kfac_block::m_layer
Layer * m_layer
The target layer.
Definition: kfac_block.hpp:214

lbann::kfac_block::m_inverse_proc_rank
const int m_inverse_proc_rank
The ID of the process that performs the inverse on the Kronecker factors.
Definition: kfac_block.hpp:221

lbann::kfac::BackendT
::Al::MPIBackend BackendT
Definition: kfac_block.hpp:46

lbann::kfac_block::set_current_batch_size
void set_current_batch_size(El::Int batch_size)
Definition: kfac_block.hpp:156

lbann::kfac_block::m_enable_copy_activations
bool m_enable_copy_activations
Enable copying of activations to enhance async communication.
Definition: kfac_block.hpp:243

lbann::kfac_block::get_current_batch_size
El::Int get_current_batch_size()
Definition: kfac_block.hpp:192

lbann::kfac_block::get_gradient_wrt_weight_buffer
DataType * get_gradient_wrt_weight_buffer(int index)
Definition: kfac_block.hpp:187

lbann::lbann_comm
Definition: comm.hpp:105

lbann::Layer
Neural network tensor operation.
Definition: layer.hpp:285

lbann::kfac_block::m_weight_values
std::vector< std::unique_ptr< AbsDistMat > > m_weight_values
The translate-between-grids function has a basic implementation for STAR,STAR distributed matrices...
Definition: kfac_block.hpp:231

lbann::kfac::ReqT
typename BackendT::req_type ReqT
Definition: kfac_block.hpp:49

lbann::kfac_block::m_layer_id
const size_t m_layer_id
The layer ID in the model. TODO: Remove this.
Definition: kfac_block.hpp:218

lbann::kfac_block::get_output_size
El::Int get_output_size()
Definition: kfac_block.hpp:196

lbann::kfac_block::m_output_size
int m_output_size
Definition: kfac_block.hpp:237

lbann::kfac_block::m_context
kfac::KFACExecutionContext * m_context
The execution context that created this block. TODO: Use its own workspace and remove this pointer...
Definition: kfac_block.hpp:251

lbann::kfac::update_kronecker_average
void update_kronecker_average(El::Matrix< DataType, Device > &Aave, const El::Matrix< DataType, Device > &A, size_t count, double decay, const El::SyncInfo< Device > &sync_info)
Update a Kronecker factor matrix using decay.

lbann::kfac::KFACExecutionContext
Definition: kfac/execution_context.hpp:57

execution_context.hpp

lbann::kfac_block::m_requests_forward_end
std::vector< kfac::ReqT > m_requests_forward_end
Definition: kfac_block.hpp:233

lbann::kfac_block
Definition: kfac/execution_context.hpp:40

lbann::kfac_block::get_info
virtual std::string get_info() const
Get the block's information in one line.
Definition: kfac_block.hpp:159

lbann::kfac_block::get_name
std::string get_name() const
Definition: kfac_block.hpp:168

layer.hpp

lbann::kfac_block::get_local_activation_buffer
DataType * get_local_activation_buffer(int index)
Definition: kfac_block.hpp:172

lbann::kfac_block::get_local_error_buffer
DataType * get_local_error_buffer(int index)
Definition: kfac_block.hpp:177

lbann::kfac_block::get_input_size
El::Int get_input_size()
Definition: kfac_block.hpp:194

lbann::kfac_block::m_has_kronecker_inverse
bool m_has_kronecker_inverse
Whether this block already has an inverse history.
Definition: kfac_block.hpp:246

lbann::kfac_block::get_weight_buffer
DataType * get_weight_buffer(int index)
Definition: kfac_block.hpp:182

lbann::kfac_block::get_inverse_proc_rank
size_t get_inverse_proc_rank() const
Definition: kfac_block.hpp:170

lbann::kfac_block::on_forward_prop_end
virtual void on_forward_prop_end(lbann_comm *comm)
Definition: kfac_block.hpp:81

lbann::kfac_block::kfac_block
kfac_block(Layer *layer, kfac::KFACExecutionContext *context, size_t layer_id, size_t inverse_proc_rank, bool enable_copy_errors, bool enable_copy_activations, int input_size, int output_size)
Definition: kfac_block.hpp:60

lbann::kfac_block::m_weight_gradients
std::vector< std::unique_ptr< AbsDistMat > > m_weight_gradients
Definition: kfac_block.hpp:224

lbann::get_sync_info
El::SyncInfo< D > get_sync_info(El::Matrix< TensorDataType, D > const &m) noexcept
Get a SyncInfo from an Matrix.
Definition: sync_info_helpers.hpp:39

lbann
Definition: callback_helpers.hpp:32