LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
kfac_block.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_HPP_INCLUDED
28 #define LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_HPP_INCLUDED
29 
31 #include "lbann/layers/layer.hpp"
32 
33 namespace lbann {
34 
// Forward declaration of the K-FAC execution context. Only the name is
// needed in this header: kfac_block stores a pointer to the context and
// never dereferences it here, so the full definition is not required.
namespace kfac {
class KFACExecutionContext;
} // namespace kfac
39 
40 namespace kfac {
41 #if defined AL_HAS_NCCL
42 using BackendT = ::Al::NCCLBackend;
43 #elif defined AL_HAS_HOST_TRANSFER
44 using BackendT = ::Al::HostTransferBackend;
45 #else
46 using BackendT = ::Al::MPIBackend;
47 #endif
48 
49 using ReqT = typename BackendT::req_type;
50 } // namespace kfac
51 
54 template <El::Device Device>
55 class kfac_block
56 {
57 public:
60  kfac_block(Layer* layer,
62  size_t layer_id,
63  size_t inverse_proc_rank,
64  bool enable_copy_errors,
65  bool enable_copy_activations,
66  int input_size,
67  int output_size)
68  : m_layer(layer),
69  m_layer_id(layer_id),
70  m_inverse_proc_rank(inverse_proc_rank),
71  m_input_size(input_size),
72  m_output_size(output_size),
73  m_enable_copy_errors(enable_copy_errors),
74  m_enable_copy_activations(enable_copy_activations),
75  m_context(context)
76  {
77  m_has_kronecker_inverse = false;
78  }
79  virtual ~kfac_block() = default;
80 
81  virtual void on_forward_prop_end(lbann_comm* comm) {}
82 
84  virtual int get_local_memory_consumption() = 0;
85 
87  virtual void compute_local_kronecker_factors(lbann_comm* comm,
88  bool print_matrix,
89  bool print_matrix_summary);
90 
92  virtual const std::vector<El::AbstractMatrix<DataType>*>
93  get_local_kronecker_buffers();
94 
96  virtual void update_kronecker_average(lbann_comm* comm,
97  DataType kronecker_decay,
98  bool print_matrix,
99  bool print_matrix_summary);
100 
102  virtual void update_kronecker_inverse(lbann_comm* comm,
103  bool use_pi,
104  DataType damping_act,
105  DataType damping_err,
106  DataType learning_rate_factor,
107  bool use_eigen_decomposition,
108  bool print_matrix,
109  bool print_matrix_summary,
110  bool print_time);
111 
113  virtual void compute_preconditioned_gradients(lbann_comm* comm,
114  DataType learning_rate_factor,
115  bool print_matrix,
116  bool print_matrix_summary,
117  bool print_time);
118 
121  virtual void initialize_activations_and_errors(lbann_comm* comm,
122  int num_local_activations,
123  int num_local_errors,
124  int num_weights);
125 
126  virtual void start_communication_forward_end(lbann_comm* comm) = 0;
127  virtual void end_communication_forward_end(lbann_comm* comm) = 0;
128  virtual void start_communication_backward_end(lbann_comm* comm) = 0;
129  virtual void end_communication_backward_end(lbann_comm* comm) = 0;
130 
132  virtual const std::vector<El::AbstractMatrix<DataType>*>
133  get_preconditioned_grad_buffers();
134 
136  virtual int get_inverse_matrices(El::Matrix<DataType, Device>& output,
137  int offset) = 0;
138 
140  virtual int get_inverse_matrices_size(lbann_comm* comm) = 0;
141 
143  virtual std::vector<int>
144  get_inverse_matrices_size_vector(lbann_comm* comm) = 0;
145 
147  virtual void resize_inverse_matrices_size(
148  El::Matrix<double, El::Device::CPU>& inverse_matrices_size,
149  int block_number) = 0;
150 
152  virtual int set_inverse_matrices(El::Matrix<DataType, Device>& workspace,
153  int offset,
154  lbann_comm* comm) = 0;
155 
156  void set_current_batch_size(El::Int batch_size) { m_batch_size = batch_size; }
157 
159  virtual std::string get_info() const
160  {
161  std::ostringstream oss;
162  oss << "name=" << m_layer->get_name() << ", id=" << m_layer_id
163  << ", type=" << m_layer->get_type()
164  << ", inverse_proc_rank=" << m_inverse_proc_rank;
165  return oss.str();
166  }
167 
168  std::string get_name() const { return m_layer->get_name(); }
169 
170  size_t get_inverse_proc_rank() const { return m_inverse_proc_rank; }
171 
172  DataType* get_local_activation_buffer(int index)
173  {
174  return m_parent_local_activations[index]->Buffer();
175  }
176 
177  DataType* get_local_error_buffer(int index)
178  {
179  return m_child_local_errors[index]->Buffer();
180  }
181 
182  DataType* get_weight_buffer(int index)
183  {
184  return m_weight_values[index]->Buffer();
185  }
186 
187  DataType* get_gradient_wrt_weight_buffer(int index)
188  {
189  return m_weight_gradients[index]->Buffer();
190  }
191 
192  El::Int get_current_batch_size() { return m_batch_size; }
193 
194  El::Int get_input_size() { return m_input_size; }
195 
196  El::Int get_output_size() { return m_output_size; }
197 
201  virtual std::vector<std::tuple<std::string, size_t, size_t>>
202  get_internal_matrix_info() const;
203 
204 protected:
207  El::Matrix<DataType, Device>&
208  get_workspace_matrix(const std::string& key, size_t height, size_t width);
209 
211  El::SyncInfo<Device> get_sync_info();
212 
215 
218  const size_t m_layer_id;
219 
222 
224  std::vector<std::unique_ptr<AbsDistMat>> m_parent_local_activations,
225  m_child_local_errors, m_weight_gradients, m_subset_matrix, m_errors_copy,
226  m_activations_copy;
227 
231  std::vector<std::unique_ptr<AbsDistMat>> m_weight_values;
232 
233  std::vector<kfac::ReqT> m_requests_forward_end, m_requests_backward_end;
234 
237  int m_input_size, m_output_size, m_batch_size;
238 
241 
244 
247 
248 private:
252 };
253 
254 } // namespace lbann
255 
256 #endif // LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_HPP_INCLUDED
bool m_enable_copy_errors
Enable copying of errors to enhance async communication.
Definition: kfac_block.hpp:240
Layer * m_layer
The target layer.
Definition: kfac_block.hpp:214
const int m_inverse_proc_rank
The ID of the process that performs the inversion of the Kronecker factors.
Definition: kfac_block.hpp:221
::Al::MPIBackend BackendT
Definition: kfac_block.hpp:46
void set_current_batch_size(El::Int batch_size)
Definition: kfac_block.hpp:156
bool m_enable_copy_activations
Enable copying of activations to enhance async communication.
Definition: kfac_block.hpp:243
El::Int get_current_batch_size()
Definition: kfac_block.hpp:192
DataType * get_gradient_wrt_weight_buffer(int index)
Definition: kfac_block.hpp:187
Neural network tensor operation.
Definition: layer.hpp:285
std::vector< std::unique_ptr< AbsDistMat > > m_weight_values
The TranslateBetweenGrids function has a basic implementation for STAR,STAR distributed matrices...
Definition: kfac_block.hpp:231
typename BackendT::req_type ReqT
Definition: kfac_block.hpp:49
const size_t m_layer_id
The layer ID in the model. TODO: Remove this.
Definition: kfac_block.hpp:218
El::Int get_output_size()
Definition: kfac_block.hpp:196
kfac::KFACExecutionContext * m_context
The execution context that created this block. TODO: Use its own workspace and remove this pointer...
Definition: kfac_block.hpp:251
void update_kronecker_average(El::Matrix< DataType, Device > &Aave, const El::Matrix< DataType, Device > &A, size_t count, double decay, const El::SyncInfo< Device > &sync_info)
Update a Kronecker factor matrix using decay.
std::vector< kfac::ReqT > m_requests_forward_end
Definition: kfac_block.hpp:233
virtual std::string get_info() const
Get block's information in one line.
Definition: kfac_block.hpp:159
std::string get_name() const
Definition: kfac_block.hpp:168
DataType * get_local_activation_buffer(int index)
Definition: kfac_block.hpp:172
DataType * get_local_error_buffer(int index)
Definition: kfac_block.hpp:177
El::Int get_input_size()
Definition: kfac_block.hpp:194
bool m_has_kronecker_inverse
Whether this block already has an inverse history.
Definition: kfac_block.hpp:246
DataType * get_weight_buffer(int index)
Definition: kfac_block.hpp:182
size_t get_inverse_proc_rank() const
Definition: kfac_block.hpp:170
virtual void on_forward_prop_end(lbann_comm *comm)
Definition: kfac_block.hpp:81
kfac_block(Layer *layer, kfac::KFACExecutionContext *context, size_t layer_id, size_t inverse_proc_rank, bool enable_copy_errors, bool enable_copy_activations, int input_size, int output_size)
Definition: kfac_block.hpp:60
std::vector< std::unique_ptr< AbsDistMat > > m_weight_gradients
Definition: kfac_block.hpp:224
El::SyncInfo< D > get_sync_info(El::Matrix< TensorDataType, D > const &m) noexcept
Get a SyncInfo from a Matrix.