LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
kfac_block_fc_conv.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_FC_CONV_HPP_INCLUDED
28 #define LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_FC_CONV_HPP_INCLUDED
29 
32 
33 namespace lbann {
34 
35 namespace kfac_fc_conv_util {
36 
// Get diagonal elements of a matrix.
//
// Declaration only; the definition is not part of this listing.
//   diag      - non-const matrix reference (presumably receives the diagonal
//               of A -- TODO confirm against the implementation).
//   A         - read-only input matrix.
//   sync_info - El::SyncInfo for the same Device as the matrices.
38 template <El::Device Device>
39 void get_diagonal(El::Matrix<DataType, Device>& diag,
40  const El::Matrix<DataType, Device>& A,
41  const El::SyncInfo<Device>& sync_info);
42 
// Transpose NC(D)HW matrix to N(D)HWC.
//
// Declaration only; the definition is not part of this listing.
//   activations     - read-only input matrix (the NC(D)HW side).
//   act_columns     - non-const matrix reference (presumably the N(D)HWC
//                     result -- TODO confirm against the implementation).
//   mini_batch_size - presumably N in the layout names above -- confirm.
//   num_channels    - presumably C in the layout names above -- confirm.
//   spatial_prod    - presumably the product of the spatial extents
//                     ((D)HW) -- confirm.
//   sync_info       - El::SyncInfo for the same Device as the matrices.
44 template <El::Device Device>
45 void conv_transpose(const El::Matrix<DataType, Device>& activations,
46  El::Matrix<DataType, Device>& act_columns,
47  size_t mini_batch_size,
48  size_t num_channels,
49  size_t spatial_prod,
50  const El::SyncInfo<Device>& sync_info);
51 
// im2col.
//
// Declaration only; the definition is not part of this listing, so the
// parameter notes below are limited to what the signature shows.
//   im             - read-only input matrix.
//   col            - non-const matrix reference (presumably the output
//                    column matrix -- TODO confirm).
//   num_channels   - channel count.
//   im_num_dims    - presumably the length of the four int arrays below --
//                    confirm against the implementation.
//   im_dims        - pointer to image dimensions.
//   im_pads        - pointer to image paddings.
//   window_dims    - pointer to window dimensions.
//   window_strides - pointer to window strides.
//   batch_size     - batch size.
//   sync_info      - El::SyncInfo for the same Device as the matrices.
53 template <El::Device Device>
54 void im2col(const El::Matrix<DataType, Device>& im,
55  El::Matrix<DataType, Device>& col,
56  const int num_channels,
57  const int im_num_dims,
58  const int* im_dims,
59  const int* im_pads,
60  const int* window_dims,
61  const int* window_strides,
62  const int batch_size,
63  const El::SyncInfo<Device>& sync_info);
64 
65 } // namespace kfac_fc_conv_util
66 
70 template <El::Device Device>
71 class kfac_block_fc_conv : public kfac_block<Device>
72 {
73 public:
78  const size_t layer_id,
79  const size_t inverse_proc_rank,
80  const bool enable_copy_errors,
81  const bool enable_copy_activations,
82  const int input_size,
83  const int output_size,
84  const bool is_conv)
85  : kfac_block<Device>(layer,
86  context,
87  layer_id,
88  inverse_proc_rank,
89  enable_copy_errors,
90  enable_copy_activations,
91  input_size,
92  output_size),
93  m_is_conv(is_conv),
94  m_has_bias(layer->num_weights() > 1)
95  {
96  if (m_is_conv) {
97  m_conv_input_spatial_prod = 1;
98  const auto input_dims = layer->get_input_dims();
99  for (auto i = input_dims.begin() + 1; i != input_dims.end(); i++) {
100  m_conv_input_spatial_prod *= *i;
101  m_conv_input_spatial_dims.push_back(*i);
102  }
103 
104  m_conv_output_spatial_prod = 1;
105  const auto output_dims = layer->get_output_dims();
106  for (auto i = output_dims.begin() + 1; i != output_dims.end(); i++) {
107  m_conv_output_spatial_prod *= *i;
108  m_conv_output_spatial_dims.push_back(*i);
109  }
110 
111  if (input_dims.size() != 3 && input_dims.size() != 4) {
112  std::stringstream err;
113  err << "The K-FAC only supports 2D or 3D tensors."
114  << " layer: " << layer->get_name() << ", input_dims: ";
115  for (auto i = input_dims.begin(); i != input_dims.end(); i++)
116  err << (std::distance(input_dims.begin(), i) > 0 ? "," : "") << *i;
117  LBANN_ERROR(err.str());
118  }
119  }
120 
121  if (m_is_conv && m_has_bias) {
122  std::stringstream err;
123  err << "The K-FAC does not currently support biases for convolutional "
124  "layers."
125  << " layer: " << layer->get_name();
126  LBANN_ERROR(err.str());
127  }
128  }
129 
130  kfac_block_fc_conv(const kfac_block_fc_conv&) = default;
131  kfac_block_fc_conv& operator=(const kfac_block_fc_conv&) = default;
132 
134  {
135  int total_size = 0;
136  total_size +=
137  m_kronecker_inverse_A.Height() * m_kronecker_inverse_A.Width();
138  total_size +=
139  m_kronecker_inverse_G.Height() * m_kronecker_inverse_G.Width();
140  total_size +=
141  m_kronecker_average_A.Height() * m_kronecker_average_A.Width();
142  total_size +=
143  m_kronecker_average_G.Height() * m_kronecker_average_G.Width();
144  total_size +=
145  m_kronecker_factor_buf_A.Height() * m_kronecker_factor_buf_A.Width();
146  total_size +=
147  m_kronecker_factor_buf_G.Height() * m_kronecker_factor_buf_G.Width();
148  total_size += m_grad_buffer_v.Height() * m_grad_buffer_v.Width();
149  return total_size;
150  }
151 
152  void compute_local_kronecker_factors(lbann_comm* comm,
153  bool print_matrix,
154  bool print_matrix_summary) override;
155 
156  const std::vector<El::AbstractMatrix<DataType>*>
158  {
159  std::vector<El::AbstractMatrix<DataType>*> ret = {
160  &m_kronecker_factor_buf_A,
161  &m_kronecker_factor_buf_G};
162  return ret;
163  }
164 
166  DataType kronecker_decay,
167  bool print_matrix,
168  bool print_matrix_summary) override;
169 
170  void update_kronecker_inverse(lbann_comm* comm,
171  bool use_pi,
172  DataType damping_act,
173  DataType damping_err,
174  DataType learning_rate_factor,
175  bool use_eigen_decomposition,
176  bool print_matrix,
177  bool print_matrix_summary,
178  bool print_time) override;
179 
180  void compute_preconditioned_gradients(lbann_comm* comm,
181  DataType learning_rate_factor,
182  bool print_matrix,
183  bool print_matrix_summary,
184  bool print_time) override;
185 
186  void initialize_activations_and_errors(lbann_comm* comm,
187  int num_local_activations,
188  int num_local_errors,
189  int num_weights) override;
190 
191  void start_communication_forward_end(lbann_comm* comm) override;
192  void end_communication_forward_end(lbann_comm* comm) override;
193  void start_communication_backward_end(lbann_comm* comm) override;
194  void end_communication_backward_end(lbann_comm* comm) override;
195 
196  const std::vector<El::AbstractMatrix<DataType>*>
197  get_preconditioned_grad_buffers() override;
198 
199  int get_inverse_matrices(El::Matrix<DataType, Device>& output,
200  int offset) override;
201 
202  int get_inverse_matrices_size(lbann_comm* comm) override;
203 
204  std::vector<int> get_inverse_matrices_size_vector(lbann_comm* comm) override;
205 
206  void resize_inverse_matrices_size(
207  El::Matrix<double, El::Device::CPU>& inverse_matrices_size,
208  int block_number) override;
209 
210  int set_inverse_matrices(El::Matrix<DataType, Device>& workspace,
211  int offset,
212  lbann_comm* comm) override;
213 
// Get block's information in one line.
//
// Returns the string produced by the base class kfac_block<Device>::get_info()
// with ", is_conv=" and the value of m_is_conv appended.
214  std::string get_info() const override
215  {
216  std::ostringstream oss;
217  oss << kfac_block<Device>::get_info() << ", is_conv=" << m_is_conv;
218  return oss.str();
219  }
220 
221 private:
223  static void
224  get_kronecker_factor_fc(El::AbstractMatrix<DataType>& factor,
225  const El::AbstractMatrix<DataType>& activations,
226  DataType alpha);
227 
229  static void get_kronecker_factor_conv(
230  El::Matrix<DataType, Device>& factor,
231  El::Matrix<DataType, Device>& Acol,
232  const El::Matrix<DataType, Device>& activations,
233  DataType alpha,
234  size_t local_batch_size,
235  size_t num_channels,
236  const std::vector<int>& spatial_dims,
238  l_conv,
239  bool use_im2col,
240  const El::SyncInfo<Device>& sync_info);
241 
243  static double compute_pi(const El::Matrix<DataType, Device>& A,
244  const El::Matrix<DataType, Device>& G,
245  El::Matrix<DataType, Device>& ws,
246  const El::SyncInfo<Device>& sync_info);
247 
251  {
252  return dynamic_cast<
254  this->m_layer);
255  }
256 
257  std::vector<std::tuple<std::string, size_t, size_t>>
258  get_internal_matrix_info() const override;
259 
261  const bool m_is_conv, m_has_bias;
262  size_t m_conv_input_spatial_prod, m_conv_output_spatial_prod;
263  std::vector<int> m_conv_input_spatial_dims, m_conv_output_spatial_dims;
264 
266  El::Matrix<DataType, Device> m_kronecker_factor_buf_A,
268 
270  size_t m_height_A, m_height_G;
271 
273  El::Matrix<DataType, Device> m_kronecker_average_A, m_kronecker_average_G;
274 
276  El::Matrix<DataType, Device> m_kronecker_inverse_A, m_kronecker_inverse_G;
277 
279  size_t m_Ainv_height = 0, m_Ainv_width = 0, m_Ginv_height = 0,
280  m_Ginv_width = 0;
281 
283  El::Matrix<DataType, Device> m_grad_buffer_v;
284 };
285 
286 } // namespace lbann
287 
288 #endif // LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_FC_CONV_HPP_INCLUDED
std::string get_info() const override
Get block's information in one line.
El::Matrix< DataType, Device > m_grad_buffer_v
Vectorized gradient buffer (only for fully-connected layers).
convolution_layer< DataType, data_layout::DATA_PARALLEL, Device > * get_conv_layer()
Get the pointer to its convolution_layer.
#define LBANN_ERROR(...)
Definition: exception.hpp:37
int get_local_memory_consumption() override
Get local memory consumption.
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Neural network tensor operation.
Definition: layer.hpp:285
std::vector< int > m_conv_output_spatial_dims
constexpr El::Device Device
El::Matrix< DataType, Device > m_kronecker_factor_buf_G
const bool m_is_conv
Information to perform its computation.
void update_kronecker_average(El::Matrix< DataType, Device > &Aave, const El::Matrix< DataType, Device > &A, size_t count, double decay, const El::SyncInfo< Device > &sync_info)
Update a Kronecker factor matrix using decay.
El::Matrix< DataType, Device > m_kronecker_average_G
El::Matrix< DataType, Device > m_kronecker_inverse_G
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
void conv_transpose(const El::Matrix< DataType, Device > &activations, El::Matrix< DataType, Device > &act_columns, size_t mini_batch_size, size_t num_channels, size_t spatial_prod, const El::SyncInfo< Device > &sync_info)
Transpose NC(D)HW matrix to N(D)HWC.
void im2col(const El::Matrix< DataType, Device > &im, El::Matrix< DataType, Device > &col, const int num_channels, const int im_num_dims, const int *im_dims, const int *im_pads, const int *window_dims, const int *window_strides, const int batch_size, const El::SyncInfo< Device > &sync_info)
im2col.
kfac_block_fc_conv(Layer *layer, kfac::KFACExecutionContext *context, const size_t layer_id, const size_t inverse_proc_rank, const bool enable_copy_errors, const bool enable_copy_activations, const int input_size, const int output_size, const bool is_conv)
void get_diagonal(El::Matrix< DataType, Device > &diag, const El::Matrix< DataType, Device > &A, const El::SyncInfo< Device > &sync_info)
Get diagonal elements of a matrix.
const std::vector< El::AbstractMatrix< DataType > * > get_local_kronecker_buffers() override
Get buffers of Kronecker factors for reduce-scatter.
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.