LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
kfac_block_bn.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_BN_HPP_INCLUDED
28 #define LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_BN_HPP_INCLUDED
29 
33 
34 namespace lbann {
35 
36 namespace kfac_bn_util {
37 
// The memory copy part of compute_bn_factor. Combined with GEMM.
//
// Declaration only; the definition is not in this header listing.
// Parameters, as far as the signature shows:
//   activations, errors, scales, biases - read-only input matrices.
//   cols        - the only non-const matrix; presumably the output buffer
//                 that is later fed to the GEMM (TODO confirm).
//   batch_size, num_channels, spatial_prod - sizes passed by the caller.
//   sync_info   - El::SyncInfo for the target Device.
40 template <El::Device Device>
41 void compute_bn_factor_data2col(const El::Matrix<DataType, Device>& activations,
42  const El::Matrix<DataType, Device>& errors,
43  const El::Matrix<DataType, Device>& scales,
44  const El::Matrix<DataType, Device>& biases,
45  El::Matrix<DataType, Device>& cols,
46  size_t batch_size,
47  size_t num_channels,
48  size_t spatial_prod,
49  const El::SyncInfo<Device>& sync_info);
50 
51 } // namespace kfac_bn_util
52 
// K-FAC block for a batch-normalization layer; derives from
// kfac_block<Device> and overrides its per-block hooks.
// NOTE: this listing omits some original lines (listing numbers jump),
// so a few signatures below appear without their first line.
55 template <El::Device Device>
56 class kfac_block_bn : public kfac_block<Device>
57 {
58 public:
  // Constructor (the first signature line(s), carrying `layer` and
  // `context`, are elided in this listing). All arguments are forwarded
  // unchanged to the kfac_block<Device> base constructor.
63  size_t layer_id,
64  size_t inverse_proc_rank,
65  bool enable_copy_errors,
66  bool enable_copy_activations,
67  int input_size,
68  int output_size)
69  : kfac_block<Device>(layer,
70  context,
71  layer_id,
72  inverse_proc_rank,
73  enable_copy_errors,
74  enable_copy_activations,
75  input_size,
76  output_size)
77  {
  // Only the first parent layer is inspected.
  // NOTE(review): the vector is indexed without checking that it is
  // non-empty - confirm a BN layer always has a parent here.
78  const auto parent = layer->get_parent_layers()[0];
  // Classify the parent by dynamic_cast: fully-connected or convolution.
  // (One template-argument line of each cast is elided in this listing.)
79  const bool is_after_fc =
80  (dynamic_cast<const fully_connected_layer<DataType,
82  Device>*>(parent) != nullptr);
83  m_is_after_conv =
84  (dynamic_cast<const convolution_layer<DataType,
86  Device>*>(parent) != nullptr);
  // Any other parent type is rejected with an error message that names
  // the layer and the parent's type.
87  if (!is_after_fc && !m_is_after_conv) {
88  std::stringstream err;
89  err << "The K-FAC only supports batch-normalization layers after "
90  << "fully-connected layers or convolutional layers."
91  << " layer: " << layer->get_name()
92  << " parent type: " << parent->get_type();
93  LBANN_ERROR(err.str());
94  }
95 
  // After a fully-connected layer: the channel count is the height of
  // the parent's local activations matrix and there is no spatial extent.
96  if (is_after_fc) {
97  const auto& dtl_parent =
98  dynamic_cast<const data_type_layer<DataType>&>(*parent);
99  const El::AbstractMatrix<DataType>& local_activations =
100  dtl_parent.get_local_activations();
101  m_num_channels = local_activations.Height();
102  m_spatial_prod = 1;
103  }
  // Otherwise (after a convolution): the first input dimension is the
  // channel count; the remaining dimensions are multiplied together
  // into the size_t member m_spatial_prod.
104  else {
105  const auto input_dims = layer->get_input_dims();
106  m_num_channels = input_dims[0];
107  m_spatial_prod = 1;
108  // std::accumulate might overflow for large 3D layers
109  for (auto i = input_dims.begin() + 1; i != input_dims.end(); i++)
110  m_spatial_prod *= *i;
111  }
112  }
  // Copy construction and copy assignment are explicitly defaulted.
113  kfac_block_bn(const kfac_block_bn&) = default;
114  kfac_block_bn& operator=(const kfac_block_bn&) = default;
115 
  // Body of a function whose declaration line is elided in this listing
  // (presumably `int get_local_memory_consumption() override` - TODO
  // confirm). Sums Height()*Width() of the three Fisher matrices.
  // NOTE(review): the result is an element count summed into an int,
  // not a byte count - confirm the intended unit and that int is wide
  // enough for large matrices.
117  {
118  int total_size = 0;
119  total_size += m_fisher_buf.Height() * m_fisher_buf.Width();
120  total_size += m_fisher_average.Height() * m_fisher_average.Width();
121  total_size += m_fisher_inverse.Height() * m_fisher_inverse.Width();
122  return total_size;
123  }
124 
125  void compute_local_kronecker_factors(lbann_comm* comm,
126  bool print_matrix,
127  bool print_matrix_summary) override;
128 
  // The function-name line is elided in this listing (presumably
  // `get_local_kronecker_buffers() override` - TODO confirm).
  // Returns a one-element vector holding a pointer to m_fisher_buf.
129  const std::vector<El::AbstractMatrix<DataType>*>
131  {
132  std::vector<El::AbstractMatrix<DataType>*> ret = {&m_fisher_buf};
133  return ret;
134  }
135 
  // Tail of an override declaration whose first line is elided in this
  // listing (presumably update_kronecker_average - TODO confirm).
137  DataType kronecker_decay,
138  bool print_matrix,
139  bool print_matrix_summary) override;
140 
141  void update_kronecker_inverse(lbann_comm* comm,
142  bool use_pi,
143  DataType damping_act,
144  DataType damping_err,
145  DataType learning_rate_factor,
146  bool use_eigen_decomposition,
147  bool print_matrix,
148  bool print_matrix_summary,
149  bool print_time) override;
150 
151  void compute_preconditioned_gradients(lbann_comm* comm,
152  DataType learning_rate_factor,
153  bool print_matrix,
154  bool print_matrix_summary,
155  bool print_time) override;
156 
157  void start_communication_forward_end(lbann_comm* comm) override;
158  void end_communication_forward_end(lbann_comm* comm) override;
159  void start_communication_backward_end(lbann_comm* comm) override;
160  void end_communication_backward_end(lbann_comm* comm) override;
161 
162  const std::vector<El::AbstractMatrix<DataType>*>
163  get_preconditioned_grad_buffers() override;
164 
165  std::vector<std::tuple<std::string, size_t, size_t>>
166  get_internal_matrix_info() const override;
167 
  // One-line description: the base class's get_info() string with
  // ", is_after_conv=<flag>" appended.
168  std::string get_info() const override
169  {
170  std::ostringstream oss;
171  oss << kfac_block<Device>::get_info()
172  << ", is_after_conv=" << m_is_after_conv;
173  return oss.str();
174  }
176  int get_inverse_matrices(El::Matrix<DataType, Device>& output,
177  int offset) override;
178 
180  int get_inverse_matrices_size(lbann_comm* comm) override;
181 
  // Not supported for BN blocks: the body only reports an error through
  // LBANN_ERROR and has no return statement.
183  std::vector<int> get_inverse_matrices_size_vector(lbann_comm* comm) override
184  {
185  LBANN_ERROR("Sub-grid parallelism is not implemented for BN layer");
186  }
187 
  // The first signature line is elided in this listing (presumably
  // `void resize_inverse_matrices_size(` - TODO confirm). Like the
  // function above, it only reports an error through LBANN_ERROR.
190  El::Matrix<double, El::Device::CPU>& inverse_matrices_size,
191  int block_number) override
192  {
193  LBANN_ERROR("Sub-grid parallelism is not implemented for BN layer");
194  }
195 
197  int set_inverse_matrices(El::Matrix<DataType, Device>& workspace,
198  int offset,
199  lbann_comm* comm) override;
200 
201 private:
  // NOTE: the constructor and get_info() use a member m_is_after_conv
  // whose declaration line is not present in this listing.
  // Channel count and product of the non-channel input dimensions,
  // both set in the constructor.
204  size_t m_num_channels, m_spatial_prod;
205 
  // Fisher-block buffer; the one exposed by the buffer getter above.
207  El::Matrix<DataType, Device> m_fisher_buf;
208 
  // Presumably the running average of the Fisher matrix - TODO confirm.
210  El::Matrix<DataType, Device> m_fisher_average;
211 
  // Presumably the inverse of the averaged Fisher matrix - TODO confirm.
213  El::Matrix<DataType, Device> m_fisher_inverse;
214 };
215 
216 } // namespace lbann
217 
218 #endif // LBANN_EXECUTION_ALGORITHMS_KFAC_KFAC_BLOCK_BN_HPP_INCLUDED
bool m_is_after_conv
Information to perform its computation.
void resize_inverse_matrices_size(El::Matrix< double, El::Device::CPU > &inverse_matrices_size, int block_number) override
Resize the inverse matrices size vector (not implemented for BN layers; reports an error).
#define LBANN_ERROR(...)
Definition: exception.hpp:37
int get_local_memory_consumption() override
Get local Memory Consumption.
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Neural network tensor operation.
Definition: layer.hpp:285
const std::vector< El::AbstractMatrix< DataType > * > get_local_kronecker_buffers() override
Get buffers of Kronecker factors for reduce-scatter.
constexpr El::Device Device
OutputAbsMatrixType & get_local_activations(int child_index=0)
std::string get_info() const override
Get block's information in one line.
kfac_block_bn(Layer *layer, kfac::KFACExecutionContext *context, size_t layer_id, size_t inverse_proc_rank, bool enable_copy_errors, bool enable_copy_activations, int input_size, int output_size)
El::Matrix< DataType, Device > m_fisher_inverse
Inverse of the average Fisher matrix.
El::Matrix< DataType, Device > m_fisher_average
Exponential moving average of the Fisher matrix.
void update_kronecker_average(El::Matrix< DataType, Device > &Aave, const El::Matrix< DataType, Device > &A, size_t count, double decay, const El::SyncInfo< Device > &sync_info)
Update a Kronecker factor matrix using decay.
std::vector< const Layer * > get_parent_layers() const
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
void compute_bn_factor_data2col(const El::Matrix< DataType, Device > &activations, const El::Matrix< DataType, Device > &errors, const El::Matrix< DataType, Device > &scales, const El::Matrix< DataType, Device > &biases, El::Matrix< DataType, Device > &cols, size_t batch_size, size_t num_channels, size_t spatial_prod, const El::SyncInfo< Device > &sync_info)
The memory copy part of compute_bn_factor. Combined with GEMM.
El::Matrix< DataType, Device > m_fisher_buf
Lower triangle buffers of the Fisher block.
Affine transformation.
std::vector< int > get_inverse_matrices_size_vector(lbann_comm *comm) override
Get inverse matrices size vector.