27 #ifndef LBANN_EXECUTION_ALGORITHMS_KFAC_BLOCK_CHANNELWISE_FC_HPP_INCLUDED 28 #define LBANN_EXECUTION_ALGORITHMS_KFAC_BLOCK_CHANNELWISE_FC_HPP_INCLUDED 37 template <El::Device Device>
45 const size_t layer_id,
46 const size_t inverse_proc_rank,
47 const bool enable_copy_errors,
48 const bool enable_copy_activations,
50 const int output_size)
56 enable_copy_activations,
87 bool print_matrix_summary)
final;
89 const std::vector<El::AbstractMatrix<DataType>*>
92 std::vector<El::AbstractMatrix<DataType>*> ret = {
99 DataType kronecker_decay,
101 bool print_matrix_summary)
final;
105 DataType damping_act,
106 DataType damping_err,
107 DataType learning_rate_factor,
108 bool use_eigen_decomposition,
110 bool print_matrix_summary,
111 bool print_time)
final;
114 DataType learning_rate_factor,
116 bool print_matrix_summary,
117 bool print_time)
final;
120 int num_local_activations,
121 int num_local_errors,
122 int num_weights)
final;
129 const std::vector<El::AbstractMatrix<DataType>*>
140 El::Matrix<double, El::Device::CPU>& inverse_matrices_size,
141 int block_number)
final;
149 std::ostringstream oss;
150 oss << kfac_block<Device>::get_info();
158 const El::AbstractMatrix<DataType>& activations,
162 static double compute_pi(
const El::Matrix<DataType, Device>& A,
163 const El::Matrix<DataType, Device>& G,
164 El::Matrix<DataType, Device>& ws,
165 const El::SyncInfo<Device>& sync_info);
176 std::vector<std::tuple<std::string, size_t, size_t>>
207 #endif // LBANN_EXECUTION_ALGORITHMS_KFAC_BLOCK_CHANNELWISE_FC_HPP_INCLUDED El::Matrix< DataType, Device > m_kronecker_factor_buf_A
Lower triangle buffers of Kronecker factors.
kfac_block_channelwise_fc & operator=(const kfac_block_channelwise_fc &)=default
El::Matrix< DataType, Device > m_kronecker_average_A
Exponential moving average of Kronecker factors.
static double compute_pi(const El::Matrix< DataType, Device > &A, const El::Matrix< DataType, Device > &G, El::Matrix< DataType, Device > &ws, const El::SyncInfo< Device > &sync_info)
Returns the pi constant.
Layer * m_layer
The target layer.
const std::vector< El::AbstractMatrix< DataType > * > get_local_kronecker_buffers() final
Get buffers of Kronecker factors for reduce-scatter.
El::Matrix< DataType, Device > m_kronecker_factor_buf_G
std::vector< std::tuple< std::string, size_t, size_t > > get_internal_matrix_info() const override
Return the list of internal matrices' (name, height, width) for debugging. All internal matrices shou...
El::Matrix< DataType, Device > m_kronecker_average_G
std::vector< int > get_inverse_matrices_size_vector(lbann_comm *comm) final
Get inverse matrices size vector.
int get_inverse_matrices(El::Matrix< DataType, Device > &output, int offset) final
Copy inverse matrices to output buffer.
void start_communication_backward_end(lbann_comm *comm) final
void initialize_activations_and_errors(lbann_comm *comm, int num_local_activations, int num_local_errors, int num_weights) final
Copies activations, errors, and weights from model class to private variables to be used in KFAC comp...
Neural network tensor operation.
int set_inverse_matrices(El::Matrix< DataType, Device > &workspace, int offset, lbann_comm *comm) final
Copy inverse matrices from output buffer.
El::Matrix< DataType, Device > m_kronecker_inverse_G
constexpr El::Device Device
convolution_layer< DataType, data_layout::DATA_PARALLEL, Device > * get_conv_layer()
Get the pointer to its convolution_layer.
void update_kronecker_average(lbann_comm *comm, DataType kronecker_decay, bool print_matrix, bool print_matrix_summary) final
Update the average Kronecker factors.
size_t m_conv_input_spatial_prod
size_t m_Ainv_height
Size and height of inverse matrices.
int get_local_memory_consumption() final
Get local Memory Consumption.
void end_communication_backward_end(lbann_comm *comm) final
El::Matrix< DataType, Device > m_kronecker_inverse_A
Inverse of the average Kronecker factors.
void end_communication_forward_end(lbann_comm *comm) final
static void get_kronecker_factor_fc(El::AbstractMatrix< DataType > &factor, const El::AbstractMatrix< DataType > &activations, DataType alpha)
Gets the Kronecker factor matrix of a FC layer.
void update_kronecker_inverse(lbann_comm *comm, bool use_pi, DataType damping_act, DataType damping_err, DataType learning_rate_factor, bool use_eigen_decomposition, bool print_matrix, bool print_matrix_summary, bool print_time) final
Compute the inverse of the average Kronecker factors.
size_t m_height_A
The heights of the Kronecker factors.
std::vector< int > m_conv_input_spatial_dims
const bool m_has_bias
Information to perform its computation.
void resize_inverse_matrices_size(El::Matrix< double, El::Device::CPU > &inverse_matrices_size, int block_number) final
Get inverse matrices size vector.
void compute_preconditioned_gradients(lbann_comm *comm, DataType learning_rate_factor, bool print_matrix, bool print_matrix_summary, bool print_time) final
Compute the preconditioned gradients.
El::Matrix< DataType, Device > m_grad_buffer_v
Vectorized gradient buffer (only for fully-connected layers).
kfac_block_channelwise_fc(Layer *layer, kfac::KFACExecutionContext *context, const size_t layer_id, const size_t inverse_proc_rank, const bool enable_copy_errors, const bool enable_copy_activations, const int input_size, const int output_size)
void compute_local_kronecker_factors(lbann_comm *comm, bool print_matrix, bool print_matrix_summary) final
Compute Kronecker factors.
std::string get_info() const final
Get block's information in one line.
const std::vector< El::AbstractMatrix< DataType > * > get_preconditioned_grad_buffers() final
Get buffers of preconditioned parameter gradients.
std::vector< int > m_conv_output_spatial_dims
void start_communication_forward_end(lbann_comm *comm) final
size_t m_conv_output_spatial_prod
int get_inverse_matrices_size(lbann_comm *comm) final
Get inverse matrices size (offset).