LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
optimizer.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_OPTIMIZERS_OPTIMIZER_HPP_INCLUDED
28 #define LBANN_OPTIMIZERS_OPTIMIZER_HPP_INCLUDED
29 
30 #include "lbann/base.hpp"
34 #ifdef LBANN_HAS_GPU
36 #endif // LBANN_HAS_GPU
38 #include "lbann/utils/memory.hpp"
39 
40 #include <memory>
41 #include <string>
42 #include <typeindex>
43 #include <unordered_set>
44 
45 namespace lbann_data {
46 class Optimizer;
47 }
48 
49 namespace lbann {
50 
// optimizer_gradient_status: status of values in objective function gradient.
// NOTE(review): the declaration line carrying the enumeration's name
// (optimizer.hpp:52) is not present in this listing, and the embedded line
// numbers skip between the enumerators, so further enumerators may be
// missing here as well -- confirm against the original header.
53 {
// Presumably: values can be accessed immediately -- confirm.
55  ready,
// Initial state of both optimizer::m_gradient_status and
// GradientHelper::status_ (see their in-class initializers).
59  cleared,
67 };
68 
// Human-readable string for status of gradient in optimizer.
// Declaration only; the definition is not part of this header listing.
70 std::string to_string(optimizer_gradient_status status);
71 
72 // Forward declarations
73 class lbann_comm;
74 class persist;
75 class weights;
76 
// Abstract base class for gradient-based optimization algorithms.
//
// NOTE(review): this is a line-numbered listing of optimizer.hpp in which a
// number of original lines are absent (the embedded line numbers skip).
// Comments below flag the gaps that remove a declaration line, so that the
// orphaned bodies can still be read.
85 class optimizer : public Cloneable<HasAbstractFunction<optimizer>>
86 {
87 public:
89 
91  optimizer();
92  virtual ~optimizer() = default;
93 
95 
// Pure virtual; returns a std::string (presumably the name of the concrete
// optimizer type -- confirm against implementations).
97  virtual std::string get_type() const = 0;
// Returns a `description` object (a helper that generates formatted
// description messages). Virtual but not pure: a base definition exists
// outside this header listing.
99  virtual description get_description() const;
100 
// Pure-virtual learning-rate accessor and mutator; the rate is exchanged as
// a double.
101  virtual double get_learning_rate() const = 0;
102  virtual void set_learning_rate(double) = 0;
103 
105 
// Pure virtual; receives a raw pointer to a weights object (presumably the
// weights this optimizer is attached to -- confirm).
107  virtual void setup(weights* w) = 0;
108 
// Declared here, defined elsewhere. `scale` defaults to 1 and
// `allreduce_needed` defaults to false.
121  template <typename TensorDataType>
122  void add_to_gradient(El::AbstractDistMatrix<TensorDataType> const& contrib,
123  TensorDataType scale = 1.f,
124  bool allreduce_needed = false);
125 
127  void clear_gradient();
128 
// Gradient sources are identified by opaque `const void*` values; the set
// of them is held in m_gradient_sources. Presumably add/remove insert into
// and erase from that set -- the definitions are not visible here.
131  El::Int get_num_gradient_sources() const;
139  void add_gradient_source(const void* source);
140 
148  void remove_gradient_source(const void* source);
149 
// Pure virtual; concrete optimizers implement the update step.
151  virtual void step() = 0;
152 
// Returns a mutable reference to a gradient matrix of the requested data
// type. `buf_scale` and `in_scale` are non-const references, so they are
// presumably written by the call as output scaling factors -- confirm
// against the definition. `allreduce_needed` defaults to false.
176  template <typename TensorDataType>
177  El::AbstractDistMatrix<TensorDataType>&
178  get_gradient_buffer(TensorDataType& buf_scale,
179  TensorDataType& in_scale,
180  bool allreduce_needed = false);
181 
183 
184 
// Communicator access. Dereferences m_comm without a null check, so m_comm
// must already point at a communicator (see set_comm) when this is called.
187  lbann_comm& get_comm() { return *m_comm; }
188 
// Access LBANN communicator (const overload); same precondition on m_comm.
190  const lbann_comm& get_comm() const { return *m_comm; }
191 
193 
194 
// Statistics access: returns the current value of m_step_time.
197  EvalType get_step_time() const { return m_step_time; }
198 
// Reset stats counters. The base implementation zeroes m_step_time;
// virtual, so subclasses may override it.
200  virtual void reset_counters() { m_step_time = 0; }
201 
203 
204 
// Serialization hook templated on the archive type; declared only.
207  template <class Archive>
208  void serialize(Archive& ar);
209 
211 
// Pure virtual; takes the lbann_data::Optimizer message by mutable
// reference (presumably to fill it in -- confirm against implementations).
213  virtual void write_proto(lbann_data::Optimizer& proto) const = 0;
214 
// GradientHelper: manage gradient information.
// NOTE(review): the class-head line (optimizer.hpp:216) is absent from this
// listing; the name is taken from the closing `// class GradientHelper`
// comment and from the destructor below.
217  {
218  public:
219  virtual ~GradientHelper() = default;
// Non-virtual accessors for the helper's own status_ field.
220  optimizer_gradient_status get_status() const noexcept { return status_; }
221  void set_status(optimizer_gradient_status s) noexcept { status_ = s; }
// Type-erased access to the gradient matrix; implemented by
// GradientHelperImpl with a concrete data type.
222  virtual El::BaseDistMatrix& gradient() noexcept = 0;
223  virtual El::BaseDistMatrix const& gradient() const noexcept = 0;
224  virtual void start_allreduce(lbann_comm&) = 0;
225  virtual void complete_allreduce(lbann_comm&) = 0;
226  virtual void clear() = 0;
227 
228  private:
// A freshly constructed helper starts in the `cleared` state.
229  optimizer_gradient_status status_ = optimizer_gradient_status::cleared;
230  }; // class GradientHelper
231 
// GradientHelperImpl: GradientHelper for one tensor data type.
// NOTE(review): the class-head line (optimizer.hpp:233) is absent from this
// listing; the `override` specifiers below show it overrides the
// GradientHelper virtuals, so it presumably derives from GradientHelper.
232  template <typename TensorDataType>
234  {
235  public:
236  using AbsDistMatType = El::AbstractDistMatrix<TensorDataType>;
237 
238  public:
// Creates the gradient matrix via AbsDistMatType::Instantiate(dist_data),
// then calls El::Zeros on it with the requested height and width.
239  GradientHelperImpl(El::Int height, El::Int width, El::DistData dist_data)
240  : gradient_{AbsDistMatType::Instantiate(dist_data)}
241  {
242  El::Zeros(*gradient_, height, width);
243  }
// Overrides returning the typed matrix (AbsDistMatType) rather than the
// El::BaseDistMatrix named in the base declarations.
244  AbsDistMatType& gradient() noexcept override { return *gradient_; }
245  AbsDistMatType const& gradient() const noexcept override
246  {
247  return *gradient_;
248  }
// Declared here; definitions are outside this listing.
249  void start_allreduce(lbann_comm& comm) override;
250  void complete_allreduce(lbann_comm& comm) override;
251  void clear() override;
252 
253  private:
// Sole owner of the gradient matrix.
254  std::unique_ptr<AbsDistMatType> gradient_;
256  }; // class GradientHelperImpl
257 
// NOTE(review): line 258 is absent from this listing; an access specifier
// may precede the copy operations below -- confirm against the original.
259  optimizer(const optimizer& other);
260  optimizer& operator=(const optimizer& other);
261 
// Return the current gradient status.
// NOTE(review): the signature line (optimizer.hpp:263,
// `optimizer_gradient_status get_gradient_status() const`) is absent from
// this listing; only the body remains.
264  {
265  return m_gradient_status;
266  }
// Sets m_gradient_status.
// NOTE(review): the signature line (optimizer.hpp:267,
// `void set_gradient_status(const optimizer_gradient_status status)`) is
// absent from this listing; only the body remains.
268  {
269  m_gradient_status = status;
270  }
// Mutable access to the set of gradient sources.
271  std::unordered_set<const void*>& get_gradient_sources()
272  {
273  return m_gradient_sources;
274  }
// Stores the address of `comm` in m_comm. m_comm is a raw pointer and the
// destructor is defaulted, so this class does not delete the communicator;
// `comm` must stay alive for as long as get_comm() is used.
275  void set_comm(lbann_comm& comm) { m_comm = &comm; }
276 
// Overwrite the accumulated step time.
277  void set_step_time(EvalType time) { m_step_time = time; }
278 
// Add `time` to the accumulated step time.
279  void inc_step_time(EvalType time) { m_step_time += time; }
280 
// Pure virtual. The tuple's element types match the (height, width,
// dist_data) parameters of the GradientHelperImpl constructor, so it
// presumably reports the gradient matrix's dimensions and distribution --
// confirm against implementations.
281  virtual std::tuple<El::Int, El::Int, El::DistData>
282  get_matrix_info() const = 0;
283 
// Declared here; definition is outside this listing.
284  template <typename TensorDataType>
285  void accumulate_all_gradient_contributions(
286  El::AbstractDistMatrix<TensorDataType>& gradient);
287 
// Declared here; definitions are outside this listing.
293  void start_gradient_allreduce();
294 
300  void finish_gradient_allreduce();
301 
302 private:
// NOTE(review): the declaration `lbann_comm * m_comm` ("LBANN
// communicator", optimizer.hpp:304) belongs here but is absent from this
// listing.
305 
// Sources of gradient contributions, keyed by opaque pointer.
316  std::unordered_set<const void*> m_gradient_sources;
317 
// Current gradient status; starts as `cleared`.
319  optimizer_gradient_status m_gradient_status =
320  optimizer_gradient_status::cleared;
321 
// Accumulated step time; EvalType is double. Starts at 0 and is changed by
// set_step_time, inc_step_time and reset_counters.
323  EvalType m_step_time = 0;
324 
// NOTE(review): the declaration of gradient_manager_type (somewhere in
// lines 325-328) is absent from this listing.
// gradients_ maps a std::type_index to an owned gradient manager --
// presumably one entry per tensor data type in use; confirm.
329  using gradient_manager_ptr = std::unique_ptr<gradient_manager_type>;
330  std::unordered_map<std::type_index, gradient_manager_ptr> gradients_;
331 };
332 
333 } // namespace lbann
334 
335 #endif // LBANN_OPTIMIZERS_OPTIMIZER_HPP_INCLUDED
lbann_comm * m_comm
LBANN communicator.
Definition: optimizer.hpp:304
void set_status(optimizer_gradient_status s) noexcept
Definition: optimizer.hpp:221
std::unordered_map< std::type_index, gradient_manager_ptr > gradients_
Definition: optimizer.hpp:330
std::unordered_set< const void * > & get_gradient_sources()
Definition: optimizer.hpp:271
Inject polymorphic clone functions into hierarchies.
Definition: cloneable.hpp:94
void set_comm(lbann_comm &comm)
Definition: optimizer.hpp:275
void set_gradient_status(const optimizer_gradient_status status)
Definition: optimizer.hpp:267
GradientHelperImpl(El::Int height, El::Int width, El::DistData dist_data)
Definition: optimizer.hpp:239
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
Generates nicely formatted description messages.
Definition: description.hpp:49
Abstract base class for gradient-based optimization algorithms.
Definition: optimizer.hpp:85
Values can be accessed immediately.
void set_step_time(EvalType time)
Definition: optimizer.hpp:277
El::AbstractDistMatrix< TensorDataType > AbsDistMatType
Definition: optimizer.hpp:236
std::unique_ptr< AbsDistMatType > gradient_
Definition: optimizer.hpp:254
std::unordered_set< const void * > m_gradient_sources
Sources of gradient contributions.
Definition: optimizer.hpp:316
AbsDistMatType const & gradient() const noexcept override
Definition: optimizer.hpp:245
Manage gradient information.
Definition: optimizer.hpp:216
virtual void reset_counters()
Reset stats counters.
Definition: optimizer.hpp:200
lbann_comm & get_comm()
Communicator access.
Definition: optimizer.hpp:187
Allreduce is needed before accessing values.
const lbann_comm & get_comm() const
Access LBANN communicator.
Definition: optimizer.hpp:190
optimizer_gradient_status get_status() const noexcept
Definition: optimizer.hpp:220
EvalType get_step_time() const
Statistics access and management.
Definition: optimizer.hpp:197
optimizer_gradient_status get_gradient_status() const
Return the current gradient status.
Definition: optimizer.hpp:263
std::unique_ptr< gradient_manager_type > gradient_manager_ptr
Definition: optimizer.hpp:329
void inc_step_time(EvalType time)
Definition: optimizer.hpp:279
optimizer_gradient_status
Status of values in objective function gradient.
Definition: optimizer.hpp:52
AbsDistMatType & gradient() noexcept override
Definition: optimizer.hpp:244
Allreduce on values is in progress.
std::string to_string(optimizer_gradient_status status)
Human-readable string for status of gradient in optimizer.
double EvalType
Definition: base.hpp:189