LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
learning_rate.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
25 //
26 // lbann_learning_rate .hpp .cpp - Callback hooks for learning rate schedules
28 
29 #ifndef LBANN_CALLBACKS_LEARNING_RATE_HPP_INCLUDED
30 #define LBANN_CALLBACKS_LEARNING_RATE_HPP_INCLUDED
31 
33 #include <unordered_map>
34 #include <unordered_set>
35 
36 namespace lbann {
37 
38 // Forward declarations
39 class optimizer;
40 
41 namespace callback {
42 
43 // Different schedules should inherit from learning_rate.
44 
50 {
51 public:
52  learning_rate();
53  learning_rate(const learning_rate&) = default;
54  learning_rate& operator=(const learning_rate&) = default;
56  learning_rate(std::vector<std::string> weights_names);
58  void setup(model* m) override;
60  void on_epoch_end(model* m) override;
61 
64  void on_backward_prop_end(model* m) override;
65 
66 protected:
67  std::vector<std::string> const& get_weights_names() const // Read-only accessor for m_weights_names.
68  {
69  return m_weights_names; // handed back by const reference, so the vector is not copied
70  }
71 
72 protected:
80  virtual float global_schedule(model* m) // Default global schedule hook; derived schedules override it.
81  { // `m` is unused by this default: the current global learning rate is returned unchanged.
82  return get_current_global_learning_rate(); // restored return: flowing off the end of a float function is undefined behaviour
83  }
84 
90  virtual float optimizer_schedule(model* m, optimizer& opt);
91 
92  const std::unordered_set<weights*>& get_weights() const noexcept // Read-only accessor for m_weights.
93  {
94  return m_weights; // const reference to the set of weights pointers; the set itself is not copied
95  }
96 
97  static float get_current_global_learning_rate() noexcept // Returns the static m_cur_global_lr value.
98  {
99  return m_cur_global_lr; // static member, so one value is shared by every learning_rate object
100  }
101 
102  static void update_global_learning_rate(float rate) noexcept // Overwrites the static m_cur_global_lr with `rate`.
103  {
104  m_cur_global_lr = rate; // no validation of `rate` is performed here
105  }
106 
107 private:
114  static float m_cur_global_lr;
115 
117  std::vector<std::string> m_weights_names;
118 
120  std::unordered_set<weights*> m_weights;
121 };
122 
127 {
128 public:
130  step_learning_rate(size_t step, float amt);
131  step_learning_rate(size_t step,
132  float amt,
133  std::vector<std::string> weights_names);
134  step_learning_rate(const step_learning_rate&) = default;
136  step_learning_rate* copy() const override // Returns a new heap-allocated copy of this object.
137  {
138  return new step_learning_rate(*this); // copy-constructed; NOTE(review): raw owning pointer, caller presumably takes ownership - confirm against the base-class contract
139  }
140  std::string name() const override { return "step learning rate"; } // Return this callback's name.
141 
142 protected:
143  float global_schedule(model* m) override;
144 
145 private:
147  void write_specific_proto(lbann_data::Callback& proto) const final;
148 
150  size_t m_step;
152  float m_amt;
153 };
154 
155 // Builder function
156 std::unique_ptr<callback_base> build_step_learning_rate_callback_from_pbuf(
157  const google::protobuf::Message&,
158  std::shared_ptr<lbann_summary> const&);
159 
164 {
165 public:
166  set_learning_rate(size_t step, float val);
167  set_learning_rate(size_t step,
168  float val,
169  std::vector<std::string> weights_names);
170  set_learning_rate(const set_learning_rate&) = default;
171  set_learning_rate& operator=(const set_learning_rate&) = default;
172  set_learning_rate* copy() const override // Returns a new heap-allocated copy of this object.
173  {
174  return new set_learning_rate(*this); // copy-constructed; NOTE(review): raw owning pointer, caller presumably takes ownership - confirm against the base-class contract
175  }
176  std::string name() const override { return "set learning rate"; } // Return this callback's name (previously reported "step learning rate", colliding with step_learning_rate).
177 
178 protected:
179  float global_schedule(model* m) override;
180 
181 private:
183  void write_specific_proto(lbann_data::Callback& proto) const final;
185  size_t m_step;
187  float m_val;
188 };
189 
190 // Builder function
191 std::unique_ptr<callback_base> build_set_learning_rate_callback_from_pbuf(
192  const google::protobuf::Message&,
193  std::shared_ptr<lbann_summary> const&);
194 
200 {
201 public:
206  adaptive_learning_rate(size_t patience, float amt);
207  adaptive_learning_rate(size_t patience,
208  float amt,
209  std::vector<std::string> weights_names);
212  adaptive_learning_rate* copy() const override // Returns a new heap-allocated copy of this object.
213  {
214  return new adaptive_learning_rate(*this); // copy-constructed; NOTE(review): raw owning pointer, caller presumably takes ownership - confirm against the base-class contract
215  }
216  std::string name() const override { return "adaptive learning rate"; } // Return this callback's name.
217 
218 protected:
219  float global_schedule(model* m) override;
220 
221 private:
223  void write_specific_proto(lbann_data::Callback& proto) const final;
224 
226  size_t m_patience;
228  float m_amt;
230  size_t m_cur_epoch = std::numeric_limits<size_t>::max();
232  EvalType m_last_score = std::numeric_limits<EvalType>::max();
234  size_t m_wait = 0;
236  bool m_adjust_learning_rate = false;
237 };
238 
239 // Builder function
240 std::unique_ptr<callback_base> build_adaptive_learning_rate_callback_from_pbuf(
241  const google::protobuf::Message&,
242  std::shared_ptr<lbann_summary> const&);
243 
248 {
249 public:
254  drop_fixed_learning_rate(std::vector<size_t> drop_epochs, float amt);
255  drop_fixed_learning_rate(std::vector<size_t> drop_epochs,
256  float amt,
257  std::vector<std::string> weights_names);
260  operator=(const drop_fixed_learning_rate&) = default;
261  drop_fixed_learning_rate* copy() const override // Returns a new heap-allocated copy of this object.
262  {
263  return new drop_fixed_learning_rate(*this); // copy-constructed; NOTE(review): raw owning pointer, caller presumably takes ownership - confirm against the base-class contract
264  }
265  std::string name() const override { return "drop fixed learning rate"; } // Return this callback's name.
266 
267 protected:
268  float global_schedule(model* m) override;
269 
270 private:
272  void write_specific_proto(lbann_data::Callback& proto) const final;
273 
275  float m_amt;
280  std::vector<size_t> m_drop_epochs;
281 };
282 
283 // Builder function
284 std::unique_ptr<callback_base>
286  const google::protobuf::Message&,
287  std::shared_ptr<lbann_summary> const&);
288 
297 {
298 public:
302  linear_growth_learning_rate(float target, size_t num_epochs);
303  linear_growth_learning_rate(float target, size_t num_epochs, size_t delay);
304  linear_growth_learning_rate(float target,
305  size_t num_epochs,
306  size_t delay,
307  std::vector<std::string> weights_names);
310  operator=(const linear_growth_learning_rate&) = default;
312  {
313  return new linear_growth_learning_rate(*this);
314  }
315  void setup(model* m) override;
316  std::string name() const override { return "linear growth learning rate"; } // Return this callback's name.
317 
318 protected:
319  float global_schedule(model* m) override;
320 
321 private:
323  void write_specific_proto(lbann_data::Callback& proto) const final;
324 
326  float m_base_lr;
328  float m_target;
330  float m_inc;
332  size_t m_num_epochs;
334  size_t m_delay;
335 };
336 
337 // Builder function
338 std::unique_ptr<callback_base>
340  const google::protobuf::Message&,
341  std::shared_ptr<lbann_summary> const&);
342 
350 {
351 public:
352  poly_learning_rate(double p, size_t n_epochs, size_t max_iter);
353  poly_learning_rate(double p,
354  size_t n_epochs,
355  size_t max_iter,
356  double endl_r,
357  std::vector<std::string> weights_names);
358  poly_learning_rate(const poly_learning_rate&) = default;
360  poly_learning_rate* copy() const override // Returns a new heap-allocated copy of this object.
361  {
362  return new poly_learning_rate(*this); // copy-constructed; NOTE(review): raw owning pointer, caller presumably takes ownership - confirm against the base-class contract
363  }
364  void setup(model* m) override;
365  std::string name() const override { return "poly learning rate"; } // Return this callback's name.
366 
367 protected:
368  float global_schedule(model* m) override;
369  float optimizer_schedule(model* m, optimizer& opt) override;
370 
371 private:
373  void write_specific_proto(lbann_data::Callback& proto) const final;
374 
376  double m_p;
378  size_t m_num_epochs;
380  size_t m_max_iter;
382  float m_start_lr;
384  float m_end_lr;
385 };
386 
387 // Builder function
388 std::unique_ptr<callback_base> build_poly_learning_rate_callback_from_pbuf(
389  const google::protobuf::Message&,
390  std::shared_ptr<lbann_summary> const&);
391 
400 {
401 public:
404  std::vector<std::string> weights_names);
406  const optimizerwise_adaptive_learning_rate&) = default;
410  {
411  return new optimizerwise_adaptive_learning_rate(*this);
412  }
413  std::string name() const override // Return this callback's name.
414  {
415  return "optimizerwise adaptive learning rate";
416  }
417 
418 protected:
419  float optimizer_schedule(model* m, optimizer& opt) override;
420 
421 private:
423  void write_specific_proto(lbann_data::Callback& proto) const final;
424 
425  float m_scale;
426 };
427 
428 // Builder function
429 std::unique_ptr<callback_base>
431  const google::protobuf::Message&,
432  std::shared_ptr<lbann_summary> const&);
433 
451 {
452 public:
453  cosine_decay_learning_rate(double lr_max,
454  double lr_min,
455  size_t decay_steps,
456  double initial_learning_rate = 0.0,
457  size_t warmup_steps = 0);
458  cosine_decay_learning_rate(double lr_max,
459  double lr_min,
460  size_t decay_steps,
461  double initial_learning_rate,
462  size_t warmup_steps,
463  std::vector<std::string> weight_names);
466  operator=(const cosine_decay_learning_rate&) = default;
468  {
469  return new cosine_decay_learning_rate(*this);
470  }
471  void setup(model* m) override;
472  std::string name() const override { return "cosine decay learning rate"; } // Return this callback's name.
473 
474 protected:
475  float global_schedule(model* m) override;
476  float optimizer_schedule(model* m, optimizer& opt) override;
477 
478 private:
480  void write_specific_proto(lbann_data::Callback& proto) const final;
481 
483  float m_lr_max;
485  float m_lr_min;
492 };
493 
494 // Builder function
495 std::unique_ptr<callback_base>
497  const google::protobuf::Message&,
498  std::shared_ptr<lbann_summary> const&);
499 
500 } // namespace callback
501 } // namespace lbann
502 
503 #endif // LBANN_CALLBACKS_LEARNING_RATE_HPP_INCLUDED
std::string name() const override
Return this callback's name.
static void update_global_learning_rate(float rate) noexcept
std::unique_ptr< callback_base > build_poly_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
std::unique_ptr< callback_base > build_cosine_decay_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
size_t m_warmup_steps
Number of warmup steps.
cosine_decay_learning_rate * copy() const override
std::string name() const override
Return this callback's name.
std::vector< std::string > const & get_weights_names() const
std::unique_ptr< callback_base > build_adaptive_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
std::string name() const override
Return this callback's name.
float m_target
Target learning rate to reach.
learning_rate & operator=(const learning_rate &)=default
std::vector< std::string > m_weights_names
std::string name() const override
Return this callback's name.
float m_inc
Amount to increase each epoch.
std::string name() const override
Return this callback's name.
std::string name() const override
Return this callback's name.
double m_p
The exponent to compute new learning rate in poly policy.
float m_lr_min
The learning rate after cosine decay.
size_t m_decay_steps
The number of steps for decay.
const std::unordered_set< weights * > & get_weights() const noexcept
size_t m_delay
Number of epochs to delay before starting growth.
Abstract base class for gradient-based optimization algorithms.
Definition: optimizer.hpp:85
drop_fixed_learning_rate * copy() const override
std::unique_ptr< callback_base > build_optimizerwise_adaptive_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
Base class for callbacks during training/testing.
Definition: callback.hpp:76
virtual float optimizer_schedule(model *m, optimizer &opt)
std::unordered_set< weights * > m_weights
Abstract base class for neural network models.
Definition: model.hpp:83
adaptive_learning_rate * copy() const override
virtual float global_schedule(model *m)
float m_amt
Amount to decrease the learning rate by.
static float get_current_global_learning_rate() noexcept
std::unique_ptr< callback_base > build_set_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
void on_epoch_end(model *m) override
linear_growth_learning_rate * copy() const override
virtual void write_specific_proto(lbann_data::Callback &proto) const =0
Add callback specific data to prototext.
std::unique_ptr< callback_base > build_drop_fixed_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
set_learning_rate * copy() const override
float m_initial_lr
The initial learning rate for warmup. Relevant only if m_warmup_steps > 0.
float m_lr_max
The starting learning rate before decay.
virtual void on_backward_prop_end(model *m)
Called when a model ends backward propagation.
Definition: callback.hpp:152
float m_end_lr
The final learning rate.
std::string name() const override
Return this callback's name.
size_t m_num_epochs
Number of epochs over which to scale the learning rate.
void on_backward_prop_end(model *m) override
poly_learning_rate * copy() const override
optimizerwise_adaptive_learning_rate * copy() const override
std::unique_ptr< callback_base > build_step_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
size_t m_num_epochs
The number of epochs for training.
std::string name() const override
Return this callback's name.
step_learning_rate * copy() const override
double EvalType
Definition: base.hpp:189
size_t m_max_iter
The maximum number of iterations until which the learning rate changes.
void setup(model *m) override
std::unique_ptr< callback_base > build_linear_growth_learning_rate_callback_from_pbuf(const google::protobuf::Message &, std::shared_ptr< lbann_summary > const &)
float m_start_lr
The initial learning rate.