27 #ifndef LBANN_OPTIMIZERS_HYPERGRADIENT_ADAM_HPP_INCLUDED
28 #define LBANN_OPTIMIZERS_HYPERGRADIENT_ADAM_HPP_INCLUDED
32 #include "lbann/proto/optimizers.pb.h"
44 template <
typename TensorDataType>
46 data_type_optimizer<TensorDataType>>
80 TensorDataType init_learning_rate = El::To<TensorDataType>(1e-3),
81 TensorDataType hyper_learning_rate = El::To<TensorDataType>(1e-7),
82 TensorDataType beta1 = El::To<TensorDataType>(0.9),
83 TensorDataType beta2 = El::To<TensorDataType>(0.99),
84 TensorDataType eps = El::To<TensorDataType>(1e-8));
90 template <
class Archive>
94 std::string
get_type()
const override {
return "hypergradient Adam"; }
102 void write_proto(lbann_data::Optimizer& opt)
const final;
130 template <
typename TensorDataType>
131 std::unique_ptr<optimizer>
136 #endif // LBANN_OPTIMIZERS_HYPERGRADIENT_ADAM_HPP_INCLUDED
TensorDataType m_beta1
Update factor for first moment estimate.
Inject polymorphic clone functions into hierarchies.
void step_compute(AbsDistMatrixType &values, const AbsDistMatrixType &gradient) override
Computation for an optimization step.
void setup(weights *w) override
Must be called before training.
void serialize(Archive &ar)
hypergradient_adam(TensorDataType init_learning_rate=El::To< TensorDataType >(1e-3), TensorDataType hyper_learning_rate=El::To< TensorDataType >(1e-7), TensorDataType beta1=El::To< TensorDataType >(0.9), TensorDataType beta2=El::To< TensorDataType >(0.99), TensorDataType eps=El::To< TensorDataType >(1e-8))
Construct a Hypergradient Adam optimizer object.
Generates nicely formatted description messages.
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
TensorDataType m_hyper_learning_rate
Hypergradient learning rate.
void write_proto(lbann_data::Optimizer &opt) const final
TensorDataType m_eps
Small factor to avoid division by zero.
std::unique_ptr< optimizer > build_hypergradient_adam_optimizer_from_pbuf(google::protobuf::Message const &)
description get_description() const override
Human-readable description.
Hypergradient Adam optimizer.
TensorDataType m_beta2
Update factor for second moment estimate.
std::unique_ptr< AbsDistMatrixType > m_old_gradient
Gradient estimate from the prior step (for hypergradient).
std::unique_ptr< AbsDistMatrixType > m_moment1
First moment estimates.
std::unique_ptr< AbsDistMatrixType > m_moment2
Second moment estimates.
hypergradient_adam & operator=(const hypergradient_adam &other)
TensorDataType m_current_beta2
beta2 ^ iteration.
void setup(WeightsType *w=nullptr) override
std::string get_type() const override
Human-readable type name.
TensorDataType m_current_beta1
beta1 ^ iteration.
~hypergradient_adam() override=default