LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
layers/regularizers/dropout.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYER_REGULARIZER_DROPOUT_HPP_INCLUDED
28 #define LBANN_LAYER_REGULARIZER_DROPOUT_HPP_INCLUDED
29 
31 #ifdef LBANN_HAS_DNN_LIB
34 #endif // LBANN_HAS_DNN_LIB
36 
37 namespace lbann {
38 
50 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
51 class dropout : public data_type_layer<TensorDataType>
52 {
53 public:
55 
58  using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
59 
61 
62 public:
64  dropout(EvalType keep_prob = EvalType(0.5))
65  : data_type_layer<TensorDataType>(nullptr),
66  m_keep_prob(keep_prob)
67 #ifdef LBANN_HAS_DNN_LIB
68  ,
69  m_tensors_dnn_desc(this)
70 #endif // LBANN_HAS_DNN_LIB
71  {}
72 
73  dropout(const dropout& other)
74  : data_type_layer<TensorDataType>(other),
75  m_keep_prob(other.m_keep_prob),
76  m_mask(other.m_mask ? other.m_mask->Copy() : nullptr)
77 #ifdef LBANN_HAS_DNN_LIB
78  ,
79  m_tensors_dnn_desc(other.m_tensors_dnn_desc)
80 #endif // LBANN_HAS_DNN_LIB
81  {
82 #ifdef LBANN_HAS_DNN_LIB
83  m_tensors_dnn_desc.set_layer(this);
84  m_states = other.m_states;
85  m_reserve_space = other.m_reserve_space;
86  if (other.m_dropout_dnn_desc != nullptr) {
87  setup_dropout_dnn_desc();
88  }
89 #endif // LBANN_HAS_DNN_LIB
90  }
91 
92  dropout& operator=(const dropout& other)
93  {
95  m_keep_prob = other.m_keep_prob;
96  m_mask = other.m_mask
97  ? std::unique_ptr<AbsDistMatrixType>(other.m_mask->Copy())
98  : nullptr;
99 #ifdef LBANN_HAS_DNN_LIB
100  m_tensors_dnn_desc = other.m_tensors_dnn_desc;
101  m_tensors_dnn_desc.set_layer(this);
102  m_states = other.m_states;
103  m_reserve_space = other.m_reserve_space;
104  if (other.m_dropout_dnn_desc != nullptr) {
105  setup_dropout_dnn_desc();
106  }
107 #endif // LBANN_HAS_DNN_LIB
108  return *this;
109  }
110 
111  ~dropout() override = default;
112 
113  dropout* copy() const override { return new dropout(*this); }
114  std::string get_type() const override { return "dropout"; }
115  data_layout get_data_layout() const override { return T_layout; }
116  El::Device get_device_allocation() const override { return Dev; }
117  bool can_run_inplace() const override { return true; }
118  int get_backprop_requirements() const override { return ERROR_SIGNALS; }
119 
120  description get_description() const override
121  {
123  desc.add("Keep probability", m_keep_prob);
124  return desc;
125  }
127  EvalType get_keep_prob() const { return m_keep_prob; }
129  void set_keep_prob(EvalType keep_prob) { m_keep_prob = keep_prob; }
130 
132 
134  template <typename ArchiveT>
135  void serialize(ArchiveT& ar);
136 
138 
139 protected:
141  void write_specific_proto(lbann_data::Layer& proto) const final;
142 
143  void setup_dims() override
144  {
146  this->set_output_dims(this->get_input_dims());
147  }
148 
149  void setup_data(size_t max_mini_batch_size) override
150  {
151  data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);
152  m_mask = std::unique_ptr<AbsDistMatrixType>(this->get_activations().Copy());
153  }
154 
155  void setup_gpu() override
156  {
158 #ifndef LBANN_HAS_DNN_LIB
159  LBANN_ERROR("DNN library not detected");
160 #else
161 
162 #ifdef LBANN_DETERMINISTIC
163  if (this->get_comm()->am_trainer_master()) {
165  LBANN_WARNING(this->get_type(),
166  " layer \"",
167  this->get_name(),
168  "\" ",
169  "does not guarantee sequential consistency");
170  }
171 #endif // LBANN_DETERMINISTIC
172 
173  // Initialize DNN library objects
174  setup_dropout_dnn_desc();
175 
176 #endif // LBANN_HAS_DNN_LIB
177  }
178 
179  void fp_compute() override
180  {
181  if (this->using_gpus()) {
182  fp_compute_gpu();
183  }
184  else {
185  fp_compute_cpu();
186  }
187  }
188 
189  void bp_compute() override
190  {
191  if (this->using_gpus()) {
192  bp_compute_gpu();
193  }
194  else {
195  bp_compute_cpu();
196  }
197  }
198 
199 private:
200  void fp_compute_cpu();
201 
203  void bp_compute_cpu();
204 
205  void fp_compute_gpu();
206 
207  void bp_compute_gpu();
208 
209 #ifdef LBANN_HAS_DNN_LIB
210 
212  void setup_dropout_dnn_desc()
213  {
214 
215  // Setup RNG state
216  size_t size = dnn_lib::get_dropout_states_size();
217  m_states.Resize((size + sizeof(TensorDataType) - 1) /
218  sizeof(TensorDataType),
219  1);
220 
221  // Setup dropout descriptor
222  m_dropout_dnn_desc.set(float(1 - m_keep_prob),
223  m_states.Buffer(),
224  m_states.Height() * sizeof(TensorDataType),
225  get_generator()());
226  }
227 #endif // LBANN_HAS_DNN_LIB
228 
232  std::unique_ptr<AbsDistMatrixType> m_mask;
233 
234 #ifdef LBANN_HAS_DNN_LIB
235 
236  dnn_lib::DropoutDescriptor m_dropout_dnn_desc;
240  El::Matrix<TensorDataType, El::Device::GPU> m_states;
242  El::Matrix<TensorDataType, El::Device::GPU> m_reserve_space;
243 #endif // LBANN_HAS_DNN_LIB
244 };
245 
246 template <typename T, data_layout L, El::Device D>
248 
250 
251 #ifndef LBANN_DROPOUT_LAYER_INSTANTIATE
252 #define PROTO_DEVICE(T, Device) \
253  extern template class dropout<T, data_layout::DATA_PARALLEL, Device>; \
254  extern template class dropout<T, data_layout::MODEL_PARALLEL, Device>
255 
257 #undef PROTO_DEVICE
258 #endif // LBANN_DROPOUT_LAYER_INSTANTIATE
259 
260 } // namespace lbann
261 
262 #endif // LBANN_LAYER_REGULARIZER_DROPOUT_HPP_INCLUDED
dropout * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
description get_description() const override
Human-readable description.
virtual void setup_dims()
Set up tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
lbann_comm * get_comm() const
void fp_compute_cpu()
#define LBANN_ERROR(...)
Definition: exception.hpp:37
void bp_compute_gpu()
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
void serialize(ArchiveT &ar)
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Generates nicely formatted description messages.
Definition: description.hpp:49
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
virtual description get_description() const
Human-readable description.
dropout(const dropout &other)
constexpr El::Device Device
void write_specific_proto(lbann_data::Layer &proto) const final
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override
~dropout() override=default
dropout & operator=(const dropout &other)
void bp_compute_cpu()
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
virtual void setup_gpu()
Set up GPU objects. Called by the 'setup' function if the layer is on GPUs.
Definition: layer.hpp:782
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
void set_keep_prob(EvalType keep_prob)
Set the probability of keeping each unit.
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
void setup_dims() override
Set up tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
std::string get_type() const override
Get the layer type's name.
Probabilistically drop layer outputs.
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
#define LBANN_WARNING(...)
Definition: exception.hpp:53
bool using_gpus() const noexcept
Whether the layer is using a GPU implementation.
Definition: layer.hpp:417
std::unique_ptr< AbsDistMatrixType > m_mask
void setup_data(size_t max_mini_batch_size) override
LBANN_DEFINE_LAYER_BUILDER(elu)
void fp_compute_gpu()
rng_gen & get_generator()
void setup_gpu() override
Set up GPU objects. Called by the 'setup' function if the layer is on GPUs.
EvalType get_keep_prob() const
Get the probability of keeping each unit.
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
data_type_layer & operator=(data_type_layer &&other)=default
dropout(EvalType keep_prob=EvalType(0.5))
double EvalType
Definition: base.hpp:189
void setup_data(size_t max_mini_batch_size) override
Set up layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...