LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
gru.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYERS_LEARNING_GRU_HPP_INCLUDED
28 #define LBANN_LAYERS_LEARNING_GRU_HPP_INCLUDED
29 
31 #ifdef LBANN_HAS_DNN_LIB
33 #endif // LBANN_HAS_DNN_LIB
34 #ifdef LBANN_HAS_ONEDNN
36 #endif // LBANN_HAS_ONEDNN
37 
38 // Supported implementations -- See lbann_config.h
39 
40 namespace lbann {
41 
// Stacked gated recurrent unit.
//
// Templated on the tensor data type, the data layout and the device. Only
// the data-parallel layout is accepted: any other layout is rejected at
// compile time by the static_assert below.
62 template <typename TensorDataType, data_layout Layout, El::Device Device>
63 class gru_layer : public data_type_layer<TensorDataType>
64 {
65 
66  static_assert(Layout == data_layout::DATA_PARALLEL,
67  "GRU layer only supports data parallel layout");
68 
69 public:
 // Construct from a hidden size and a layer count.
 // NOTE(review): presumably these initialize m_hidden_size and
 // m_num_layers; the definition is not part of this listing -- confirm.
70  gru_layer(size_t hidden_size, size_t num_layers);
71 
 // Copy operations are user-declared (defined elsewhere); the destructor is
 // defaulted.
72  gru_layer(const gru_layer& other);
73  gru_layer& operator=(const gru_layer& other);
74  ~gru_layer() = default;
75 
 // Copy function: returns a pointer to a dynamically allocated copy of this
 // layer.
76  gru_layer* copy() const override;
 // Get the layer type's name.
77  std::string get_type() const override;
 // Get data layout of the data tensors.
78  data_layout get_data_layout() const override;
 // Get the device allocation for the data tensors.
79  El::Device get_device_allocation() const override;
 // This layer never runs in-place: always returns false.
80  bool can_run_inplace() const override { return false; }
 // Returns the necessary tensors for computing backpropagation, here the
 // bitwise OR of the ERROR_SIGNALS and WEIGHTS flags.
81  int get_backprop_requirements() const override
82  {
83  return ERROR_SIGNALS | WEIGHTS;
84  }
85 
 // Human-readable description.
86  description get_description() const override;
87 
89 
 // Serialization hook, templated on the archive type.
 // NOTE(review): the cereal::access friend declaration below suggests the
 // archive is a cereal archive -- confirm.
91  template <typename ArchiveT>
92  void serialize(ArchiveT& ar);
93 
95 
 // Accessors for the hidden size and the number of stacked GRU cells.
96  size_t get_hidden_size() const { return m_hidden_size; }
97  size_t get_num_layers() const { return m_num_layers; }
 // Read-only access to a byte buffer on the layer's device.
 // NOTE(review): presumably this exposes CudnnObjects::reserve_space on GPU
 // builds; the definition is not part of this listing -- confirm.
98  const hydrogen::simple_buffer<El::byte, Device>& get_reserve_space() const;
99 
100 protected:
 // Writes layer-specific fields into the given protobuf message. Declared
 // final, so derived classes cannot override it.
102  void write_specific_proto(lbann_data::Layer& proto) const final;
103 
 // Protected default constructor delegating to gru_layer(0, 0).
 // NOTE(review): cereal::access is befriended, presumably so cereal can
 // default-construct the layer when deserializing -- confirm.
104  friend class cereal::access;
105  gru_layer() : gru_layer(0, 0) {}
106 
 // Set up tensor dimensions. Called by the 'setup' function.
107  void setup_dims() override;
 // Set up layer data. Called by the 'setup' function.
108  void setup_data(size_t max_mini_batch_size) override;
109 
 // Apply layer operation. Called by the 'forward_prop' function.
110  void fp_compute() override;
 // Compute objective function gradients. Called by the 'back_prop' function.
111  void bp_compute() override;
112 
113 private:
 // NOTE(review): get_hidden_size() reads m_hidden_size ("Size of each hidden
 // state and output vector", indexed at gru.hpp:115), but its declaration is
 // missing from this listing.
 // Number of stacked GRU cells.
117  size_t m_num_layers;
118 
 // Everything up to the matching #endif is compiled only when
 // LBANN_GRU_LAYER_ONEDNN_CPU_SUPPORTED is defined.
119 #ifdef LBANN_GRU_LAYER_ONEDNN_CPU_SUPPORTED
120 
121 
 // Aggregate of the oneDNN primitives, primitive descriptors and tensor
 // descriptors used by the CPU code path.
124  struct OnednnCpuObjects
125  {
126 
127  // Typedefs
128  using Backend = onednn_backend<El::Device::CPU>;
129  using TensorDesc = Backend::TensorDescriptor;
130 
131  // Descriptors
132  ::dnnl::lbr_gru_forward::primitive_desc gru_forward_primitive_desc;
133  ::dnnl::lbr_gru_forward::primitive gru_forward_primitive;
134  ::dnnl::lbr_gru_backward::primitive_desc gru_backward_primitive_desc;
135  ::dnnl::lbr_gru_backward::primitive gru_backward_primitive;
136  TensorDesc input_sequence_desc;
137  TensorDesc init_hidden_desc;
138  TensorDesc output_sequence_desc;
139  TensorDesc final_hidden_desc;
140  TensorDesc input_sequence_grad_desc;
141  TensorDesc init_hidden_grad_desc;
142  TensorDesc output_sequence_grad_desc;
143  TensorDesc final_hidden_grad_desc;
144 
145  // Workspaces
146  TensorDesc forward_ih_matrix_weights;
147  TensorDesc forward_hh_matrix_weights;
148  TensorDesc backward_ih_matrix_weights;
149  TensorDesc backward_hh_matrix_weights;
150  TensorDesc bias_weights;
151  TensorDesc ih_matrix_weights_grad;
152  TensorDesc hh_matrix_weights_grad;
153  TensorDesc bias_weights_grad;
154  TensorDesc workspace;
155  };
156 
 // Uniquely owned oneDNN state; a null pointer means no state is held.
158  std::unique_ptr<OnednnCpuObjects> m_onednn_cpu_objects;
159 
 // NOTE(review): presumably creates and populates m_onednn_cpu_objects; the
 // definition is not part of this listing -- confirm.
161  void setup_onednn_cpu();
162 
164 #endif // LBANN_GRU_LAYER_ONEDNN_CPU_SUPPORTED
165 
 // Everything up to the matching #endif is compiled only when
 // LBANN_GRU_LAYER_CUDNN_SUPPORTED is defined.
166 #ifdef LBANN_GRU_LAYER_CUDNN_SUPPORTED
167 
168 
 // Aggregate of the RNN descriptors, GPU matrices, GPU buffers and graph
 // caches used by the GPU code path.
171  struct CudnnObjects
172  {
173 
174  // Typedefs
175  using ByteBuffer = hydrogen::simple_buffer<El::byte, El::Device::GPU>;
176  using IntBuffer = hydrogen::simple_buffer<int32_t, El::Device::GPU>;
177  using LocalMat = El::Matrix<TensorDataType, El::Device::GPU>;
 // Maps a size_t key to a (size_t, executable graph) pair.
 // NOTE(review): the meaning of the key and of the paired size_t is not
 // visible in this listing -- confirm against the implementation.
178  using GraphCache =
179  std::unordered_map<size_t, std::pair<size_t, cuda::ExecutableGraph>>;
180 
181  // Descriptors
182  dnn_lib::RNNDescriptor rnn_desc;
183  dnn_lib::RNNDataDescriptor input_desc;
184  dnn_lib::RNNDataDescriptor output_desc;
185  dnn_lib::TensorDescriptor hidden_desc;
186 
187  // Workspaces
188  LocalMat input_sequence_workspace;
189  LocalMat output_sequence_workspace;
190  LocalMat input_sequence_grad_workspace;
191  LocalMat output_sequence_grad_workspace;
192  LocalMat init_hidden_workspace;
193  LocalMat init_hidden_grad_workspace;
194  ByteBuffer weights_workspace;
195  ByteBuffer weights_grad_workspace;
196  ByteBuffer workspace;
197  ByteBuffer reserve_space;
198  IntBuffer gpu_sequence_lengths;
199 
 // Separate graph caches for forward and backward propagation.
205  GraphCache forward_prop_graph_cache;
211  GraphCache backward_prop_graph_cache;
212  };
213 
 // Uniquely owned GPU state; a null pointer means no state is held.
215  std::unique_ptr<CudnnObjects> m_cudnn_objects;
216 
 // NOTE(review): presumably creates and populates m_cudnn_objects; the
 // definition is not part of this listing -- confirm.
218  void setup_cudnn();
219 
221 #endif // LBANN_GRU_LAYER_CUDNN_SUPPORTED
222 
 // NOTE(review): the declarations that follow these two template headers
 // (original lines 224 and 226) are missing from this listing. The index
 // lists friend functions fp_compute_impl and bp_compute_impl taking
 // gru_layer<T, Layout, Device>&, which presumably belong here -- confirm.
223  template <typename T>
225  template <typename T>
227 };
228 
229 // Builder function
231 
232 // Explicit template instantiation
// The extern template declarations below are skipped when
// LBANN_GRU_LAYER_INSTANTIATE is defined.
// NOTE(review): presumably the translation unit that provides the explicit
// instantiations defines that macro -- confirm.
233 #ifndef LBANN_GRU_LAYER_INSTANTIATE
234 
235 #ifdef LBANN_GRU_LAYER_ONEDNN_CPU_SUPPORTED
// PROTO(T) declares, without instantiating, gru_layer<T> for the
// data-parallel layout on the CPU device.
// NOTE(review): PROTO is defined and undefined with no visible use; the
// listing skips original line 241, presumably the include that expands PROTO
// for each supported data type -- confirm.
236 #define PROTO(T) \
237  extern template class gru_layer<T, \
238  data_layout::DATA_PARALLEL, \
239  El::Device::CPU>;
240 #define LBANN_INSTANTIATE_CPU_HALF
242 #undef PROTO
243 #endif // LBANN_GRU_LAYER_ONEDNN_CPU_SUPPORTED
244 
245 #ifdef LBANN_GRU_LAYER_CUDNN_SUPPORTED
// Same pattern as the CPU section, for the GPU device.
// NOTE(review): original line 251 is likewise missing from the listing.
246 #define PROTO(T) \
247  extern template class gru_layer<T, \
248  data_layout::DATA_PARALLEL, \
249  El::Device::GPU>;
250 #define LBANN_INSTANTIATE_GPU_HALF
252 #undef PROTO
253 #endif // LBANN_GRU_LAYER_CUDNN_SUPPORTED
254 
255 #endif // LBANN_GRU_LAYER_INSTANTIATE
256 
257 } // namespace lbann
258 
259 #endif // LBANN_LAYERS_LEARNING_GRU_HPP_INCLUDED
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
void write_specific_proto(lbann_data::Layer &proto) const final
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
gru_layer & operator=(const gru_layer &other)
~gru_layer()=default
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
friend void bp_compute_impl(gru_layer< T, Layout, Device > &)
Generates nicely formatted description messages.
Definition: description.hpp:49
std::string get_type() const override
Get the layer type's name.
gru_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
size_t m_num_layers
Number of stacked GRU cells.
Definition: gru.hpp:117
constexpr El::Device Device
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: gru.hpp:81
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: gru.hpp:80
void serialize(ArchiveT &ar)
friend void fp_compute_impl(gru_layer< T, Layout, Device > &)
friend class cereal::access
Definition: gru.hpp:104
void setup_dims() override
Set up tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Stacked gated recurrent unit.
Definition: gru.hpp:63
const hydrogen::simple_buffer< El::byte, Device > & get_reserve_space() const
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
size_t get_hidden_size() const
Definition: gru.hpp:96
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
void setup_data(size_t max_mini_batch_size) override
Set up layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
LBANN_DEFINE_LAYER_BUILDER(elu)
size_t get_num_layers() const
Definition: gru.hpp:97
size_t m_hidden_size
Size of each hidden state and output vector.
Definition: gru.hpp:115
description get_description() const override
Human-readable description.