LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
input_layer.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_LAYERS_INPUT_LAYER_HPP_INCLUDED
28 #define LBANN_LAYERS_INPUT_LAYER_HPP_INCLUDED
29 
33 #include "lbann/utils/distconv.hpp"
34 
35 namespace lbann {
36 
37 #ifdef LBANN_HAS_DISTCONV
/// Distconv adapter for the input layer; this class is only compiled when
/// LBANN_HAS_DISTCONV is defined. It derives from
/// data_type_distconv_adapter<TensorDataType>, declares host-side tensors and
/// host shufflers, and overrides every backprop tensor setup hook as a no-op.
///
/// NOTE(review): this listing carries embedded line numbers and skips some of
/// them (e.g. 43 and 77), so a few declarations below appear truncated.
38 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
39 class input_distconv_adapter : public data_type_distconv_adapter<TensorDataType>
40 {
41 public:
 // NOTE(review): the right-hand side of this alias (listing line 43) is not
 // visible here; confirm against the real header.
42  using TensorDevType =
 // Host-side tensor and shuffler types for this layer's element type.
44  using TensorHost = dc::TensorHost<TensorDataType>;
45  using TensorHostShuffler = dc::TensorHostShuffler<TensorDataType>;
46 
 /// Builds the adapter for @p layer. The definition is not visible here;
 /// presumably @p data_field and @p shuffle_required are stored in
 /// m_data_field and m_shuffle_required -- TODO confirm.
47  input_distconv_adapter(Layer& layer,
48  data_field_type data_field,
49  const bool shuffle_required);
50  virtual ~input_distconv_adapter() = default;
51 
52  void setup_layer(size_t workspace_capacity) override;
53 
 /// Returns a shuffler for the (@p src, @p dst) tensor pair.
 /// Presumably one of the cached entries in m_shufflers -- TODO confirm.
54  TensorHostShuffler& get_shuffler(const TensorHost& src,
55  const TensorHost& dst);
56  void setup_fp_tensors() override;
57  std::unique_ptr<TensorDevType> setup_activations_i(int index) const override;
58  dc::Shape get_activations_local_shape(int index) const override;
59  dc::Shape get_activations_shape(int index) const override;
60  void setup_shuffler_buffers(const TensorHost& src, const TensorHost& dst);
61 
62  // No bp tensors needed for this layer.
63  void setup_prev_error_signals() override {}
64  void setup_original_prev_error_signals() override {}
65  void setup_error_signals() override {}
66  void setup_original_error_signals() override {}
67  void setup_bp_tensors() override {}
68 
69  bool child_copy_required(size_t output_index) const override;
70  bool child_shuffle_required(size_t output_index) const override;
71 
72  // Nothing to do here as everything is done in fp_compute_distconv.
73  void fp_setup() override {}
 // Unlike the hooks above, this member is not marked `override`.
74  void fp_compute();
75 
76 private:
 // Data field handled by this adapter (listing line 77, likely its doc
 // comment, is not visible here).
78  data_field_type m_data_field;
79 
 // NOTE(review): no in-class initializer; presumably set by the
 // constructor -- confirm, otherwise the first read is indeterminate.
80  bool m_is_input_processed;
81  std::unique_ptr<TensorHost> m_original_host_tensor;
82  std::unique_ptr<TensorHost> m_host_tensor;
83 
84  const bool m_shuffle_required;
 // Fixed set of four shuffler slots. What each slot corresponds to is not
 // established in this header.
85  std::array<std::unique_ptr<TensorHostShuffler>, 4> m_shufflers;
 // NOTE(review): these buffers carry a separate size yet use the
 // single-object form std::unique_ptr<T>, whose default deleter calls
 // `delete`. If the storage is an array or comes from a malloc-style
 // allocator, a T[] specialization or custom deleter is required --
 // confirm against the allocation site.
86  std::unique_ptr<TensorDataType> m_shuffler_src_buf;
87  size_t m_shuffler_src_buf_size = 0;
88  std::unique_ptr<TensorDataType> m_shuffler_dst_buf;
89  size_t m_shuffler_dst_buf_size = 0;
90 
91  // TODO: Use pinned memory pool
 // NOTE(review): raw pointer with a defaulted destructor; nothing in this
 // header releases it -- confirm where ownership is handled.
92  TensorDataType* m_copy_pinned_buffer = nullptr;
93 };
94 #endif // LBANN_HAS_DISTCONV
95 
/// Input layer: a data_type_layer that expects zero parent layers and one
/// child layer (both set in the constructor). Only the DATA_PARALLEL data
/// layout is accepted, enforced by the static_assert below.
///
/// NOTE(review): listing line 98 is not visible here; it presumably declares
/// the T_layout template parameter that the static_assert and
/// get_data_layout() refer to -- confirm against the real header.
97 template <typename TensorDataType,
99  El::Device Dev = El::Device::CPU>
100 class input_layer : public data_type_layer<TensorDataType>
101 {
102  static_assert(T_layout == data_layout::DATA_PARALLEL,
103  "input layer only supports DATA_PARALLEL data layout");
104 
105 public:
107 
 // Abstract distributed-matrix type for this layer's element type.
110  using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
111 
113 public:
 /// Constructs the layer on @p comm. @p data_field is taken by const value
 /// and copied into m_data_field; it defaults to the empty string.
 /// NOTE(review): single-argument callable and not `explicit`, so
 /// lbann_comm* converts implicitly -- confirm that is intended.
115  input_layer(lbann_comm* comm, std::string const data_field = "")
116  : data_type_layer<TensorDataType>(comm), m_data_field(data_field)
117  {
118 
119  // Input layers have no parents
120  this->m_expected_num_parent_layers = 0;
121  this->m_expected_num_child_layers = 1;
122  }
123 
124  input_layer(const input_layer&) = default;
125  input_layer& operator=(const input_layer&) = default;
 /// Returns a heap-allocated copy of this layer made with the copy
 /// constructor; the result is a raw pointer.
126  input_layer* copy() const override { return new input_layer(*this); }
127 
 /// Layer type name: always "input".
128  std::string get_type() const override { return "input"; }
129 
130 #ifdef LBANN_HAS_ONNX
131  void fill_onnx_node(onnx::GraphProto& graph) const override;
132 #endif // LBANN_HAS_ONNX
133 
134  // description get_description() const override {
135  // auto desc = io_layer<TensorDataType>::get_description();
136  // return desc;
137  // }
 // The next two accessors return the compile-time template arguments.
138  data_layout get_data_layout() const override { return T_layout; }
139  El::Device get_device_allocation() const override { return Dev; }
 // This layer never runs in place.
140  bool can_run_inplace() const override { return false; }
 // Reports ERROR_SIGNALS as the backprop requirement.
141  int get_backprop_requirements() const override { return ERROR_SIGNALS; }
142 
143  void setup_dims() override;
144 
145  void setup_data(size_t max_mini_batch_size) override;
146 
150  void fp_setup_outputs() override;
151 
152  void fp_compute() override;
153 
 /// Supplies @p samples to the layer directly. Per the note on
 /// m_samples_loaded below, this presumably sets that flag so fp_compute()
 /// skips its own sample loading -- TODO confirm in the definition.
157  void set_samples(const El::AbstractDistMatrix<TensorDataType>& samples);
158 
 /// Returns data dimensions derived from @p dr_metadata for the given
 /// child; @p child_index defaults to 0.
162  std::vector<El::Int> get_data_dims(const DataReaderMetaData& dr_metadata,
163  int child_index = 0) const;
164 
166 
 /// Serialization hook templated on the archive type.
168  template <typename ArchiveT>
169  void serialize(ArchiveT& ar);
170 
172 
173 protected:
 // Marked `final`: derived classes cannot override this.
175  void write_specific_proto(lbann_data::Layer& proto) const final;
176 
177 private:
 // cereal::access is befriended so it can reach the private default
 // constructor below (presumably during deserialization -- confirm).
178  friend cereal::access;
 // Delegates to the public constructor with a null communicator.
179  input_layer() : input_layer(nullptr) {}
180 
181  // This is to track if samples are loaded with set_samples(), if so the
182  // fp_compute() sample loading is no longer necessary
183  bool m_samples_loaded = false;
184 
 // NOTE(review): the declaration of m_data_field (used by the constructor)
 // is not visible in this listing; it is presumably on the skipped line 185.
186 
187 #ifdef LBANN_HAS_DISTCONV
188 public:
 // Adapter type paired with this layer; befriended so it can access the
 // layer's non-public members.
190  using distconv_adapter_type =
192  input_distconv_adapter<TensorDataType, T_layout, Dev>;
193  friend distconv_adapter_type;
194 
195 protected:
 // Distconv is reported as supported only for CPU device allocation with
 // the DATA_PARALLEL layout.
196  bool is_distconv_supported() const override
197  {
198  return Dev == El::Device::CPU && T_layout == data_layout::DATA_PARALLEL;
199  }
200  void setup_distconv_adapter() override;
201  distconv_adapter_type& get_distconv_adapter() override;
202  const distconv_adapter_type& get_distconv_adapter() const override;
203  bool keep_original_outputs(int index) const override;
204  bool keep_original_gradient_wrt_outputs(int index) const override;
206 #endif // LBANN_HAS_DISTCONV
207 };
208 
210 
211 #ifndef LBANN_INPUT_LAYER_INSTANTIATE
212 
// PROTO_DEVICE(T, Device) expands to an explicit-instantiation declaration
// (`extern template`) of input_layer for element type T on Device with the
// DATA_PARALLEL layout. The enclosing #ifndef LBANN_INPUT_LAYER_INSTANTIATE
// guard skips these declarations when that macro is defined.
// NOTE(review): the line that consumes the macro (listing line 216) is not
// visible here; presumably an include that invokes PROTO_DEVICE per
// type/device combination -- confirm against the real header.
213 #define PROTO_DEVICE(T, Device) \
214  extern template class input_layer<T, data_layout::DATA_PARALLEL, Device>
215 
// Remove the helper macro so it does not leak to files including this header.
217 #undef PROTO_DEVICE
218 
219 #endif // LBANN_INPUT_LAYER_INSTANTIATE
220 
221 } // namespace lbann
222 
223 #endif // LBANN_LAYERS_INPUT_LAYER_HPP_INCLUDED
Interface with data reader.
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
constexpr El::Device Device
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
std::string get_type() const override
Get the layer type's name.
::distconv::tensor::Shape Shape
input_layer(lbann_comm *comm, std::string const data_field="")
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
data_field_type m_data_field
std::string data_field_type
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
input_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
LBANN_DEFINE_LAYER_BUILDER(elu)
dc::TensorDev< OutputTensorDataType > TensorDevType
Data structure containing metadata from the data readers.
Definition: metadata.hpp:82