LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
slice.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_LAYERS_TRANSFORM_SLICE_HPP_INCLUDED
28 #define LBANN_LAYERS_TRANSFORM_SLICE_HPP_INCLUDED
29 
31 #include "lbann/layers/layer.hpp"
32 #include "lbann/models/model.hpp"
34 #include "lbann/proto/layers.pb.h"
37 #include "lbann/utils/protobuf.hpp"
38 
39 namespace lbann {
40 
46 template <typename TensorDataType,
48  El::Device Device = El::Device::CPU>
49 class slice_layer : public data_type_layer<TensorDataType>
50 {
51 public:
52  slice_layer(lbann_comm* comm);
53  slice_layer(const slice_layer& other) = default;
54  slice_layer& operator=(const slice_layer& other) = default;
55 
56  slice_layer* copy() const override;
57 
59 
61  template <typename ArchiveT>
62  void serialize(ArchiveT& ar);
63 
65 
66  std::string get_type() const override;
67  data_layout get_data_layout() const override;
68  El::Device get_device_allocation() const override;
69  bool can_run_inplace() const override { return false; }
70  int get_backprop_requirements() const override { return ERROR_SIGNALS; }
71 
72  description get_description() const override;
73 
// Configure the layer with an explicit list of slice points.
// Stores the axis to slice along in m_slice_dim and stores the supplied
// slice points in m_slice_points. The vector is taken by value and moved
// into the member, so the caller's copy is the only copy made.
74  void setup_slice_points(size_t slice_dim, std::vector<size_t> slice_points)
75  {
76  m_slice_dim = slice_dim;
77  m_slice_points = std::move(slice_points);
78  }
79 
// Configure the layer without an explicit list of slice points.
// Stores the axis to slice along, the data-reader flag, and the
// slice_points_mode category in the corresponding members. m_slice_points
// is not modified by this overload.
// NOTE(review): presumably the slice points are filled in later from the
// data reader when the flag is set -- confirm against setup_dims().
80  void setup_slice_points(size_t slice_dim,
81  bool set_slice_points_from_data_reader,
82  const slice_points_mode var_category)
83  {
84  m_slice_dim = slice_dim;
85  m_set_slice_points_from_data_reader = set_slice_points_from_data_reader;
86  m_var_category = var_category;
87  }
88 
89 protected:
91  void write_specific_proto(lbann_data::Layer& proto) const final;
92 
93  El::SyncInfo<Device> syncSubGridCommunication = El::SyncInfo<Device>();
94 
95  friend class cereal::access;
96  slice_layer() : slice_layer(nullptr) {}
97 
98  void setup_dims() override;
99 
100  void fp_setup_outputs() override;
101  void bp_setup_gradient_wrt_inputs() override;
102  void fp_compute() override;
103  void bp_compute() override;
104  void fp_compute_subgrid();
105  void bp_compute_subgrid();
106 
107 private:
109  size_t m_slice_dim;
111  std::vector<size_t> m_slice_points;
116 
117 #ifdef LBANN_HAS_GPU
118 
123  std::shared_ptr<hydrogen::simple_buffer<unsigned char, El::Device::CPU>>
124  m_workspace;
130  gpu_lib::event_wrapper m_workspace_event;
131 #endif // LBANN_HAS_GPU
132 
133  template <typename U, El::Device D>
135  template <typename U>
137  template <typename U>
139 };
140 
141 // =========================================================
142 // Implementation
143 // =========================================================
144 
// Write the slice-specific configuration of this layer into a protobuf
// Layer message: the tensor data type, the slice axis, and the slice points.
//
// proto: the lbann_data::Layer message to populate; its "slice" sub-message
// is created or fetched via mutable_slice() and then filled in.
145 template <typename T, data_layout L, El::Device D>
146 void slice_layer<T, L, D>::write_specific_proto(lbann_data::Layer& proto) const
147 {
148  proto.set_datatype(proto::ProtoDataType<T>);
149  auto* msg = proto.mutable_slice();
// The slice dimension is stored in the message's "axis" field.
150  msg->set_axis(m_slice_dim);
// Copy every entry of m_slice_points into the repeated slice_points field.
151  protobuf::assign_to_repeated(*msg->mutable_slice_points(), m_slice_points);
152 }
153 
154 template <typename TensorDataType, data_layout Layout, El::Device Device>
156  : data_type_layer<TensorDataType>(comm),
159 #ifdef LBANN_HAS_GPU
160  ,
161  m_workspace{
162  std::make_shared<hydrogen::simple_buffer<unsigned char, El::Device::CPU>>(
163  0UL,
164  hydrogen::SyncInfo<El::Device::CPU>{},
165  1U /*=pinned*/)}
166 #endif /* LBANN_HAS_GPU */
167 {
168  this->m_expected_num_child_layers = -1; // No limit on children
169 }
170 
171 template <typename TensorDataType, data_layout Layout, El::Device Device>
174 {
175  return new slice_layer(*this);
176 }
177 
178 template <typename TensorDataType, data_layout Layout, El::Device Device>
180 {
181  return "slice";
182 }
183 
184 template <typename TensorDataType, data_layout Layout, El::Device Device>
186 {
187  return Layout;
188 }
189 
190 template <typename TensorDataType, data_layout Layout, El::Device Device>
193 {
194  return Device;
195 }
196 
197 template <typename TensorDataType, data_layout Layout, El::Device Device>
199 {
201  desc.add("Slice dimension", m_slice_dim);
202  std::ostringstream ss;
203  for (size_t i = 0; i < m_slice_points.size(); ++i) {
204  ss << (i > 0 ? ", " : "") << m_slice_points[i];
205  }
206  desc.add("Slice points", ss.str());
207  return desc;
208 }
209 
210 template <typename TensorDataType, El::Device Device>
213 {
214 
215  // Slice Elemental matrices
216  // Note: Assume each mini-batch sample is flat.
217  const size_t num_outputs = l.get_num_children();
218  const auto& input = l.get_prev_activations();
219  size_t offset = l.m_slice_points.front();
220  for (size_t j = 0; j < num_outputs; ++j) {
221  auto& output = l.get_activations(j);
222  const auto& output_size = l.get_output_size(j);
223  El::LockedView(output,
224  input,
225  El::IR(offset, offset + output_size),
226  El::ALL);
227  offset += output_size;
228  }
229 }
230 
231 template <typename TensorDataType, El::Device Device>
234 {
235 
236  const size_t num_outputs = l.get_num_children();
237  const auto& input = l.get_prev_activations();
238  for (size_t j = 0; j < num_outputs; ++j) {
239  auto& output = l.get_activations(j);
240  // output.AlignWith(input);
241  output.Resize(l.get_output_size(j), input.Width());
242  }
243 }
244 
245 template <typename TensorDataType, data_layout Layout, El::Device Device>
247 {
248  fp_setup_outputs_impl(*this);
249 }
250 
251 template <typename TensorDataType, data_layout Layout, El::Device Device>
253 {
254  const auto& input_dims = this->get_input_dims();
255  const size_t num_dims = input_dims.size();
256  if (num_dims > 3) {
257  LBANN_ERROR(this->get_type(),
258  " layer \"",
259  this->get_name(),
260  "\" ",
261  "is operating on ",
262  num_dims,
263  "-D tensors, ",
264  "but only 3-D tensors are currently supported");
265  }
266 
267  const int split_dim = input_dims[this->m_slice_dim];
268 
269  if (this->m_slice_dim != num_dims - 1) {
270  LBANN_ERROR(this->get_type(),
271  " layer \"",
272  this->get_name(),
273  "\" ",
274  "has axis ",
275  this->m_slice_dim,
276  " However, ",
277  "Subgrpah parallelism is supported when split axis is the last "
278  "dimension");
279  }
280  const auto& input = this->get_prev_activations();
281 
282  auto const* ptr_input = dynamic_cast<
283  El::
284  DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Device> const*>(
285  &input);
286 
287  if (this->get_communication_flag() == COLL_OPT) {
288  El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
289  *ptr_input,
290  this->get_all_activations(),
291  split_dim,
292  this->get_subgrid_comm(),
294  3);
295  }
296  else if (this->get_communication_flag() == COLL) {
297  El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
298  *ptr_input,
299  this->get_all_activations(),
300  split_dim,
301  this->get_subgrid_comm(),
303  2);
304  }
305  else {
306  El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
307  *ptr_input,
308  this->get_all_activations(),
309  split_dim,
310  this->get_subgrid_comm(),
312  1);
313  }
314 }
315 
316 template <typename TensorDataType, data_layout Layout, El::Device Device>
318 {
319  const auto& input_dims = this->get_input_dims();
320  const size_t num_dims = input_dims.size();
321 
322  if (this->m_slice_dim == num_dims - 1 &&
325  }
326  else {
327  fp_compute_impl(*this);
328  }
329 }
330 
331 template <typename TensorDataType, data_layout Layout, El::Device Device>
333 {
334  const auto& input_dims = this->get_input_dims();
335 
336  const int split_dim =
337  int(input_dims[this->m_slice_dim] / this->get_num_children());
338 
339  auto& input_grad = this->get_error_signals();
340 
341  auto* ptr_input_grad = dynamic_cast<
342  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Device>*>(
343  &input_grad);
344 
345  El::copy::TranslateBetweenGridsGather<TensorDataType, Device, Device>(
346  *ptr_input_grad,
348  split_dim,
349  this->get_subgrid_comm(),
351 }
352 
353 template <typename TensorDataType, data_layout Layout, El::Device Device>
355 {
356  const auto& output0_grad = this->get_prev_error_signals(0);
357  auto& input_grad = this->get_error_signals();
358  input_grad.Empty(false);
359  input_grad.Resize(this->get_input_size(), output0_grad.Width());
360  El::Zeros(input_grad, this->get_input_size(), output0_grad.Width());
361 }
362 
363 template <typename TensorDataType, data_layout Layout, El::Device Device>
365 {
366 
367  const auto& input_dims = this->get_input_dims();
368  const size_t num_dims = input_dims.size();
369 
370  if (this->m_slice_dim == num_dims - 1 &&
373  }
374  else {
375  bp_compute_impl(*this);
376  }
377 }
378 
379 #ifndef LBANN_SLICE_LAYER_INSTANTIATE
380 #define PROTO_DEVICE(T, Device) \
381  extern template class slice_layer<T, data_layout::DATA_PARALLEL, Device>; \
382  extern template class slice_layer<T, data_layout::MODEL_PARALLEL, Device>
383 
385 #undef PROTO_DEVICE
386 #endif // LBANN_SLICE_LAYER_INSTANTIATE
387 
388 } // namespace lbann
389 
390 #endif // LBANN_LAYERS_TRANSFORM_SLICE_HPP_INCLUDED
void serialize(ArchiveT &ar)
El::SyncInfo< Device > syncSubGridCommunication
Definition: slice.hpp:93
bool m_set_slice_points_from_data_reader
Definition: slice.hpp:113
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Definition: slice_impl.hpp:36
slice_points_mode
Definition: metadata.hpp:66
#define LBANN_ERROR(...)
Definition: exception.hpp:37
int get_output_size(size_t output_index=0) const
Get output tensor size.
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Generates nicely formatted description messages.
Definition: description.hpp:49
void setup_slice_points(size_t slice_dim, bool set_slice_points_from_data_reader, const slice_points_mode var_category)
Definition: slice.hpp:80
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
Definition: slice.hpp:364
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: slice.hpp:192
virtual description get_description() const
Human-readable description.
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: slice.hpp:70
constexpr El::Device Device
OutputAbsDistMatrixType & get_prev_error_signals(int child_index=0)
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
Slice tensor along a specified dimension.
Definition: slice.hpp:49
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override
int m_expected_num_child_layers
Expected number of child layers. A negative value indicates no limit.
Definition: layer.hpp:842
int get_input_size(size_t input_index=0) const
Get input tensor size.
void assign_to_repeated(google::protobuf::RepeatedField< T > &field, ContainerT const &values)
Assign a range of values to a repeated protobuf field.
Definition: impl.hpp:125
void bp_compute_subgrid()
Definition: slice.hpp:332
slice_points_mode m_var_category
Definition: slice.hpp:115
std::vector< std::unique_ptr< OutputAbsDistMatrixType > > & get_all_activations()
std::vector< size_t > m_slice_points
Definition: slice.hpp:111
int get_num_children() const noexcept
Get number of child layers.
Definition: layer.hpp:576
slice_layer & operator=(const slice_layer &other)=default
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: slice.hpp:69
size_t m_slice_dim
Definition: slice.hpp:109
void write_specific_proto(lbann_data::Layer &proto) const final
Definition: slice.hpp:146
std::string get_type() const override
Get the layer type's name.
Definition: slice.hpp:179
void setup_slice_points(size_t slice_dim, std::vector< size_t > slice_points)
Definition: slice.hpp:74
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
slice_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: slice.hpp:173
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
Definition: slice.hpp:317
void fp_compute_subgrid()
Definition: slice.hpp:252
friend void fp_setup_outputs_impl(slice_layer< U, Layout, D > &)
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
void bp_setup_gradient_wrt_inputs() override
Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w...
Definition: slice.hpp:354
friend class cereal::access
Definition: slice.hpp:95
SubGraphCommunication get_communication_flag()
Definition: layer.hpp:487
friend void fp_compute_impl(slice_layer< U, Layout, Device > &)
description get_description() const override
Human-readable description.
Definition: slice.hpp:198
bool subgraph_parallelism_execution() const noexcept
Definition: layer.hpp:522
void fp_setup_outputs() override
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
Definition: slice.hpp:246
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: slice.hpp:185
std::vector< std::unique_ptr< OutputAbsDistMatrixType > > & get_all_prev_error_signals()
const InputAbsDistMatrixType & get_error_signals(const Layer &parent) const override
friend void bp_compute_impl(slice_layer< U, Layout, Device > &)