LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
layers/activations/softmax.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED
28 #define LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED
29 
33 #if defined LBANN_HAS_DNN_LIB
36 #endif // defined LBANN_HAS_DNN_LIB
37 #include "lbann/proto/layers.pb.h"
39 
40 #ifdef LBANN_HAS_DISTCONV
41 #include "distconv/dnn_backend/softmax.hpp"
42 #include "lbann/utils/distconv.hpp"
43 #endif
44 
45 // Threshold outputs to a minimum value.
46 
47 // If enabled, the minimum output value is sqrt(min), where min is the
48 // minimum, normalized, positive value (~1e-19 for float and ~1e-154
49 // for double). During backprop, gradients are computed as if
50 // thresholding did not occur, so there will be a discrepancy for
51 // values that are thresholded.
52 #define LBANN_ENABLE_SOFTMAX_THRESHOLD
53 
54 namespace lbann {
55 
56 #ifdef LBANN_HAS_DISTCONV
// Short aliases used by the DistConv code path in this header:
// `Backend` names the DistConv DNN-library backend and `Softmax` is the
// DistConv softmax operator specialized for that backend.
57 namespace dc {
58 using Backend = ::distconv::BackendDNNLib;
59 using Softmax = ::distconv::Softmax<Backend>;
60 } // namespace dc
61 
// DistConv adapter for the softmax layer (only compiled when
// LBANN_HAS_DISTCONV is defined). It extends
// data_type_distconv_adapter<TensorDataType>, overrides the distribution
// and layer setup hooks (declared here, defined elsewhere), and owns the
// DistConv softmax operator.
62 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
63 class softmax_distconv_adapter
64  : public data_type_distconv_adapter<TensorDataType>
65 {
66 public:
 // NOTE(review): the right-hand side of this alias (source line 68) is
 // missing from this listing; presumably a dc::TensorDev<...> type --
 // confirm against the original header.
67  using TensorDevType =
69 
 // Forwards the owning layer to the base adapter; no other state is
 // initialized here (m_softmax starts out empty).
70  softmax_distconv_adapter(Layer& layer)
71  : data_type_distconv_adapter<TensorDataType>(layer)
72  {}
73  virtual ~softmax_distconv_adapter() = default;
74 
 // Overrides of the base adapter's setup hooks; bodies are not visible
 // in this header.
75  void setup_distributions(tensor_overlap_constraints& constraints) override;
76  void setup_layer(size_t workspace_capacity) override;
77 
 // DistConv softmax operator. Public so the layer implementation can
 // reach it; presumably created in setup_layer -- TODO confirm.
78  std::unique_ptr<dc::Softmax> m_softmax;
79 };
80 #endif // LBANN_HAS_DISTCONV
81 
// Softmax activation layer, templated on the tensor data type, the data
// layout and the device. Output dimensions always equal the input
// dimensions. A private workspace matrix is (re)created in setup_data,
// and outputs may be thresholded to a small positive minimum
// (see threshold_val below).
//
// NOTE: this listing omits several original source lines (for example the
// constructor signatures and some member declarations); comments below
// flag those gaps instead of guessing at them.
85 template <typename TensorDataType, data_layout Layout, El::Device Device>
86 class softmax_layer : public data_type_layer<TensorDataType>
87 {
88 public:
90 
 // Distributed-matrix type used for this layer's workspace.
93  using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;
94 
96 
97 public:
 // Main constructor. Its signature line (source line 98) is missing from
 // this listing; the initializer list uses `comm` and `mode`. Stores the
 // softmax mode, attaches the DNN tensor-descriptor manager to this
 // layer (when LBANN_HAS_DNN_LIB is defined), and raises LBANN_ERROR if
 // the mode is softmax_mode::INVALID.
99  : data_type_layer<TensorDataType>(comm),
100  m_mode(mode)
101 #ifdef LBANN_HAS_DNN_LIB
102  ,
103  m_tensors_dnn_desc(this)
104 #endif // LBANN_HAS_DNN_LIB
105  {
106  if (mode == softmax_mode::INVALID) {
107  LBANN_ERROR("invalid softmax mode");
108  }
109  }
110 
 // Copy constructor. Its signature line (source line 111) is missing from
 // this listing; the initializer list reads from `other`. The workspace
 // is duplicated through Copy() when `other` has one (otherwise left
 // null), and the copied descriptor manager is re-pointed at this layer
 // so it does not keep referring to `other`.
112  : data_type_layer<TensorDataType>(other),
113  m_mode(other.m_mode),
114  m_workspace(other.m_workspace ? other.m_workspace->Copy() : nullptr)
115 #ifdef LBANN_HAS_DNN_LIB
116  ,
117  m_tensors_dnn_desc(other.m_tensors_dnn_desc)
118 #endif // LBANN_HAS_DNN_LIB
119  {
120 #ifdef LBANN_HAS_DNN_LIB
121  m_tensors_dnn_desc.set_layer(this);
122 #endif // LBANN_HAS_DNN_LIB
123  }
124 
125  ~softmax_layer() = default;
126 
 // copy() returns a copy allocated with `new`; the caller is presumably
 // responsible for releasing it -- confirm against the base-class contract.
 // The remaining accessors report the layer type name ("softmax") and the
 // Layout / Device template arguments.
127  softmax_layer* copy() const final { return new softmax_layer(*this); }
128  std::string get_type() const final { return "softmax"; }
129  data_layout get_data_layout() const final { return Layout; }
130  El::Device get_device_allocation() const final { return Device; }
131 
132  // Softmax can run in-place (local workspace acts as an
133  // intermediate buffer)
134  bool can_run_inplace() const override { return true; }
 // Backprop needs both the incoming error signals and this layer's
 // activations, reported as a bitwise OR of the two flags.
135  int get_backprop_requirements() const override
136  {
137  return ERROR_SIGNALS | ACTIVATIONS;
138  }
139 
140 #ifdef LBANN_HAS_ONNX
 // Name of the ONNX operator that corresponds to this layer.
141  std::string get_onnx_op_type() const override { return "Softmax"; }
142 #endif // LBANN_HAS_ONNX
143 
 // Output tensor dimensions mirror the input dimensions. (Source line 146
 // is missing from this listing.)
144  void setup_dims() final
145  {
147  this->set_output_dims(this->get_input_dims());
148  }
149 
 // Runs the base-class data setup, then (re)creates the workspace matrix.
 // The workspace uses the distribution of the previous activations with
 // its column distribution overridden to El::STAR. With CUB available and
 // a GPU-resident workspace, memory mode 1 (CUB memory pool) is selected.
 // Finally, the DNN descriptor manager is attached to this layer if it
 // does not have one yet.
150  void setup_data(size_t max_mini_batch_size) override
151  {
152  data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);
153  auto dist = this->get_prev_activations().DistData();
154  dist.colDist = El::STAR;
155  m_workspace.reset(AbsDistMatrixType::Instantiate(dist));
156 #ifdef HYDROGEN_HAVE_CUB
157  if (m_workspace->GetLocalDevice() == El::Device::GPU) {
158  m_workspace->Matrix().SetMemoryMode(1); // CUB memory pool
159  }
160 #endif // HYDROGEN_HAVE_CUB
161 #ifdef LBANN_HAS_DNN_LIB
162  if (!m_tensors_dnn_desc.get_layer())
163  m_tensors_dnn_desc.set_layer(this);
164 #endif // LBANN_HAS_DNN_LIB
165  }
166 
 // Forward / backward propagation entry points; defined out of line.
167  void fp_compute() final;
168  void bp_compute() final;
169 
 // The free-function implementations are befriended for every tensor data
 // type U (with this layer's Layout and Device) so they can reach the
 // private members declared below.
170  template <typename U>
171  friend void fp_compute_impl(softmax_layer<U, Layout, Device>& l);
172  template <typename U>
173  friend void bp_compute_impl(softmax_layer<U, Layout, Device>& l);
174 
176 
 // Serialization hook for an archive type; defined out of line.
178  template <typename ArchiveT>
179  void serialize(ArchiveT& ar);
180 
182 
183 protected:
 // Writes this layer's type-specific fields into the protobuf message;
 // defined out of line.
185  void write_specific_proto(lbann_data::Layer& proto) const final;
186 
187 private:
189 
 // Selection of the DNN backend type. NOTE(review): the #else alternative
 // (source line 195) and the declarations on source lines 199-214 are
 // missing from this listing.
191  // using dnn_backend = dnn_lib::get_backend<Device>;
192 #ifdef LBANN_HAS_ONEDNN_CPU
193  using dnn_backend = onednn_backend<Device>;
194 #else
196 #endif
198 
215 
217 
 // Default constructor is private and passes a null communicator to the
 // base class; cereal::access is befriended, presumably so that
 // deserialization can default-construct the layer -- TODO confirm.
218  friend cereal::access;
219  softmax_layer() : data_type_layer<TensorDataType>(nullptr) {}
220 
223 
 // Workspace matrix, (re)created in setup_data; null until then. The
 // generated documentation describes it as the workspace for column-wise
 // reductions.
228  std::unique_ptr<AbsDistMatrixType> m_workspace;
229 
230 #ifdef LBANN_HAS_DNN_LIB
231 
 // DNN tensor-descriptor manager. Its type (source line 232) is missing
 // from this listing.
233  m_tensors_dnn_desc;
234 #endif // LBANN_HAS_DNN_LIB
235 
236 // Minimum output value to avoid denormalized floats
 // With thresholding enabled this is the square root of the smallest
 // positive normalized value of TensorDataType; otherwise it is zero.
237 #ifdef LBANN_ENABLE_SOFTMAX_THRESHOLD
238  TensorDataType threshold_val = static_cast<TensorDataType>(
239  El::Sqrt(std::numeric_limits<TensorDataType>::min()));
240 #else
241  TensorDataType threshold_val = El::TypeTraits<TensorDataType>::Zero();
242 #endif // LBANN_ENABLE_SOFTMAX_THRESHOLD
243 
244 #ifdef LBANN_HAS_DISTCONV
 // The adapter is a friend so it can reach this layer's private state.
245  friend class softmax_distconv_adapter<TensorDataType, Layout, Device>;
246 
247 protected:
 // DistConv is reported as supported only for GPU, data-parallel
 // instantiations of this layer.
248  bool is_distconv_supported() const final
249  {
250  return Device == El::Device::GPU && Layout == data_layout::DATA_PARALLEL;
251  }
 // Installs a freshly constructed softmax_distconv_adapter bound to this
 // layer as the layer's DistConv adapter.
252  void setup_distconv_adapter() final
253  {
254  this->get_distconv_adapter_ptr() = std::make_unique<
255  softmax_distconv_adapter<TensorDataType, Layout, Device>>(*this);
256  }
 // Typed accessors for the adapter (mutable and const); defined out of
 // line.
257  softmax_distconv_adapter<TensorDataType, Layout, Device>&
258  get_distconv_adapter() final;
259  const softmax_distconv_adapter<TensorDataType, Layout, Device>&
260  get_distconv_adapter() const final;
261 #endif // LBANN_HAS_DISTCONV
262 };
263 
// Unless LBANN_SOFTMAX_LAYER_INSTANTIATE is defined, PROTO_DEVICE declares
// `extern template` instantiations of softmax_layer for both the
// DATA_PARALLEL and MODEL_PARALLEL layouts of a given (T, Device) pair.
// NOTE(review): the line that expands PROTO_DEVICE (source line 269) is
// missing from this listing.
264 #ifndef LBANN_SOFTMAX_LAYER_INSTANTIATE
265 #define PROTO_DEVICE(T, Device) \
266  extern template class softmax_layer<T, data_layout::DATA_PARALLEL, Device>; \
267  extern template class softmax_layer<T, data_layout::MODEL_PARALLEL, Device>
268 
270 #undef PROTO_DEVICE
271 #endif // LBANN_SOFTMAX_LAYER_INSTANTIATE
272 
273 } // namespace lbann
274 
275 #endif // LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED
data_layout get_data_layout() const final
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
DNN library backend for hand-rolled, OMP-based implementations.
Definition: openmp.hpp:43
std::string get_type() const final
Get the layer type's name.
softmax_layer(lbann_comm *comm, softmax_mode mode)
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
#define LBANN_ERROR(...)
Definition: exception.hpp:37
std::unique_ptr< AbsDistMatrixType > m_workspace
Workspace for column-wise reductions.
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
typename dnn_backend::TensorDescriptor dnnTensorDescriptor
dnnTensorDescriptor grad_wrt_input_descriptor_
Descriptor for local input gradient tensor.
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
softmax_layer * copy() const final
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
constexpr El::Device Device
dnnTensorDescriptor grad_wrt_output_descriptor_
Descriptor for local output gradient tensor.
void setup_dims() final
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
void setup_data(size_t max_mini_batch_size) override
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
El::Device get_device_allocation() const final
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
dnnTensorDescriptor output_descriptor_
Descriptor for local output tensor.
dnnTensorDescriptor input_descriptor_
Descriptor for local input tensor.
softmax_mode
Which tensor dimensions to apply softmax over.
Definition: dnn_enums.hpp:87
softmax_layer(const softmax_layer &other)
dc::TensorDev< OutputTensorDataType > TensorDevType