d6/d14/layers_2activations_2softmax_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.

 #ifndef LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED
 #define LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED

 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/proto/datatype_helpers.hpp"
 #include "lbann/utils/dnn_enums.hpp"
 #if defined LBANN_HAS_DNN_LIB
 #include "lbann/utils/dnn_lib/helpers.hpp"
 #include "lbann/utils/dnn_lib/softmax.hpp"
 #endif // defined LBANN_HAS_DNN_LIB
 #include "lbann/proto/layers.pb.h"
 #include "lbann/utils/dnn_lib/softmax.hpp"

 #ifdef LBANN_HAS_DISTCONV
 #include "distconv/dnn_backend/softmax.hpp"
 #include "lbann/utils/distconv.hpp"
 #endif

 // Threshold outputs to a minimum value.
 //
 // If enabled, the minimum output value is sqrt(min), where min is the
 // minimum, normalized, positive value (~1e-19 for float and ~1e-154
 // for double). During backprop, gradients are computed as if
 // thresholding did not occur, so there will be a discrepancy for
 // values that are thresholded.
 //
 // This macro selects the initializer of softmax_layer::threshold_val
 // below: sqrt(std::numeric_limits<T>::min()) when defined, zero
 // otherwise.
 #define LBANN_ENABLE_SOFTMAX_THRESHOLD

 namespace lbann {

 #ifdef LBANN_HAS_DISTCONV
 // Distconv type aliases used by the softmax layer. Only compiled when
 // LBANN_HAS_DISTCONV is defined (see the enclosing #ifdef).
 namespace dc {
 // DNN-library backend type provided by distconv.
 using Backend = ::distconv::BackendDNNLib;
 // Distconv softmax operator specialized on that backend.
 using Softmax = ::distconv::Softmax<Backend>;
 } // namespace dc

 /**
  * @brief Distconv adapter for the softmax layer.
  *
  * Extends data_type_distconv_adapter with the softmax-specific
  * distribution and layer setup hooks, and owns the distconv softmax
  * operator. The @c T_layout and @c Dev template parameters are not
  * referenced inside this class declaration; they only make the
  * adapter type match the layer it is paired with.
  *
  * @tparam TensorDataType Element type of the tensors.
  * @tparam T_layout       Data layout of the owning layer.
  * @tparam Dev            Device of the owning layer.
  */
 template <typename TensorDataType, data_layout T_layout, El::Device Dev>
 class softmax_distconv_adapter
   : public data_type_distconv_adapter<TensorDataType>
 {
 public:
   /** @brief Device tensor type inherited from the base adapter. */
   using TensorDevType =
     typename data_type_distconv_adapter<TensorDataType>::TensorDevType;

   /**
    * @brief Construct an adapter bound to @p layer.
    *
    * Marked @c explicit so a @c Layer& is never silently converted
    * into an adapter.
    *
    * @param layer Layer forwarded to the base adapter constructor.
    */
   explicit softmax_distconv_adapter(Layer& layer)
     : data_type_distconv_adapter<TensorDataType>(layer)
   {}
   virtual ~softmax_distconv_adapter() = default;

   /** @brief Set up tensor distributions (defined out of line). */
   void setup_distributions(tensor_overlap_constraints& constraints) override;
   /** @brief Set up the distconv softmax operator (defined out of line). */
   void setup_layer(size_t workspace_capacity) override;

   /** @brief Distconv softmax operator; null until it is created. */
   std::unique_ptr<dc::Softmax> m_softmax;
 };
 #endif // LBANN_HAS_DISTCONV

 /**
  * @brief Softmax activation layer.
  *
  * The output tensor has the same dimensions as the input tensor. The
  * dimensions the softmax is applied over are selected by a
  * @c softmax_mode. The forward and backward computations are declared
  * here and defined out of line.
  *
  * @tparam TensorDataType Element type of the tensors.
  * @tparam Layout         Data layout reported by get_data_layout().
  * @tparam Device         Device reported by get_device_allocation().
  */
 template <typename TensorDataType, data_layout Layout, El::Device Device>
 class softmax_layer : public data_type_layer<TensorDataType>
 {
 public:
   /** @brief The tensor type expected in this object. */
   using AbsDistMatrixType = El::AbstractDistMatrix<TensorDataType>;

   /**
    * @brief Construct a softmax layer.
    *
    * @param comm Communicator forwarded to data_type_layer.
    * @param mode Which tensor dimensions to apply softmax over.
    *
    * Reports an error through LBANN_ERROR when @p mode is
    * softmax_mode::INVALID.
    */
   softmax_layer(lbann_comm* comm, softmax_mode mode)
     : data_type_layer<TensorDataType>(comm),
       m_mode(mode)
 #ifdef LBANN_HAS_DNN_LIB
       ,
       m_tensors_dnn_desc(this)
 #endif // LBANN_HAS_DNN_LIB
   {
     if (mode == softmax_mode::INVALID) {
       LBANN_ERROR("invalid softmax mode");
     }
   }

   /**
    * @brief Copy constructor.
    *
    * Copies the mode, deep-copies the workspace when @p other has one,
    * and (with a DNN library) copies the tensor manager and re-points
    * it at this layer. Members that are not listed in the initializer
    * list (the tensor descriptors and the threshold value) take their
    * default initialization instead of being copied.
    */
   softmax_layer(const softmax_layer& other)
     : data_type_layer<TensorDataType>(other),
       m_mode(other.m_mode),
       m_workspace(other.m_workspace ? other.m_workspace->Copy() : nullptr)
 #ifdef LBANN_HAS_DNN_LIB
       ,
       m_tensors_dnn_desc(other.m_tensors_dnn_desc)
 #endif // LBANN_HAS_DNN_LIB
   {
 #ifdef LBANN_HAS_DNN_LIB
     // The copied manager still refers to `other`; rebind it.
     m_tensors_dnn_desc.set_layer(this);
 #endif // LBANN_HAS_DNN_LIB
   }

   ~softmax_layer() = default;

   /** @brief Allocate and return a copy of this layer (caller owns it). */
   softmax_layer* copy() const final { return new softmax_layer(*this); }
   /** @brief Get the layer type's name. */
   std::string get_type() const final { return "softmax"; }
   /** @brief Get the data layout of the data tensors. */
   data_layout get_data_layout() const final { return Layout; }
   /** @brief Get the device allocation for the data tensors. */
   El::Device get_device_allocation() const final { return Device; }

   // Softmax can run in-place (local workspace acts as an
   // intermediate buffer)
   bool can_run_inplace() const override { return true; }

   /**
    * @brief Tensors needed to compute backpropagation: both the error
    * signals and the activations.
    */
   int get_backprop_requirements() const override
   {
     return ERROR_SIGNALS | ACTIVATIONS;
   }

 #ifdef LBANN_HAS_ONNX
   /** @brief Name of the ONNX operator for this layer. */
   std::string get_onnx_op_type() const override { return "Softmax"; }
 #endif // LBANN_HAS_ONNX

   /** @brief Set the output dimensions equal to the input dimensions. */
   void setup_dims() final
   {
     data_type_layer<TensorDataType>::setup_dims();
     this->set_output_dims(this->get_input_dims());
   }

   /**
    * @brief Set up layer data.
    *
    * After the base-class setup, instantiates the workspace with the
    * distribution of the previous activations, except that the column
    * distribution is forced to El::STAR.
    *
    * @param max_mini_batch_size Forwarded to the base-class setup.
    */
   void setup_data(size_t max_mini_batch_size) override
   {
     data_type_layer<TensorDataType>::setup_data(max_mini_batch_size);
     auto dist = this->get_prev_activations().DistData();
     dist.colDist = El::STAR;
     m_workspace.reset(AbsDistMatrixType::Instantiate(dist));
 #ifdef HYDROGEN_HAVE_CUB
     if (m_workspace->GetLocalDevice() == El::Device::GPU) {
       m_workspace->Matrix().SetMemoryMode(1); // CUB memory pool
     }
 #endif // HYDROGEN_HAVE_CUB
 #ifdef LBANN_HAS_DNN_LIB
     // A default-constructed layer never bound the tensor manager to
     // itself, so bind it lazily here.
     if (!m_tensors_dnn_desc.get_layer()) {
       m_tensors_dnn_desc.set_layer(this);
     }
 #endif // LBANN_HAS_DNN_LIB
   }

   /** @brief Forward computation (defined out of line). */
   void fp_compute() final;
   /** @brief Backward computation (defined out of line). */
   void bp_compute() final;

   // Free-function implementations get access to private members.
   template <typename U>
   friend void fp_compute_impl(softmax_layer<U, Layout, Device>& l);
   template <typename U>
   friend void bp_compute_impl(softmax_layer<U, Layout, Device>& l);

   /** @brief Serialization hook (defined out of line). */
   template <typename ArchiveT>
   void serialize(ArchiveT& ar);

 protected:
   /** @brief Write layer-specific fields to @p proto (defined out of line). */
   void write_specific_proto(lbann_data::Layer& proto) const final;

 private:
   // using dnn_backend = dnn_lib::get_backend<Device>;
 #ifdef LBANN_HAS_ONEDNN_CPU
   using dnn_backend = onednn_backend<Device>;
 #else
   using dnn_backend = openmp_backend;
 #endif
   using dnnTensorDescriptor = typename dnn_backend::TensorDescriptor;

   /** @brief Descriptor for local input tensor. */
   dnnTensorDescriptor input_descriptor_;
   /** @brief Descriptor for local output tensor. */
   dnnTensorDescriptor output_descriptor_;
   /** @brief Descriptor for local input gradient tensor. */
   dnnTensorDescriptor grad_wrt_input_descriptor_;
   /** @brief Descriptor for local output gradient tensor. */
   dnnTensorDescriptor grad_wrt_output_descriptor_;

   friend cereal::access;
   /**
    * @brief Default constructor, reachable only through cereal::access.
    *
    * The mode keeps its in-class default (softmax_mode::INVALID) until
    * a real value is assigned, e.g. by deserialization.
    */
   softmax_layer() : data_type_layer<TensorDataType>(nullptr) {}

   /**
    * @brief Which tensor dimensions to apply softmax over.
    *
    * Defaults to INVALID so a default-constructed layer never holds an
    * indeterminate value.
    */
   softmax_mode m_mode = softmax_mode::INVALID;

   /** @brief Workspace for column-wise reductions. */
   std::unique_ptr<AbsDistMatrixType> m_workspace;

 #ifdef LBANN_HAS_DNN_LIB

   /** @brief Tensor descriptor manager for the DNN library. */
   dnn_lib::data_parallel_layer_tensor_manager<TensorDataType>
     m_tensors_dnn_desc;
 #endif // LBANN_HAS_DNN_LIB

 // Minimum output value to avoid denormalized floats
 #ifdef LBANN_ENABLE_SOFTMAX_THRESHOLD
   TensorDataType threshold_val = static_cast<TensorDataType>(
     El::Sqrt(std::numeric_limits<TensorDataType>::min()));
 #else
   TensorDataType threshold_val = El::TypeTraits<TensorDataType>::Zero();
 #endif // LBANN_ENABLE_SOFTMAX_THRESHOLD

 #ifdef LBANN_HAS_DISTCONV
   friend class softmax_distconv_adapter<TensorDataType, Layout, Device>;

 protected:
   /** @brief Distconv is supported only for GPU, data-parallel layers. */
   bool is_distconv_supported() const final
   {
     return Device == El::Device::GPU && Layout == data_layout::DATA_PARALLEL;
   }
   /** @brief Create the softmax distconv adapter for this layer. */
   void setup_distconv_adapter() final
   {
     this->get_distconv_adapter_ptr() = std::make_unique<
       softmax_distconv_adapter<TensorDataType, Layout, Device>>(*this);
   }
   /** @brief Access the distconv adapter (defined out of line). */
   softmax_distconv_adapter<TensorDataType, Layout, Device>&
   get_distconv_adapter() final;
   /** @brief Const access to the distconv adapter (defined out of line). */
   const softmax_distconv_adapter<TensorDataType, Layout, Device>&
   get_distconv_adapter() const final;
 #endif // LBANN_HAS_DISTCONV
 };

 // Unless LBANN_SOFTMAX_LAYER_INSTANTIATE is defined by the including
 // translation unit, declare the softmax_layer specializations as
 // extern templates for both the DATA_PARALLEL and MODEL_PARALLEL
 // layouts, so they are not implicitly instantiated here.
 // NOTE(review): instantiate_device.hpp presumably expands PROTO_DEVICE
 // once per supported (data type, device) pair -- confirm in that header.
 #ifndef LBANN_SOFTMAX_LAYER_INSTANTIATE
 #define PROTO_DEVICE(T, Device)                                                \
   extern template class softmax_layer<T, data_layout::DATA_PARALLEL, Device>;  \
   extern template class softmax_layer<T, data_layout::MODEL_PARALLEL, Device>

 #include "lbann/macros/instantiate_device.hpp"
 #undef PROTO_DEVICE
 #endif // LBANN_SOFTMAX_LAYER_INSTANTIATE

 } // namespace lbann

 #endif // LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED
lbann::softmax_layer::get_data_layout
data_layout get_data_layout() const final
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: layers/activations/softmax.hpp:129

lbann::Layer::setup_dims
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.

lbann::openmp_backend
DNN library backend for hand-rolled, OMP-based implementations.
Definition: openmp.hpp:43

lbann::ERROR_SIGNALS
Definition: base.hpp:207

lbann::softmax_layer::get_type
std::string get_type() const final
Get the layer type's name.
Definition: layers/activations/softmax.hpp:128

lbann::softmax_layer::softmax_layer
softmax_layer(lbann_comm *comm, softmax_mode mode)
Definition: layers/activations/softmax.hpp:98

lbann::softmax_layer::AbsDistMatrixType
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
Definition: layers/activations/softmax.hpp:93

LBANN_ERROR
#define LBANN_ERROR(...)
Definition: exception.hpp:37

lbann::softmax_layer::m_workspace
std::unique_ptr< AbsDistMatrixType > m_workspace
Workspace for column-wise reductions.
Definition: layers/activations/softmax.hpp:228

distconv.hpp

lbann::lbann_comm
Definition: comm.hpp:105

lbann::softmax_layer
Definition: layers/activations/softmax.hpp:86

lbann::protobuf::serialize
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.

lbann::softmax_layer::dnnTensorDescriptor
typename dnn_backend::TensorDescriptor dnnTensorDescriptor
Definition: layers/activations/softmax.hpp:197

lbann::softmax_layer::grad_wrt_input_descriptor_
dnnTensorDescriptor grad_wrt_input_descriptor_
Descriptor for local input gradient tensor.
Definition: layers/activations/softmax.hpp:210

lbann::softmax_layer::setup_data
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
Definition: layers/activations/softmax.hpp:150

lbann::openmp_backend::TensorDescriptor
Definition: openmp.hpp:60

lbann::softmax_layer::copy
softmax_layer * copy() const final
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: layers/activations/softmax.hpp:127

lbann::dnn_lib::data_parallel_layer_tensor_manager
Definition: data_type_layer.hpp:52

softmax.hpp

lbann::softmax_layer::softmax_layer
softmax_layer()
Definition: layers/activations/softmax.hpp:219

lbann::reduction_mode::INVALID

lbann::softmax_layer::get_backprop_requirements
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: layers/activations/softmax.hpp:135

lbann::Device
constexpr El::Device Device
Definition: OperatorTraits.hpp:62

lbann::softmax_layer::grad_wrt_output_descriptor_
dnnTensorDescriptor grad_wrt_output_descriptor_
Descriptor for local output gradient tensor.
Definition: layers/activations/softmax.hpp:214

lbann::ACTIVATIONS
Definition: base.hpp:209

helpers.hpp

lbann::data_type_layer
Definition: data_type_layer.hpp:69

lbann::softmax_layer::setup_dims
void setup_dims() final
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Definition: layers/activations/softmax.hpp:144

lbann::data_layout::DATA_PARALLEL

lbann::softmax_layer::m_mode
softmax_mode m_mode
Definition: layers/activations/softmax.hpp:222

dnn_enums.hpp

instantiate_device.hpp

lbann::data_layout
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218

data_type_layer.hpp

lbann::data_type_layer::setup_data
void setup_data(size_t max_mini_batch_size) override

lbann::softmax_layer::can_run_inplace
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: layers/activations/softmax.hpp:134

lbann::softmax_layer::get_device_allocation
El::Device get_device_allocation() const final
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: layers/activations/softmax.hpp:130

lbann::softmax_layer::output_descriptor_
dnnTensorDescriptor output_descriptor_
Descriptor for local output tensor.
Definition: layers/activations/softmax.hpp:206

lbann::softmax_layer::input_descriptor_
dnnTensorDescriptor input_descriptor_
Descriptor for local input tensor.
Definition: layers/activations/softmax.hpp:202

lbann::softmax_mode
softmax_mode
Which tensor dimensions to apply softmax over.
Definition: dnn_enums.hpp:87

lbann::softmax_layer::softmax_layer
softmax_layer(const softmax_layer &other)
Definition: layers/activations/softmax.hpp:111

lbann
Definition: callback_helpers.hpp:32

lbann::data_type_distconv_adapter::TensorDevType
dc::TensorDev< OutputTensorDataType > TensorDevType
Definition: data_type_distconv_adapter.hpp:60

datatype_helpers.hpp