27 #ifndef LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED 28 #define LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED 33 #if defined LBANN_HAS_DNN_LIB 36 #endif // defined LBANN_HAS_DNN_LIB 37 #include "lbann/proto/layers.pb.h" 40 #ifdef LBANN_HAS_DISTCONV 41 #include "distconv/dnn_backend/softmax.hpp" 52 #define LBANN_ENABLE_SOFTMAX_THRESHOLD 56 #ifdef LBANN_HAS_DISTCONV 58 using Backend = ::distconv::BackendDNNLib;
59 using Softmax = ::distconv::Softmax<Backend>;
62 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
63 class softmax_distconv_adapter
64 :
public data_type_distconv_adapter<TensorDataType>
70 softmax_distconv_adapter(Layer& layer)
71 : data_type_distconv_adapter<TensorDataType>(layer)
73 virtual ~softmax_distconv_adapter() =
default;
75 void setup_distributions(tensor_overlap_constraints& constraints)
override;
76 void setup_layer(
size_t workspace_capacity)
override;
78 std::unique_ptr<dc::Softmax> m_softmax;
80 #endif // LBANN_HAS_DISTCONV 85 template <
typename TensorDataType, data_layout Layout, El::Device Device>
101 #ifdef LBANN_HAS_DNN_LIB
103 m_tensors_dnn_desc(this)
113 m_mode(other.m_mode),
114 m_workspace(other.m_workspace ? other.m_workspace->Copy() : nullptr)
115 #ifdef LBANN_HAS_DNN_LIB
117 m_tensors_dnn_desc(other.m_tensors_dnn_desc)
120 #ifdef LBANN_HAS_DNN_LIB 121 m_tensors_dnn_desc.set_layer(
this);
122 #endif // LBANN_HAS_DNN_LIB 128 std::string
get_type() const final {
return "softmax"; }
140 #ifdef LBANN_HAS_ONNX 141 std::string get_onnx_op_type()
const override {
return "Softmax"; }
142 #endif // LBANN_HAS_ONNX 147 this->set_output_dims(this->get_input_dims());
153 auto dist = this->get_prev_activations().DistData();
154 dist.colDist = El::STAR;
155 m_workspace.reset(AbsDistMatrixType::Instantiate(dist));
156 #ifdef HYDROGEN_HAVE_CUB 157 if (m_workspace->GetLocalDevice() == El::Device::GPU) {
158 m_workspace->Matrix().SetMemoryMode(1);
160 #endif // HYDROGEN_HAVE_CUB 161 #ifdef LBANN_HAS_DNN_LIB 162 if (!m_tensors_dnn_desc.get_layer())
163 m_tensors_dnn_desc.set_layer(
this);
164 #endif // LBANN_HAS_DNN_LIB 167 void fp_compute()
final;
168 void bp_compute()
final;
170 template <
typename U>
172 template <
typename U>
178 template <
typename ArchiveT>
185 void write_specific_proto(lbann_data::Layer& proto)
const final;
192 #ifdef LBANN_HAS_ONEDNN_CPU 218 friend cereal::access;
230 #ifdef LBANN_HAS_DNN_LIB 234 #endif // LBANN_HAS_DNN_LIB 237 #ifdef LBANN_ENABLE_SOFTMAX_THRESHOLD 238 TensorDataType threshold_val =
static_cast<TensorDataType
>(
239 El::Sqrt(std::numeric_limits<TensorDataType>::min()));
241 TensorDataType threshold_val = El::TypeTraits<TensorDataType>::Zero();
242 #endif // LBANN_ENABLE_SOFTMAX_THRESHOLD 244 #ifdef LBANN_HAS_DISTCONV 245 friend class softmax_distconv_adapter<TensorDataType, Layout,
Device>;
248 bool is_distconv_supported() const final
252 void setup_distconv_adapter() final
254 this->get_distconv_adapter_ptr() = std::make_unique<
255 softmax_distconv_adapter<TensorDataType, Layout, Device>>(*this);
257 softmax_distconv_adapter<TensorDataType, Layout, Device>&
258 get_distconv_adapter() final;
259 const softmax_distconv_adapter<TensorDataType, Layout,
Device>&
260 get_distconv_adapter() const final;
261 #endif // LBANN_HAS_DISTCONV 264 #ifndef LBANN_SOFTMAX_LAYER_INSTANTIATE 265 #define PROTO_DEVICE(T, Device) \ 266 extern template class softmax_layer<T, data_layout::DATA_PARALLEL, Device>; \ 267 extern template class softmax_layer<T, data_layout::MODEL_PARALLEL, Device> 271 #endif // LBANN_SOFTMAX_LAYER_INSTANTIATE 275 #endif // LBANN_LAYERS_ACTIVATIONS_SOFTMAX_HPP_INCLUDED data_layout get_data_layout() const final
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
DNN library backend for hand-rolled, OMP-based implementations.
std::string get_type() const final
Get the layer type's name.
softmax_layer(lbann_comm *comm, softmax_mode mode)
El::AbstractDistMatrix< TensorDataType > AbsDistMatrixType
The tensor type expected in this object.
std::unique_ptr< AbsDistMatrixType > m_workspace
Workspace for column-wise reductions.
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
typename dnn_backend::TensorDescriptor dnnTensorDescriptor
dnnTensorDescriptor grad_wrt_input_descriptor_
Descriptor for local input gradient tensor.
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
softmax_layer * copy() const final
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
constexpr El::Device Device
dnnTensorDescriptor grad_wrt_output_descriptor_
Descriptor for local output gradient tensor.
void setup_dims() final
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
data_layout
Data layout that is optimized for different modes of parallelism.
void setup_data(size_t max_mini_batch_size) override
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
El::Device get_device_allocation() const final
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
dnnTensorDescriptor output_descriptor_
Descriptor for local output tensor.
dnnTensorDescriptor input_descriptor_
Descriptor for local input tensor.
softmax_mode
Which tensor dimensions to apply softmax over.
softmax_layer(const softmax_layer &other)
dc::TensorDev< OutputTensorDataType > TensorDevType