27 #ifndef LBANN_LAYERS_LOSS_CROSS_ENTROPY_IMPL_HPP_INCLUDED 28 #define LBANN_LAYERS_LOSS_CROSS_ENTROPY_IMPL_HPP_INCLUDED 33 #ifdef LBANN_HAS_DISTCONV 35 #endif // LBANN_HAS_DISTCONV 39 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
// Dimension setup for the cross-entropy layer. The signature line is not part
// of this excerpt; presumably this is the `setup_dims()` override -- confirm.
// The visible body fixes the output dimensions to {1} and then validates the
// dimensions of the two input tensors, formatting a diagnostic on mismatch.
// NOTE(review): the statements that raise the formatted errors, and several
// closing braces, are not visible in this excerpt.
43 this->set_output_dims({1});
// Validation that only runs when distconv support is compiled in and enabled.
// NOTE(review): the error text built below reads as two sentences merged
// together ("only supports ... is not supported ..."); the condition guarding
// it is not visible here -- confirm the intended wording.
45 #ifdef LBANN_HAS_DISTCONV 53 if (this->distconv_enabled()) {
59 const auto& parents = this->get_parent_layers();
62 std::stringstream err;
64 << get_type() <<
" layer \"" << this->get_name() <<
"\" " 65 <<
"only supports use_labels is not supported in model parallel layout" 70 const auto& predictions_dims = this->get_input_dims(0);
71 const auto& labels_dims = this->get_input_dims(1);
// The labels tensor (input 1) must have extent 1 in its 0-th dimension;
// otherwise report its full shape.
73 if (labels_dims[0] != 1) {
74 std::stringstream err;
75 err << get_type() <<
" layer \"" << this->get_name() <<
"\" " 76 <<
"expects the 0-th dimension of the tensor to be 1 when use labels " 77 <<
"is enabled. Found tensor with shape (";
81 for (
size_t j = 0; j < labels_dims.size(); ++j) {
82 err << (j > 0 ?
" x " :
"") << labels_dims[j];
// Both inputs must have the same number of dimensions, and at least two.
// NOTE(review): the message says ">2 dimensions" but the condition only
// rejects fewer than 2, i.e. exactly 2 is accepted -- message or check is off.
91 if (predictions_dims.size() != labels_dims.size() ||
92 predictions_dims.size() < 2) {
93 std::stringstream err;
94 err << get_type() <<
" layer \"" << this->get_name() <<
"\" " 95 <<
"expects both input tensors to have the same number of dimensions " 96 <<
"and have >2 dimensions when use_labels is enabled. " 97 <<
"Found tensors with shape (";
// Append every parent's name followed by that input's dimensions.
101 for (
int i = 0; i < this->get_num_parents(); ++i) {
102 const auto& dims = this->get_input_dims(i);
103 err << (i > 0 ?
", " :
"") <<
"layer \"" << parents[i]->get_name()
105 for (
size_t j = 0; j < dims.size(); ++j) {
106 err << (j > 0 ?
" x " :
"") << dims[j];
// All dimensions after the 0-th must be equal between predictions and labels.
// NOTE(review): "dimesion" in the message below is a typo for "dimension".
114 if (!std::equal(predictions_dims.begin() + 1,
115 predictions_dims.end(),
116 labels_dims.begin() + 1)) {
117 std::stringstream err;
118 err << get_type() <<
" layer \"" << this->get_name() <<
"\" " 119 <<
"expects both input tensors to have the same shape after the 0-th " 120 <<
"dimesion when use_labels is enabled. Found tensors with shape (";
124 for (
int i = 0; i < this->get_num_parents(); ++i) {
125 const auto& dims = this->get_input_dims(i);
126 err << (i > 0 ?
", " :
"") <<
"layer \"" << parents[i]->get_name()
128 for (
size_t j = 0; j < dims.size(); ++j) {
129 err << (j > 0 ?
" x " :
"") << dims[j];
// Exact-match check: the two inputs must have identical dimensions.
// NOTE(review): the branch that selects between this check and the
// use_labels checks above is not visible in this excerpt.
138 if (this->get_input_dims(0) != this->get_input_dims(1)) {
139 const auto& parents = this->get_parent_layers();
140 std::stringstream err;
141 err << get_type() <<
" layer \"" << this->get_name() <<
"\" " 142 <<
"has input tensors with different dimensions (";
143 for (
int i = 0; i < this->get_num_parents(); ++i) {
144 const auto& dims = this->get_input_dims(i);
145 err << (i > 0 ?
", " :
"") <<
"layer \"" << parents[i]->get_name()
147 for (
size_t j = 0; j < dims.size(); ++j) {
148 err << (j > 0 ?
" x " :
"") << dims[j];
// Data setup taking the maximum mini-batch size. The line carrying the
// function name is not part of this excerpt; presumably `setup_data` -- confirm.
// The visible body reads the first input's activations and switches on the
// layer's data layout (the case bodies are not visible here).
157 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
159 size_t max_mini_batch_size)
164 const auto&
prediction = this->get_prev_activations(0);
165 switch (this->get_data_layout()) {
// When built with HYDROGEN_HAVE_CUB and the workspace lives on a GPU, set the
// workspace matrix memory mode to 1.
// NOTE(review): presumably mode 1 selects the CUB memory pool -- confirm
// against the Hydrogen documentation.
177 #ifdef HYDROGEN_HAVE_CUB 178 if (m_workspace->GetLocalDevice() == El::Device::GPU) {
179 m_workspace->Matrix().SetMemoryMode(1);
181 #endif // HYDROGEN_HAVE_CUB 184 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
// Forward computation (signature line not visible; presumably `fp_compute`).
// With distconv compiled in and enabled, the work is handed to
// fp_compute_distconv().
188 #ifdef LBANN_HAS_DISTCONV 189 if (this->distconv_enabled()) {
190 fp_compute_distconv();
// Default path: align the workspace with the distribution of the first input.
// NOTE(review): the step that fills the workspace is not visible in this
// excerpt.
194 #endif // LBANN_HAS_DISTCONV 197 const auto&
prediction = this->get_prev_activations(0);
198 m_workspace->AlignWith(
prediction.DistData());
// Combine the workspace across its redundant communicator, then copy the
// result into the layer's output activations.
204 this->get_comm()->allreduce(*m_workspace, m_workspace->RedundantComm());
205 El::Copy(*m_workspace, this->get_activations());
// Backward computation (signature line not visible; presumably `bp_compute`).
// With distconv compiled in and enabled, the work is handed to
// bp_compute_distconv().
208 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
212 #ifdef LBANN_HAS_DISTCONV 213 if (this->distconv_enabled()) {
214 bp_compute_distconv();
// Default path: align the workspace with the first input's distribution and
// copy the incoming error signals into it.
// NOTE(review): the gradient computation that follows is not visible in this
// excerpt.
217 #endif // LBANN_HAS_DISTCONV 220 const auto&
prediction = this->get_prev_activations(0);
221 m_workspace->AlignWith(
prediction.DistData());
222 El::Copy(this->get_prev_error_signals(), *m_workspace);
// Serialises this layer's settings into the given protobuf Layer message
// (the line carrying the function name is not visible; presumably
// `write_specific_proto`). Records the tensor data type for T and the
// `use_labels` flag on the cross_entropy sub-message.
228 template <
typename T, data_layout L, El::Device D>
230 lbann_data::Layer& proto)
const 232 proto.set_datatype(proto::ProtoDataType<T>);
233 auto* msg = proto.mutable_cross_entropy();
234 msg->set_use_labels(m_use_labels);
237 #ifdef LBANN_HAS_ONNX 238 template <
typename T, data_layout L, El::Device D>
// ONNX export (signature line not visible in this excerpt). Adds to `graph` a
// chain of nodes: Log(parent 0) -> Mul(parent 1, log) -> Reshape -> ReduceSum.
// Node inputs that come from parent layers are named "<parent>_<child index>".
// NOTE(review): no negation node is visible in this excerpt; confirm the
// exported graph has the intended sign for cross entropy.
241 auto const parents = this->get_parent_layers();
// Log of the first parent's output.
243 auto* log = graph.add_node();
244 size_t idx = parents[0]->find_child_layer_index(*
this);
245 log->add_input(parents[0]->get_name() +
"_" +
std::to_string(idx));
246 log->add_output(this->get_name() +
"_log");
247 log->set_name(this->get_name() +
"_log");
248 log->set_op_type(
"Log");
250 log->set_doc_string(
"Log node for Cross Entropy Layer");
// Multiply the second parent's output by the log result.
253 auto* mul = graph.add_node();
254 idx = parents[1]->find_child_layer_index(*
this);
255 mul->add_input(parents[1]->get_name() +
"_" +
std::to_string(idx));
256 mul->add_input(log->output(0));
257 mul->add_output(this->get_name() +
"_mul");
258 mul->set_name(this->get_name() +
"_mul");
259 mul->set_op_type(
"Mul");
261 mul->set_doc_string(
"Multiply node for Cross Entropy Layer");
// INT64 initializer holding the values {0, -1}, used as the target shape of
// the Reshape node below.
264 auto* shape = graph.add_initializer();
265 shape->set_name(this->get_name() +
"_mul_shape");
266 shape->set_data_type(onnx::TensorProto::INT64);
268 shape->add_int64_data(0);
269 shape->add_int64_data(-1);
270 shape->set_doc_string(this->get_name() +
" shape to reshape multiply");
// Reshape the product using the initializer above.
// NOTE(review): "muultiply" in the doc string below is a typo.
272 auto* reshape = graph.add_node();
273 reshape->add_input(mul->output(0));
274 reshape->add_input(shape->name());
275 reshape->add_output(this->get_name() +
"_mul_reshape");
276 reshape->set_name(this->get_name() +
"_mul_reshape");
277 reshape->set_op_type(
"Reshape");
278 reshape->set_domain(
"");
279 reshape->set_doc_string(
"Reshape muultiply result for Cross Entropy Layer");
// INT64 initializer holding the value -1, passed as the axes input of
// ReduceSum.
// NOTE(review): the doc string concatenates the layer name and
// "ReduceSum axes" with no separating space, unlike the shape doc string.
283 auto* axes = graph.add_initializer();
284 axes->set_name(this->get_name() +
"_reducesum_axes");
285 axes->set_data_type(onnx::TensorProto::INT64);
287 axes->add_int64_data(-1);
288 axes->set_doc_string(this->get_name() +
"ReduceSum axes");
// ReduceSum over the reshaped product; one output name is registered per
// child layer, in the form "<this layer>_<child index>".
290 auto* reduce_sum = graph.add_node();
291 reduce_sum->add_input(reshape->output(0));
292 reduce_sum->add_input(axes->name());
293 for (
auto const* child : this->get_child_layers()) {
294 idx = this->find_child_layer_index(*child);
295 reduce_sum->add_output(this->get_name() +
"_" +
std::to_string(idx));
297 reduce_sum->set_name(this->get_name() +
"_reducesum");
298 reduce_sum->set_op_type(
"ReduceSum");
299 reduce_sum->set_domain(
"");
300 reduce_sum->set_doc_string(
"ReduceSum node for Cross Entropy Layer");
302 #endif // LBANN_HAS_ONNX 304 #ifdef LBANN_HAS_DISTCONV 305 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
// Const accessor returning a reference to this layer's
// cross_entropy_distconv_adapter. Only the return type and part of a cast
// expression are visible in this excerpt; presumably this is the const
// overload of `get_distconv_adapter` -- confirm.
306 const cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>&
310 const cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
// Non-const counterpart: the visible tail casts the result of a
// get_distconv_adapter() call to the cross-entropy adapter type.
314 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
315 cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>&
319 cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
322 .get_distconv_adapter());
// Returns the dc::Shape of the input tensor selected by `index`.
// NOTE(review): the function body is not visible in this excerpt, so how the
// shape is derived cannot be stated from here.
325 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
326 dc::Shape cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>::
327 get_prev_activations_shape(
int index)
const 334 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
// Returns the dc::Shape of the output tensor. Starts from the shape of
// input 0 and visits every dimension except the last one.
// NOTE(review): the loop body and the return statement are not visible here;
// presumably those dimensions are collapsed to 1 -- confirm.
335 dc::Shape cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>::
336 get_activations_shape(
int output_index)
const 341 dc::Shape shape = this->get_prev_activations_shape(0);
342 for (
int i = 0; i < shape.num_dims() - 1; ++i) {
// Returns the local dc::Shape of the output tensor. Starts from the local
// shape of the input tensor and visits every entry except the last one.
// NOTE(review): the loop body and the return statement are not visible here;
// presumably those entries are collapsed to 1 -- confirm.
348 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
349 dc::Shape cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>::
350 get_activations_local_shape(
int index)
const 353 auto input_shape = this->get_prev_activations().get_local_shape();
354 for (
int i = 0; i < input_shape.length() - 1; ++i) {
// Adjusts the tensor distributions used by the cross-entropy distconv
// adapter. The line carrying the function name is not visible; presumably
// `setup_distributions` -- confirm.
360 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
361 void cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>::
// Take the split shapes of the output activations and of the incoming error
// signals, force every entry except the last to 1, and write them back.
366 auto activations_split = this->get_activations_dist().get_split_shape();
367 auto prev_error_signals_split =
368 this->get_prev_error_signals_dist().get_split_shape();
369 for (
int i = 0; i < activations_split.length() - 1; ++i) {
370 activations_split[i] = 1;
371 prev_error_signals_split[i] = 1;
373 this->get_activations_dist().set_split_shape(activations_split);
374 this->get_prev_error_signals_dist().set_split_shape(prev_error_signals_split);
// Visit each of the four distribution collections in turn.
// NOTE(review): the loop bodies are not visible in this excerpt.
376 for (
auto& d : this->m_prev_activations_dists) {
381 for (
auto& d : this->m_activations_dists) {
386 for (
auto& d : this->m_prev_error_signals_dists) {
391 for (
auto& d : this->m_error_signals_dists) {
// Creates and configures the distconv cross-entropy operator.
// A dc::CrossEntropy is constructed from the distconv backend and the
// `m_use_labels` flag, then set up with both input tensors and the output
// tensor.
// NOTE(review): the assignment target of the make_unique call is not visible
// here (presumably `m_cross_entropy`); `workspace_capacity` is not used in
// the visible lines.
398 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
399 void cross_entropy_distconv_adapter<TensorDataType, T_layout, Dev>::setup_layer(
400 size_t workspace_capacity)
403 std::make_unique<dc::CrossEntropy>(dc::get_backend(), m_use_labels);
404 m_cross_entropy->setup(this->get_prev_activations(0),
405 this->get_prev_activations(1),
406 this->get_activations(0));
408 #endif // LBANN_HAS_DISTCONV 412 #endif // LBANN_LAYERS_LOSS_CROSS_ENTROPY_IMPL_HPP_INCLUDED void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
El::DistMatrix< TensorDataType, El::STAR, El::VC, El::ELEMENT, D > StarVCMatDT
void mark_updated(const dc::Dist &d)
El::DistMatrix< TensorDataType, El::STAR, El::MR, El::ELEMENT, D > StarMRMatDT
ColSumMat.
Cross entropy between probability vectors.
virtual void setup_distributions(tensor_overlap_constraints &constraints)
std::string to_string(El::Device const &d)
void setup_data(size_t max_mini_batch_size) override
Setup layer data. Called by the 'setup' function. Memory is allocated for distributed matrices...
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
::distconv::tensor::Shape Shape
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
void write_specific_proto(lbann_data::Layer &proto) const final
virtual dc::Shape get_prev_activations_shape(int input_index=0) const
void setup_data(size_t max_mini_batch_size) override
void mark_invariant(const dc::Dist &d)