dd/dae/cross__grid__sum_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.

 #ifndef LBANN_LAYER_CROSS_GRID_SUM_HPP_INCLUDED
 #define LBANN_LAYER_CROSS_GRID_SUM_HPP_INCLUDED

 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/utils/exception.hpp"

 namespace lbann {

 /** @brief Sum a tensor across process sub-grids.
  *
  *  The layer is marked for sub-graph parallel execution in its
  *  constructor. During forward prop, each process copies the input
  *  that belongs to its own grid into the matching output and then
  *  sum-all-reduces the local data over the communicator returned by
  *  @c get_subgrid_comm(). Backward prop does the same with the
  *  gradients w.r.t. the outputs.
  *
  *  All inputs must have identical dimensions; the outputs share those
  *  dimensions.
  */
 template <typename TensorDataType, El::Device Dev>
 class cross_grid_sum_layer final : public data_type_layer<TensorDataType>
 {
 public:
   /** @brief Construct the layer.
    *  @param comm Communicator forwarded to the base layer.
    */
   cross_grid_sum_layer(lbann_comm* comm) : data_type_layer<TensorDataType>(comm)
   {
     this->m_expected_num_parent_layers = -1; // No limit on parents
     this->m_expected_num_child_layers = -1;  // No limit on children
     this->set_subgraph_parallelism_execution();
   }

   /** @brief Dynamically allocate a copy of this layer. */
   cross_grid_sum_layer* copy() const final
   {
     return new cross_grid_sum_layer(*this);
   }
   /** @brief Name of the layer type. */
   std::string get_type() const final { return "cross_grid_sum"; }
   /** @brief The layer always uses the data-parallel layout. */
   data_layout get_data_layout() const final
   {
     return data_layout::DATA_PARALLEL;
   }
   /** @brief Device on which the data tensors are allocated. */
   El::Device get_device_allocation() const final { return Dev; }
   /** @brief Inputs are never aliased as outputs. */
   bool can_run_inplace() const override { return false; }
   /** @brief Backprop only needs the error signals. */
   int get_backprop_requirements() const override { return ERROR_SIGNALS; }

 protected:
   /** @brief Write layer-specific fields to @c proto (defined out of line). */
   void write_specific_proto(lbann_data::Layer& proto) const final;

 private:
   /** @brief Run the base setup and require at least one parent layer. */
   void setup_pointers() final
   {
     data_type_layer<TensorDataType>::setup_pointers();
     if (this->get_num_parents() < 1) {
       LBANN_ERROR(get_type(),
                   " layer \"",
                   this->get_name(),
                   "\" has no parent layers");
     }
   }

   /** @brief Set the output dimensions to the first input's dimensions
    *  and verify that every input has the same dimensions.
    */
   void setup_dims() final
   {
     data_type_layer<TensorDataType>::setup_dims();
     this->set_output_dims(this->get_input_dims());

     // print dims
 #ifdef LBANN_DEBUG
     {
       const auto& dims_print = this->get_input_dims();
       auto const dims_size = dims_print.size();
       for (auto ii = 0UL; ii < dims_size; ++ii) {
         std::cout << "Index:" << ii << " dim" << dims_print[ii] << "\n";
       }
     }
 #endif // LBANN_DEBUG

     // Check that input dimensions match
     const auto& output_dims = this->get_output_dims();
     for (int i = 0; i < this->get_num_parents(); ++i) {
       if (this->get_input_dims(i) != output_dims) {
         // Report the dimensions of every parent so the offending one
         // can be identified from the message alone.
         const auto& parents = this->get_parent_layers();
         std::stringstream err;
         err << get_type() << " layer \"" << this->get_name() << "\" "
             << "has input tensors with incompatible dimensions (";
         for (int j = 0; j < this->get_num_parents(); ++j) {
           const auto& dims = this->get_input_dims(j);
           err << (j > 0 ? ", " : "") << "layer \"" << parents[j]->get_name()
               << "\" outputs ";
           for (size_t k = 0; k < dims.size(); ++k) {
             err << (k > 0 ? " x " : "") << dims[k];
           }
         }
         err << ")";
         LBANN_ERROR(err.str());
       }
     }
   }

   /** @brief Copy the local grid's input to its output and sum the local
    *  data over the sub-grid communicator.
    */
   void fp_compute() final
   {
     // Pick the tensor index whose output lives on a grid containing
     // this process. The last match wins, as no early exit is taken.
     // NOTE(review): the scan is bounded by the number of parents but
     // indexes outputs; presumably parents and children are paired
     // one-to-one -- confirm.
     const int num_parents = this->get_num_parents();
     int tag = -1;
     for (int i = 0; i < num_parents; ++i) {
       if (this->get_activations(i).Grid().InGrid()) {
         tag = i;
       }
     }
     if (tag < 0) {
       LBANN_ERROR(get_type(),
                   " layer \"",
                   this->get_name(),
                   "\" could not find an output tensor on a grid that "
                   "contains this process");
     }

     auto& output = this->get_activations(tag);
     auto& input = this->get_prev_activations(tag);
     El::Copy(input, output);

     auto& output_cast = dynamic_cast<
       El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>&>(
       output);

     auto const syncInfoOutput =
       El::SyncInfoFromMatrix(output_cast.LockedMatrix());

     const El::Int mloc = output_cast.LocalHeight();
     const El::Int nloc = output_cast.LocalWidth();

     // The reduction reads from a snapshot of the local data and writes
     // the sum back into the output buffer.
     El::Matrix<TensorDataType, Dev> temp_output(mloc, nloc);

     El::Copy(output_cast.LockedMatrix(), temp_output);

     El::mpi::AllReduce(temp_output.Buffer(),
                        output_cast.Buffer(),
                        mloc * nloc,
                        El::mpi::SUM,
                        this->get_subgrid_comm(),
                        syncInfoOutput);
   }

   /** @brief Resize every output to (output size) x (mini-batch size).
    *
    *  Does nothing when the layer has no children.
    */
   void fp_setup_outputs() final
   {

     if (this->get_num_children() < 1) {
       return;
     }
     auto mini_batch_size =
       this->infer_mini_batch_size_from_parents_or_default_to_current();

     // Initialize output tensors
     for (int i = 0; i < this->get_num_children(); ++i) {

       auto& output = this->get_activations(i);
       output.Empty(false);
       output.Resize(this->get_output_size(i), mini_batch_size);
     }
   }

   /** @brief Copy the local grid's gradient w.r.t. output into the
    *  gradient w.r.t. input and sum it over the sub-grid communicator.
    */
   void bp_setup_gradient_wrt_inputs() final
   {
     const auto parents = this->get_parent_layers();

     // Find the grid tag of the parent whose error signal lives on a
     // grid containing this process (last match wins).
     const int num_parents = El::To<int>(parents.size());
     int tag_parent = -1;
     for (int i = 0; i < num_parents; ++i) {
       if (this->get_error_signals(i).Grid().InGrid()) {
         tag_parent = parents[i]->get_grid_tag();
       }
     }
     // The tensor index is the grid tag minus one.
     // NOTE(review): presumably grid tags are 1-based -- confirm.
     int const tag = tag_parent - 1;
     if (tag < 0) {
       LBANN_ERROR(get_type(),
                   " layer \"",
                   this->get_name(),
                   "\" could not determine the error signal index for "
                   "the grid that contains this process");
     }

     const auto& gradient_wrt_output = this->get_prev_error_signals(tag);
     auto& gradient_wrt_input = this->get_error_signals(tag);

     // Give the error signals the shape of the incoming gradient.
     // NOTE(review): the loop is bounded by the number of children but
     // indexes error signals -- confirm that the counts always agree.
     const El::Int grad_height = gradient_wrt_output.Height();
     const El::Int grad_width = gradient_wrt_output.Width();
     const int num_children = this->get_num_children();
     for (int i = 0; i < num_children; ++i) {
       auto& error_signal = dynamic_cast<
         El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>&>(
         this->get_error_signals(i));
       error_signal.Resize(grad_height, grad_width);
     }

     El::Copy(gradient_wrt_output, gradient_wrt_input);

     // In-place sum across the sub-grid communicator.
     El::AllReduce(gradient_wrt_input, this->get_subgrid_comm(), El::mpi::SUM);
   }

   /** @brief No-op: all backprop work is done in
    *  bp_setup_gradient_wrt_inputs().
    */
   void bp_compute() final {}
 };

 // Unless LBANN_CROSS_GRID_SUM_LAYER_INSTANTIATE is defined, declare the
 // cross_grid_sum_layer specializations as extern templates so that
 // including this header does not instantiate them. PROTO_DEVICE is
 // consumed by instantiate_device.hpp (presumably expanded once per
 // supported data type / device pair -- confirm) and undefined afterwards.
 #ifndef LBANN_CROSS_GRID_SUM_LAYER_INSTANTIATE
 #define PROTO_DEVICE(T, Device)                                                \
   extern template class cross_grid_sum_layer<T, Device>

 #include "lbann/macros/instantiate_device.hpp"
 #undef PROTO_DEVICE

 #endif // LBANN_CROSS_GRID_SUM_LAYER_INSTANTIATE

 } // namespace lbann

 #endif // LBANN_LAYER_CROSS_GRID_SUM_HPP_INCLUDED
lbann::Layer::setup_dims
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.

lbann::ERROR_SIGNALS
Definition: base.hpp:207

lbann::cross_grid_sum_layer::write_specific_proto
void write_specific_proto(lbann_data::Layer &proto) const final

lbann::cross_grid_sum_layer::get_device_allocation
El::Device get_device_allocation() const final
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: cross_grid_sum.hpp:55

lbann::cross_grid_sum_layer::copy
cross_grid_sum_layer * copy() const final
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: cross_grid_sum.hpp:46

lbann::cross_grid_sum_layer::setup_pointers
void setup_pointers() final
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
Definition: cross_grid_sum.hpp:64

LBANN_ERROR
#define LBANN_ERROR(...)
Definition: exception.hpp:37

lbann::Layer::get_grid_tag
int get_grid_tag() const noexcept
Identifying tag for process grid.

lbann::Layer::get_output_size
int get_output_size(size_t output_index=0) const
Get output tensor size.

lbann::Layer::get_num_parents
int get_num_parents() const noexcept
Get number of parent layers.
Definition: layer.hpp:574

lbann::lbann_comm
Definition: comm.hpp:105

lbann::Layer::get_input_dims
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.

lbann::cross_grid_sum_layer::setup_dims
void setup_dims() final
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Definition: cross_grid_sum.hpp:75

lbann::Device
constexpr El::Device Device
Definition: OperatorTraits.hpp:62

lbann::cross_grid_sum_layer::get_backprop_requirements
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: cross_grid_sum.hpp:57

lbann::data_type_layer< TensorDataType >::get_prev_error_signals
OutputAbsDistMatrixType & get_prev_error_signals(int child_index=0)

lbann::data_type_layer< TensorDataType >::get_prev_activations
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)

lbann::data_type_layer< TensorDataType >::infer_mini_batch_size_from_parents_or_default_to_current
El::Int infer_mini_batch_size_from_parents_or_default_to_current() const override

lbann::data_type_layer< TensorDataType >::get_activations
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override

lbann::cross_grid_sum_layer
Definition: cross_grid_sum.hpp:36

lbann::Layer::m_expected_num_child_layers
int m_expected_num_child_layers
Expected number of child layers. A negative value indicates no limit.
Definition: layer.hpp:842

lbann::cross_grid_sum_layer::fp_compute
void fp_compute() final
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
Definition: cross_grid_sum.hpp:113

lbann::Layer::set_output_dims
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.

lbann::Layer::get_num_children
int get_num_children() const noexcept
Get number of child layers.
Definition: layer.hpp:576

lbann::Layer::get_parent_layers
std::vector< const Layer * > get_parent_layers() const

lbann::data_type_layer
Definition: data_type_layer.hpp:69

lbann::Layer::set_subgraph_parallelism_execution
void set_subgraph_parallelism_execution()
Definition: layer.hpp:515

lbann::cross_grid_sum_layer::can_run_inplace
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: cross_grid_sum.hpp:56

lbann::data_layout::DATA_PARALLEL

lbann::Layer::get_name
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332

lbann::Layer::get_child_layers
std::vector< const Layer * > get_child_layers() const

lbann::cross_grid_sum_layer::get_type
std::string get_type() const final
Get the layer type's name.
Definition: cross_grid_sum.hpp:50

exception.hpp

instantiate_device.hpp

lbann::cross_grid_sum_layer::cross_grid_sum_layer
cross_grid_sum_layer(lbann_comm *comm)
Definition: cross_grid_sum.hpp:39

lbann::cross_grid_sum_layer::bp_compute
void bp_compute() final
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
Definition: cross_grid_sum.hpp:209

lbann::data_layout
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218

data_type_layer.hpp

lbann::data_type_layer< TensorDataType >::get_subgrid_comm
El::mpi::Comm & get_subgrid_comm()
Definition: data_type_layer.hpp:182

lbann::cross_grid_sum_layer::fp_setup_outputs
void fp_setup_outputs() final
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
Definition: cross_grid_sum.hpp:150

lbann::Layer::setup_pointers
virtual void setup_pointers()
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...

lbann::Layer::get_output_dims
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.

lbann::cross_grid_sum_layer::get_data_layout
data_layout get_data_layout() const final
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: cross_grid_sum.hpp:51

lbann::Layer::m_expected_num_parent_layers
int m_expected_num_parent_layers
Definition: layer.hpp:838

lbann::data_type_layer< TensorDataType >::get_error_signals
const InputAbsDistMatrixType & get_error_signals(const Layer &parent) const override

lbann
Definition: callback_helpers.hpp:32

lbann::cross_grid_sum_layer::bp_setup_gradient_wrt_inputs
void bp_setup_gradient_wrt_inputs() final
Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w...
Definition: cross_grid_sum.hpp:168