dc/df4/cross__grid__sum__slice_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the license.

 #ifndef LBANN_LAYER_CROSS_GRID_SUM_SLICE_HPP_INCLUDED
 #define LBANN_LAYER_CROSS_GRID_SUM_SLICE_HPP_INCLUDED

 #include "lbann/layers/data_type_layer.hpp"
 #include "lbann/utils/exception.hpp"

 namespace lbann {

 /** @brief Sum a tensor across the sub-grid communicator and keep a slice.
  *
  *  Forward prop all-reduces (sum) the local input over the communicator
  *  returned by @c get_subgrid_comm() and then copies this rank's chunk of
  *  the last tensor dimension into its output. Backward prop all-gathers
  *  the per-rank output gradients over the same communicator to rebuild a
  *  gradient that spans the full last dimension.
  *
  *  Rank @c r of the sub-grid communicator reads parent @c r and writes
  *  child @c r. The last input dimension must be divisible by the size of
  *  the sub-grid communicator.
  */
 template <typename TensorDataType, El::Device Dev>
 class cross_grid_sum_slice_layer : public data_type_layer<TensorDataType>
 {
 public:
   /** @brief Construct the layer.
    *  @param comm Communicator handle forwarded to the base layer.
    */
   explicit cross_grid_sum_slice_layer(lbann_comm* comm)
     : data_type_layer<TensorDataType>(comm)
   {
     this->m_expected_num_parent_layers = -1; // No limit on parents
     this->m_expected_num_child_layers = -1;  // No limit on children
   }

   /** @brief Heap-allocate a copy of this layer (caller takes ownership). */
   cross_grid_sum_slice_layer* copy() const override
   {
     return new cross_grid_sum_slice_layer(*this);
   }
   std::string get_type() const override { return "cross_grid_sum_slice"; }
   data_layout get_data_layout() const override
   {
     return data_layout::DATA_PARALLEL;
   }
   El::Device get_device_allocation() const override { return Dev; }
   bool can_run_inplace() const override { return false; }
   int get_backprop_requirements() const override { return ERROR_SIGNALS; }

 protected:
   void write_specific_proto(lbann_data::Layer& proto) const final;

   /** @brief Default-constructed sync info handed to the backward
    *  all-gather.
    */
   El::SyncInfo<Dev> syncSubGridCommunication = El::SyncInfo<Dev>();

   /** @brief Run the base pointer setup and require at least one parent. */
   void setup_pointers() override
   {
     data_type_layer<TensorDataType>::setup_pointers();
     if (this->get_num_parents() < 1) {
       LBANN_ERROR(get_type(),
                   " layer \"",
                   this->get_name(),
                   "\" has no parent layers");
     }
   }

   /** @brief Set every output's dimensions to the input dimensions with
    *  the last dimension divided by the sub-grid communicator size.
    *
    *  Raises an LBANN error if the last dimension is not divisible by the
    *  communicator size, so a bad shape is reported at setup time instead
    *  of being silently truncated here and rejected later in forward prop.
    */
   void setup_dims() override
   {
     data_type_layer<TensorDataType>::setup_dims();

     const int subgrid_comm_size = El::mpi::Size(this->get_subgrid_comm());
     std::vector<int> output_dims = this->get_input_dims();
     const int last_dim = output_dims.back();

     if (last_dim % subgrid_comm_size != 0) {
       LBANN_ERROR(get_type(),
                   " layer \"",
                   this->get_name(),
                   "\" has a last dimension (",
                   last_dim,
                   ") that is not divisible by the sub-grid communicator "
                   "size (",
                   subgrid_comm_size,
                   ")");
     }

     // Slice along last dimension
     output_dims.back() = last_dim / subgrid_comm_size;

     const int num_children = this->get_num_children();
     for (int i = 0; i < num_children; ++i) {
       this->set_output_dims(output_dims, i);
     }
   }

   /** @brief All-reduce (sum) the local input over the sub-grid
    *  communicator and copy this rank's slice of the last dimension into
    *  the output.
    */
   void fp_compute() override
   {
     auto const subgrid_comm_rank = El::mpi::Rank(this->get_subgrid_comm());
     auto const subgrid_comm_size = El::mpi::Size(this->get_subgrid_comm());

     const auto input_dims = this->get_input_dims();
     const int last_dim = input_dims.back();

     // Validate the shape before any communication is issued; the inputs
     // to this check do not depend on the reduction.
     if (last_dim % subgrid_comm_size != 0) {
       LBANN_ERROR("cross_grid_sum_slice layer: last dimension should be "
                   "divided by the number of branches in subgraph");
     }

     auto& output = this->get_activations(subgrid_comm_rank);
     auto& input = this->get_prev_activations(subgrid_comm_rank);

     auto& output_cast = dynamic_cast<
       El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>&>(
       output);

     auto const sync_info_output =
       El::SyncInfoFromMatrix(output_cast.LockedMatrix());

     const El::Int mloc = input.LocalHeight();
     const El::Int nloc = input.LocalWidth();

     // Stage the local input in a freshly allocated matrix so that the
     // reduction operates on mloc * nloc contiguous elements.
     El::Matrix<TensorDataType, Dev> prev_allreduce(mloc, nloc),
       after_allreduce(mloc, nloc);

     El::Copy(input.LockedMatrix(), prev_allreduce);

     El::mpi::AllReduce(prev_allreduce.Buffer(),
                        after_allreduce.Buffer(),
                        mloc * nloc,
                        El::mpi::SUM,
                        this->get_subgrid_comm(),
                        sync_info_output);

     // Product of every dimension except the last one.
     int leading_size = 1;
     for (int i = 0; i < static_cast<int>(input_dims.size()) - 1; ++i) {
       leading_size *= input_dims[i];
     }

     const int slice_len = last_dim / subgrid_comm_size;
     const int slice_offset = slice_len * subgrid_comm_rank;

     // View the reduced buffer as a (last_dim x nloc * leading_size)
     // matrix with unit column stride and copy rows
     // [slice_offset, slice_offset + slice_len) into the output.
     // NOTE(review): this relies on the last tensor dimension being the
     // contiguous one within each sample - confirm against the tensor
     // layout convention.
     El::copy::util::InterleaveMatrix(
       slice_len,
       nloc * leading_size,
       after_allreduce.LockedBuffer(slice_offset, 0),
       1,
       last_dim,
       output_cast.Buffer(),
       1,
       slice_len,
       sync_info_output);
   }

   /** @brief Resize every output tensor to (output size x mini-batch
    *  size). Does nothing if the layer has no children.
    */
   void fp_setup_outputs() override
   {
     const int num_children = this->get_num_children();
     if (num_children < 1) {
       return;
     }

     const auto mini_batch_size =
       this->infer_mini_batch_size_from_parents_or_default_to_current();

     // Initialize output tensors
     for (int i = 0; i < num_children; ++i) {
       auto& output = this->get_activations(i);
       output.Empty(false);
       output.Resize(this->get_output_size(i), mini_batch_size);
     }
   }

   /** @brief All-gather the per-rank output gradients over the sub-grid
    *  communicator and store the assembled gradient in this rank's input
    *  gradient.
    *
    *  Every input gradient is resized to (input size x mini-batch size).
    *  Error signals are indexed by parent, so the resize loop runs over
    *  the parents.
    */
   void bp_setup_gradient_wrt_inputs() override
   {
     using MatrixType =
       El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>;

     auto const subgrid_comm_rank = El::mpi::Rank(this->get_subgrid_comm());
     auto const subgrid_comm_size = El::mpi::Size(this->get_subgrid_comm());
     const auto input_dims = this->get_input_dims();
     const int last_dim = input_dims.back();

     auto& input_grad = this->get_error_signals(subgrid_comm_rank);
     const auto& gradient_wrt_output =
       this->get_prev_error_signals(subgrid_comm_rank);

     const auto& gradient_wrt_output_cast =
       dynamic_cast<const MatrixType&>(gradient_wrt_output);
     auto& gradient_wrt_input_cast = dynamic_cast<MatrixType&>(input_grad);

     const int mloc = gradient_wrt_output_cast.LocalHeight();
     const int nloc = gradient_wrt_output_cast.LocalWidth();
     const int per_grid_last_dim = last_dim / subgrid_comm_size;

     // Number of length-per_grid_last_dim chunks in the local gradient.
     const int num_chunks = nloc * (mloc / per_grid_last_dim);

     El::Matrix<TensorDataType, Dev> temp_input(nloc, mloc),
       temp_output(num_chunks, last_dim),
       transposed(num_chunks, per_grid_last_dim),
       transposed_output(last_dim, num_chunks);

     // Copy the local gradient, then use Resize as a reshape to
     // (per_grid_last_dim x num_chunks); the element count is unchanged.
     // NOTE(review): assumes El::Matrix::Resize keeps the buffer contents
     // when the total element count does not grow - confirm.
     El::Copy(gradient_wrt_output_cast.LockedMatrix(), temp_input);
     temp_input.Resize(per_grid_last_dim, num_chunks);

     // Transpose so that each rank contributes a block of columns, which
     // the all-gather concatenates into (num_chunks x last_dim).
     El::Transpose(temp_input, transposed);

     El::mpi::AllGather(transposed.Buffer(),
                        mloc * nloc,
                        temp_output.Buffer(),
                        mloc * nloc,
                        this->get_subgrid_comm(),
                        syncSubGridCommunication);

     // Transpose back to (last_dim x num_chunks) and reshape to the local
     // shape of the input gradient.
     El::Transpose(temp_output, transposed_output);
     transposed_output.Resize(mloc * subgrid_comm_size, nloc);

     const auto mini_batch_size =
       this->infer_mini_batch_size_from_parents_or_default_to_current();

     const int num_parents = this->get_num_parents();
     for (int i = 0; i < num_parents; ++i) {
       auto& error_signal =
         dynamic_cast<MatrixType&>(this->get_error_signals(i));
       error_signal.Resize(this->get_input_size(), mini_batch_size);
     }
     El::Copy(transposed_output, gradient_wrt_input_cast.Matrix());
   }

   /** @brief Intentionally empty; the input gradient is produced in
    *  bp_setup_gradient_wrt_inputs().
    */
   void bp_compute() final {}
 };

 // Unless LBANN_CROSS_GRID_SUM_SLICE_LAYER_INSTANTIATE is defined by the
 // including translation unit, declare the layer's instantiations as
 // `extern template` so they are not implicitly instantiated here.
 // NOTE(review): presumably instantiate_device.hpp expands PROTO_DEVICE once
 // per supported (data type, device) pair - confirm against that header.
 #ifndef LBANN_CROSS_GRID_SUM_SLICE_LAYER_INSTANTIATE
 #define PROTO_DEVICE(T, Device)                                                \
   extern template class cross_grid_sum_slice_layer<T, Device>

 #include "lbann/macros/instantiate_device.hpp"
 #undef PROTO_DEVICE

 #endif // LBANN_CROSS_GRID_SUM_SLICE_LAYER_INSTANTIATE

 } // namespace lbann

 #endif // LBANN_LAYER_CROSS_GRID_SUM_SLICE_HPP_INCLUDED
lbann::Layer::setup_dims
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.

lbann::ERROR_SIGNALS
Definition: base.hpp:207

lbann::cross_grid_sum_slice_layer::can_run_inplace
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
Definition: cross_grid_sum_slice.hpp:56

LBANN_ERROR
#define LBANN_ERROR(...)
Definition: exception.hpp:37

lbann::Layer::get_output_size
int get_output_size(size_t output_index=0) const
Get output tensor size.

lbann::Layer::get_num_parents
int get_num_parents() const noexcept
Get number of parent layers.
Definition: layer.hpp:574

lbann::lbann_comm
Definition: comm.hpp:105

lbann::cross_grid_sum_slice_layer::bp_compute
void bp_compute() final
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
Definition: cross_grid_sum_slice.hpp:222

lbann::Layer::get_input_dims
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.

lbann::Device
constexpr El::Device Device
Definition: OperatorTraits.hpp:62

lbann::data_type_layer< TensorDataType >::get_prev_error_signals
OutputAbsDistMatrixType & get_prev_error_signals(int child_index=0)

lbann::data_type_layer< TensorDataType >::get_prev_activations
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)

lbann::data_type_layer< TensorDataType >::infer_mini_batch_size_from_parents_or_default_to_current
El::Int infer_mini_batch_size_from_parents_or_default_to_current() const override

lbann::data_type_layer< TensorDataType >::get_activations
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override

lbann::Layer::m_expected_num_child_layers
int m_expected_num_child_layers
Expected number of child layers. A negative value indicates no limit.
Definition: layer.hpp:842

lbann::Layer::get_input_size
int get_input_size(size_t input_index=0) const
Get input tensor size.

lbann::Layer::set_output_dims
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.

lbann::cross_grid_sum_slice_layer::get_type
std::string get_type() const override
Get the layer type's name.
Definition: cross_grid_sum_slice.hpp:50

lbann::Layer::get_num_children
int get_num_children() const noexcept
Get number of child layers.
Definition: layer.hpp:576

lbann::cross_grid_sum_slice_layer::write_specific_proto
void write_specific_proto(lbann_data::Layer &proto) const final

lbann::cross_grid_sum_slice_layer
Definition: cross_grid_sum_slice.hpp:36

lbann::data_type_layer
Definition: data_type_layer.hpp:69

lbann::cross_grid_sum_slice_layer::get_data_layout
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
Definition: cross_grid_sum_slice.hpp:51

lbann::cross_grid_sum_slice_layer::setup_pointers
void setup_pointers() override
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
Definition: cross_grid_sum_slice.hpp:65

lbann::data_layout::DATA_PARALLEL

lbann::Layer::get_name
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332

lbann::Layer::get_child_layers
std::vector< const Layer * > get_child_layers() const

lbann::cross_grid_sum_slice_layer::get_backprop_requirements
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Definition: cross_grid_sum_slice.hpp:57

lbann::cross_grid_sum_slice_layer::setup_dims
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Definition: cross_grid_sum_slice.hpp:76

exception.hpp

instantiate_device.hpp

lbann::cross_grid_sum_slice_layer::cross_grid_sum_slice_layer
cross_grid_sum_slice_layer(lbann_comm *comm)
Definition: cross_grid_sum_slice.hpp:39

lbann::cross_grid_sum_slice_layer::syncSubGridCommunication
El::SyncInfo< Dev > syncSubGridCommunication
Definition: cross_grid_sum_slice.hpp:63

lbann::data_layout
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218

lbann::cross_grid_sum_slice_layer::bp_setup_gradient_wrt_inputs
void bp_setup_gradient_wrt_inputs() override
Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w...
Definition: cross_grid_sum_slice.hpp:167

lbann::cross_grid_sum_slice_layer::get_device_allocation
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
Definition: cross_grid_sum_slice.hpp:55

data_type_layer.hpp

lbann::cross_grid_sum_slice_layer::copy
cross_grid_sum_slice_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
Definition: cross_grid_sum_slice.hpp:46

lbann::data_type_layer< TensorDataType >::get_subgrid_comm
El::mpi::Comm & get_subgrid_comm()
Definition: data_type_layer.hpp:182

lbann::cross_grid_sum_slice_layer::fp_setup_outputs
void fp_setup_outputs() override
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
Definition: cross_grid_sum_slice.hpp:148

lbann::Layer::setup_pointers
virtual void setup_pointers()
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...

lbann::Layer::m_expected_num_parent_layers
int m_expected_num_parent_layers
Definition: layer.hpp:838

lbann::data_type_layer< TensorDataType >::get_error_signals
const InputAbsDistMatrixType & get_error_signals(const Layer &parent) const override

lbann::cross_grid_sum_slice_layer::fp_compute
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
Definition: cross_grid_sum_slice.hpp:90

lbann
Definition: callback_helpers.hpp:32