LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
weighted_sum.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "Licensee"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_LAYER_WEIGHTED_SUM_HPP_INCLUDED
28 #define LBANN_LAYER_WEIGHTED_SUM_HPP_INCLUDED
29 
32 #include <vector>
33 
34 namespace lbann {
35 
// Layer that adds its input tensors, each multiplied by its own scaling
// factor: output = sum_i m_scaling_factors[i] * input_i.
// NOTE(review): the embedded line numbering of this listing has gaps (e.g.
// 38, 74, 92, 110), so some original lines are not shown here. `T_layout`
// and `desc` are used below without a visible declaration -- confirm against
// the real header before editing.
37 template <typename TensorDataType,
39  El::Device Dev = El::Device::CPU>
40 class weighted_sum_layer : public data_type_layer<TensorDataType>
41 {
42 private:
    // Scaling factor applied to each parent's tensor, indexed by parent.
    // setup_pointers() rejects a size that differs from the parent count.
44  std::vector<DataType> m_scaling_factors;
45 
46 public:
    // Builds the layer on communicator `comm` and stores a copy of
    // `scaling_factors`.
47  weighted_sum_layer(lbann_comm* comm, std::vector<DataType> scaling_factors)
48  : data_type_layer<TensorDataType>(comm), m_scaling_factors(scaling_factors)
49  {
50  this->m_expected_num_parent_layers = -1; // No limit on parents
51  }
52 
    // Returns a newly allocated copy of this layer, made with the copy
    // constructor.
53  weighted_sum_layer* copy() const override
54  {
55  return new weighted_sum_layer(*this);
56  }
57 
59 
    // Serialization hook taking an archive; only declared here, the
    // definition is not part of this listing.
61  template <typename ArchiveT>
62  void serialize(ArchiveT& ar);
63 
65 
    // Fixed properties of the layer: its type name, the layout and device
    // taken from the template parameters, that it may run in place, and that
    // get_backprop_requirements() reports ERROR_SIGNALS.
66  std::string get_type() const override { return "weighted sum"; }
67  data_layout get_data_layout() const override { return T_layout; }
68  El::Device get_device_allocation() const override { return Dev; }
69  bool can_run_inplace() const override { return true; }
70  int get_backprop_requirements() const override { return ERROR_SIGNALS; }
71 
    // Adds a "Scaling factors" entry to the description, formatted as a
    // comma-separated list of the factors, and returns the description.
    // NOTE(review): the line that creates `desc` (embedded line 74) is not
    // shown in this listing.
72  description get_description() const override
73  {
75  std::stringstream ss;
76  for (size_t i = 0; i < m_scaling_factors.size(); ++i) {
77  ss << (i > 0 ? ", " : "") << m_scaling_factors[i];
78  }
79  desc.add("Scaling factors", ss.str());
80  return desc;
81  }
82 
83 protected:
    // Declared only; the definition is not part of this listing.
85  void write_specific_proto(lbann_data::Layer& proto) const final;
86 
87  friend class cereal::access;
89 
    // Validates the parent configuration and reports problems through
    // LBANN_ERROR: there must be at least one parent layer, and exactly one
    // scaling factor per parent.
    // NOTE(review): embedded line 92 is missing from this listing.
90  void setup_pointers() override
91  {
93  std::stringstream err;
94  if (this->get_num_parents() < 1) {
95  err << get_type() << " layer \"" << this->get_name() << "\" "
96  << "has no parent layers";
97  LBANN_ERROR(err.str());
98  }
99  if ((int)m_scaling_factors.size() != this->get_num_parents()) {
100  err << get_type() << " layer \"" << this->get_name() << "\" "
101  << "has an invalid number of scaling factors "
102  << "(found " << m_scaling_factors.size() << ", "
103  << "but there are " << this->get_num_parents() << " parent layers)";
104  LBANN_ERROR(err.str());
105  }
106  }
107 
    // Sets the output dimensions to those of the first input, then requires
    // every input to have exactly those dimensions. On a mismatch the error
    // message lists each parent's name and its output dimensions.
    // NOTE(review): embedded line 110 is missing from this listing.
108  void setup_dims() override
109  {
111  this->set_output_dims(this->get_input_dims());
112 
113  // Check that input dimensions match
114  const auto& output_dims = this->get_output_dims();
115  for (int i = 0; i < this->get_num_parents(); ++i) {
116  if (this->get_input_dims(i) != output_dims) {
117  const auto& parents = this->get_parent_layers();
118  std::stringstream err;
119  err << get_type() << " layer \"" << this->get_name() << "\" "
120  << "has input tensors with incompatible dimensions (";
121  for (int j = 0; j < this->get_num_parents(); ++j) {
122  const auto& dims = this->get_input_dims(j);
123  err << (j > 0 ? ", " : "") << "layer \"" << parents[j]->get_name()
124  << "\" outputs ";
125  for (size_t k = 0; k < dims.size(); ++k) {
126  err << (k > 0 ? " x " : "") << dims[k];
127  }
128  }
129  err << ")";
130  LBANN_ERROR(err.str());
131  }
132  }
133  }
134 
    // Forward pass: output = factor[0] * input_0 + sum_{i>=1} factor[i] * input_i.
    // The first input is copied into the output unless the layer runs in
    // place, then scaled; the remaining inputs are accumulated with El::Axpy.
    // Reads m_scaling_factors[0] unconditionally, so it relies on the
    // non-empty check done in setup_pointers().
135  void fp_compute() override
136  {
137  auto& output = this->get_activations();
138 
139  // Special case for the first input so that in-place operation works
140  if (!this->m_runs_inplace)
141  El::Copy(this->get_prev_activations(0), output);
142 
143  El::Scale(m_scaling_factors[0], output);
144  for (int i = 1; i < this->get_num_parents(); ++i) {
145  El::Axpy(m_scaling_factors[i], this->get_prev_activations(i), output);
146  }
147  }
148 
    // Backward pass: the gradient w.r.t. input i is factor[i] times the
    // gradient w.r.t. the output. Inputs 1..N-1 are written first (zeroed,
    // then accumulated); input 0 is handled last.
    // NOTE(review): presumably, when running in place, get_error_signals(0)
    // shares storage with the incoming gradient, which would be why input 0
    // is scaled only after the other inputs have read it -- confirm.
149  void bp_compute() override
150  {
151  const auto& gradient_wrt_output = this->get_prev_error_signals();
152 
153  for (int i = 1; i < this->get_num_parents(); ++i) {
154  auto& gradient_wrt_input = this->get_error_signals(i);
155  El::Zero(gradient_wrt_input);
156  El::Axpy(m_scaling_factors[i], gradient_wrt_output, gradient_wrt_input);
157  }
158 
159  // Special case for the first input so that in-place operation works
160  if (!this->m_runs_inplace)
161  El::Copy(gradient_wrt_output, this->get_error_signals(0));
162  El::Scale(m_scaling_factors[0], this->get_error_signals(0));
163  }
164 };
165 
// Unless LBANN_WEIGHTED_SUM_LAYER_INSTANTIATE is defined, PROTO_DEVICE(T, Device)
// expands to extern template declarations of weighted_sum_layer for both the
// DATA_PARALLEL and MODEL_PARALLEL layouts with the given T and Device.
// NOTE(review): embedded line 175, which presumably expands PROTO_DEVICE
// before it is undefined, is not shown in this listing -- confirm against the
// real header.
166 #ifndef LBANN_WEIGHTED_SUM_LAYER_INSTANTIATE
167 #define PROTO_DEVICE(T, Device) \
168  extern template class weighted_sum_layer<T, \
169  data_layout::DATA_PARALLEL, \
170  Device>; \
171  extern template class weighted_sum_layer<T, \
172  data_layout::MODEL_PARALLEL, \
173  Device>
174 
176 #undef PROTO_DEVICE
177 #endif // LBANN_WEIGHTED_SUM_LAYER_INSTANTIATE
178 
179 } // namespace lbann
180 
181 #endif // LBANN_LAYER_WEIGHTED_SUM_HPP_INCLUDED
void write_specific_proto(lbann_data::Layer &proto) const final
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
weighted_sum_layer(lbann_comm *comm, std::vector< DataType > scaling_factors)
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Add tensors with scaling factors.
bool m_runs_inplace
If true, the layer will run in-place (the input and output activations point to the same tensor)...
Definition: layer.hpp:872
#define LBANN_ERROR(...)
Definition: exception.hpp:37
int get_num_parents() const noexcept
Get number of parent layers.
Definition: layer.hpp:574
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
Generates nicely formatted description messages.
Definition: description.hpp:49
friend class cereal::access
virtual description get_description() const
Human-readable description.
void serialize(ArchiveT &ar)
constexpr El::Device Device
OutputAbsDistMatrixType & get_prev_error_signals(int child_index=0)
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
description get_description() const override
Human-readable description.
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
std::vector< const Layer * > get_parent_layers() const
std::string get_name() const
Get the layer instance's name.
Definition: layer.hpp:332
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
void setup_pointers() override
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
weighted_sum_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
std::string get_type() const override
Get the layer type's name.
virtual void setup_pointers()
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
std::vector< int > get_output_dims(size_t output_index=0) const
Get output tensor dimensions.
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
std::vector< DataType > m_scaling_factors
int m_expected_num_parent_layers
Definition: layer.hpp:838
const InputAbsDistMatrixType & get_error_signals(const Layer &parent) const override