27 #ifndef LBANN_LAYER_SUM_HPP_INCLUDED 28 #define LBANN_LAYER_SUM_HPP_INCLUDED 32 #include "lbann/proto/lbann.pb.h" 38 #ifdef LBANN_HAS_DISTCONV 39 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
// Distconv adapter for the sum layer. Only compiled when LBANN_HAS_DISTCONV
// is defined; specializes data_type_distconv_adapter<TensorDataType>.
40 class sum_distconv_adapter :
public data_type_distconv_adapter<TensorDataType>
// Forwards the owning layer to the base adapter's constructor.
45 sum_distconv_adapter(Layer& layer)
46 : data_type_distconv_adapter<TensorDataType>(layer)
48 virtual ~sum_distconv_adapter() =
default;
// Creates the error-signal tensor for the given parent index. The
// out-of-class definition later in this file builds it from
// get_prev_error_signals(0).
49 std::unique_ptr<TensorDevType>
50 setup_error_signals_i(
int index)
const override;
53 #endif // LBANN_HAS_DISTCONV 56 template <
typename TensorDataType,
// NOTE(review): -1 presumably means "any number of parent layers" -- the
// meaning of this sentinel is defined in the base class; confirm there.
64 this->m_expected_num_parent_layers = -1;
72 template <
typename ArchiveT>
// Layer type name; always the string "sum". Also used as the prefix of the
// error messages built elsewhere in this class.
77 std::string
get_type()
const override {
return "sum"; }
// Writes the layer-specific part of the protobuf description (defined
// out of class further down in this file).
85 void write_specific_proto(lbann_data::Layer& proto)
const final;
// Default-constructed sync info that is handed to the inter-grid
// allreduce/broadcast calls in the COLL_OPT communication mode.
87 El::SyncInfo<Dev> syncSubGridCommunication = El::SyncInfo<Dev>();
// Lets cereal reach non-public members for serialization.
89 friend class cereal::access;
// A sum needs at least one input: when the layer has no parents, build an
// error message that names the layer type and this layer's name.
// (How the message is reported is not visible in this excerpt.)
95 if (this->get_num_parents() < 1) {
96 std::stringstream err;
97 err << get_type() <<
" layer \"" << this->get_name() <<
"\" " 98 <<
"has no parent layers";
// The output tensor takes the dimensions returned by get_input_dims()
// (called without an index -- presumably the first input; confirm).
106 this->set_output_dims(this->get_input_dims());
// Every input must have exactly the output dimensions, since the inputs
// are added element-wise.
109 const auto& output_dims = this->get_output_dims();
110 for (
int i = 0; i < this->get_num_parents(); ++i) {
111 if (this->get_input_dims(i) != output_dims) {
// On the first mismatch, assemble a diagnostic that lists every parent
// layer by name; entries are separated by ", ".
112 const auto& parents = this->get_parent_layers();
113 std::stringstream err;
114 err << get_type() <<
" layer \"" << this->get_name() <<
"\" " 115 <<
"has input tensors with incompatible dimensions (";
116 for (
int j = 0; j < this->get_num_parents(); ++j) {
117 const auto& dims = this->get_input_dims(j);
118 err << (j > 0 ?
", " :
"") <<
"layer \"" << parents[j]->get_name()
// Print the dimensions of parent j joined by " x ".
120 for (
size_t k = 0; k < dims.size(); ++k) {
121 err << (k > 0 ?
" x " :
"") << dims[k];
// Forward pass: the output is the element-wise sum of the parents'
// activations. Three paths are visible: distconv, sub-graph parallel, and
// the default single-grid path at the end.
// NOTE(review): parts of the branch structure (closing braces, some else
// branches and arguments) are missing from this excerpt.
//
// Distconv builds: hand the computation to the distconv adapter when
// distconv is enabled for this layer.
132 #ifdef LBANN_HAS_DISTCONV 133 if (this->distconv_enabled()) {
134 get_distconv_adapter().fp_compute();
137 #endif // LBANN_HAS_DISTCONV 138 auto& output = this->get_activations();
139 const auto& parents = this->get_parent_layers();
// Sub-graph parallel path: parents are grouped by grid tag and each group
// is reduced into its own branch buffer. The flag vector records which
// branch buffers already hold a contribution.
141 if (this->subgraph_parallelism_execution()) {
144 std::vector<bool> is_initialized_tensor(this->m_num_spliting_groups,
148 for (
int i = 0; i < this->get_num_parents(); ++i) {
// Grid tags are shifted by one to index the branch buffers.
149 tag = parents[i]->get_grid_tag() - 1;
// Buffer already seeded: accumulate this parent into it. The buffer is
// only touched when the parent's activation matrix reports Participating().
// NOTE(review): the scale is written as DataType(1) although the matrices
// are typed on TensorDataType -- confirm the two agree for every
// instantiation.
151 if (is_initialized_tensor[tag]) {
153 if (this->get_prev_activations(i).Participating()) {
154 El::Axpy(DataType(1),
155 this->get_prev_activations(i),
156 this->get_branch_tag_input(tag));
// First contribution for this tag (presumably the else-branch of the check
// above): overwrite the buffer instead of accumulating, then mark it seeded.
160 if (this->get_prev_activations(i).Participating()) {
161 El::Copy(this->get_prev_activations(i),
162 this->get_branch_tag_input(tag));
163 is_initialized_tensor[tag] =
true;
// COLL_OPT: combine the branch buffers with an inter-grid allreduce that
// is given the sub-grid communicator and the layer's sync info.
170 if (this->get_communication_flag() ==
COLL_OPT)
174 auto* ptr_output =
dynamic_cast< 175 El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>*
>(
178 El::copy::TranslateBetweenGridsAllreduce<TensorDataType, Dev, Dev>(
180 this->get_branch_tag_input_vector(),
181 this->get_subgrid_comm(),
182 syncSubGridCommunication,
// COLL: same allreduce entry point, called without the explicit
// communicator / sync-info arguments.
185 else if (this->get_communication_flag() ==
COLL) {
186 auto* ptr_output =
dynamic_cast< 187 El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>*
>(
190 El::copy::TranslateBetweenGridsAllreduce<TensorDataType, Dev, Dev>(
192 this->get_branch_tag_input_vector());
// Remaining communication mode (presumably the final else): seed the output
// from branch buffer 0, then add every other branch buffer after staging it
// in the temporary gradient buffer.
195 if (this->get_num_parents() > 0) {
196 El::Copy(this->get_branch_tag_input(0), output);
202 for (
int i = 1; i < this->m_num_spliting_groups; i++) {
204 El::Copy(this->get_branch_tag_input(i), this->get_temp_grad());
205 El::Axpy(DataType(1), this->get_temp_grad(), output);
// Default path (no sub-graph parallelism): output = parent 0, then add
// parents 1..N-1 in place.
210 El::Copy(this->get_prev_activations(0), output);
211 for (
int i = 1; i < this->get_num_parents(); ++i) {
212 El::Axpy(DataType(1), this->get_prev_activations(i), output);
// Prepares the output tensors for the forward pass. Layers without children
// are special-cased first (the body of that branch is not visible here).
220 if (this->get_num_children() < 1) {
// Width of every output matrix.
223 auto mini_batch_size =
224 this->infer_mini_batch_size_from_parents_or_default_to_current();
// With at least one parent, outputs are aligned to the distribution of the
// previous activations; otherwise the distribution of the layer's own
// activations is used as the reference.
226 const bool align_outputs = this->get_num_parents() > 0;
227 const auto& alignment_dist =
228 (align_outputs ? this->get_prev_activations().DistData()
229 : this->get_activations().DistData());
232 for (
int i = 0; i < this->get_num_children(); ++i) {
// Distconv builds apply an extra per-output check via
// keep_original_outputs(i); the statement it guards is not visible here.
233 #ifdef LBANN_HAS_DISTCONV 234 if (!this->keep_original_outputs(i))
236 #endif // LBANN_HAS_DISTCONV 238 auto& output = this->get_activations(i);
// Alignment is skipped under sub-graph parallel execution.
240 if (align_outputs && this->subgraph_parallelism_execution() ==
false) {
241 output.AlignWith(alignment_dist);
// Size each output to (output size of child i) x (mini-batch size).
243 output.Resize(this->get_output_size(i), mini_batch_size);
// Backward setup: every parent's error signal is a locked (read-only) view
// of the incoming gradient, either directly or through a per-branch buffer.
// NOTE(review): parts of the branch structure are missing from this excerpt.
250 const auto& parents = this->get_parent_layers();
251 const auto& gradient_wrt_output = this->get_prev_error_signals();
// Sub-graph parallel path: first distribute the incoming gradient into the
// branch buffers, using the configured communication mode.
253 if (this->subgraph_parallelism_execution()) {
// COLL_OPT: inter-grid broadcast given the sub-grid communicator and the
// layer's sync info.
255 if (this->get_communication_flag() ==
COLL_OPT)
259 auto const* ptr_gradient =
260 dynamic_cast<El::DistMatrix<TensorDataType,
264 Dev
> const*>(&gradient_wrt_output);
265 El::copy::TranslateBetweenGridsBroadcast<TensorDataType, Dev, Dev>(
267 this->get_branch_tag_input_vector(),
268 this->get_subgrid_comm(),
269 syncSubGridCommunication);
// COLL: same broadcast entry point without the explicit communicator /
// sync-info arguments.
271 else if (this->get_communication_flag() ==
COLL) {
272 auto const* ptr_gradient =
273 dynamic_cast<El::DistMatrix<TensorDataType,
277 Dev
> const*>(&gradient_wrt_output);
278 El::copy::TranslateBetweenGridsBroadcast<TensorDataType, Dev, Dev>(
280 this->get_branch_tag_input_vector());
// Remaining mode (presumably the final else): copy the gradient into each
// branch buffer one at a time.
283 for (
int i = 0; i < this->m_num_spliting_groups; i++) {
285 El::Copy(gradient_wrt_output, this->get_branch_tag_input(i));
// Each parent's error signal views the branch buffer selected by its grid
// tag (shifted by one to index the buffers).
290 for (
int i = 0; i < this->get_num_parents(); ++i) {
291 tag = parents[i]->get_grid_tag() - 1;
293 El::LockedView(this->get_error_signals(i),
294 this->get_branch_tag_input(tag));
// Default path: every parent's error signal is a locked view of the
// incoming gradient itself.
298 for (
int i = 0; i < this->get_num_parents(); ++i) {
300 El::LockedView(this->get_error_signals(i), gradient_wrt_output);
// Distconv hooks, compiled only with LBANN_HAS_DISTCONV: the adapter gets
// friend access, setup_distconv_adapter() installs a sum_distconv_adapter
// created with std::make_unique, and get_distconv_adapter() is declared in
// const and non-const form returning the sum-specific adapter type.
307 #ifdef LBANN_HAS_DISTCONV 308 friend class sum_distconv_adapter<TensorDataType, T_layout, Dev>;
311 bool is_distconv_supported()
const override 315 void setup_distconv_adapter()
override 317 this->get_distconv_adapter_ptr() =
318 std::make_unique<sum_distconv_adapter<TensorDataType, T_layout, Dev>>(
321 sum_distconv_adapter<TensorDataType, T_layout, Dev>&
322 get_distconv_adapter()
override;
323 const sum_distconv_adapter<TensorDataType, T_layout, Dev>&
324 get_distconv_adapter()
const override;
325 #endif // LBANN_HAS_DISTCONV 328 template <
typename T, data_layout L, El::Device D>
// Record the template data type T in the proto message via
// proto::ProtoDataType<T>.
331 proto.set_datatype(proto::ProtoDataType<T>);
// Out-of-class definitions of the distconv accessors and of the adapter's
// error-signal setup (compiled only with LBANN_HAS_DISTCONV).
335 #ifdef LBANN_HAS_DISTCONV 336 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
337 sum_distconv_adapter<TensorDataType, T_layout, Dev>&
// Non-const accessor: strips constness from the result of another
// get_distconv_adapter() call (presumably the const overload -- the receiver
// expression is not visible in this excerpt).
340 return const_cast<sum_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
342 .get_distconv_adapter());
// Const accessor: converts to the sum-specific adapter type (the cast
// keyword and its operand are not visible in this excerpt).
345 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
346 const sum_distconv_adapter<TensorDataType, T_layout, Dev>&
350 const sum_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
354 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
356 typename sum_distconv_adapter<TensorDataType, T_layout, Dev>::TensorDevType>
357 sum_distconv_adapter<TensorDataType, T_layout, Dev>::setup_error_signals_i(
// The new error-signal tensor is constructed from previous error signal 0.
360 return std::make_unique<TensorDevType>(this->get_prev_error_signals(0));
362 #endif // LBANN_HAS_DISTCONV 364 #ifndef LBANN_SUM_LAYER_INSTANTIATE 365 #define PROTO_DEVICE(T, Device) \ 366 extern template class sum_layer<T, data_layout::DATA_PARALLEL, Device>; \ 367 extern template class sum_layer<T, data_layout::MODEL_PARALLEL, Device> 371 #ifdef LBANN_HAS_DISTCONV 372 #define PROTO_DEVICE(T, Device) \ 373 extern template class sum_distconv_adapter<T, \ 374 data_layout::DATA_PARALLEL, \ 376 extern template class sum_distconv_adapter<T, \ 377 data_layout::MODEL_PARALLEL, \ 382 #endif // LBANN_HAS_DISTCONV 383 #endif // LBANN_SUM_LAYER_INSTANTIATE 387 #endif // LBANN_LAYER_SUM_HPP_INCLUDED virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
void setup_pointers() override
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
constexpr El::Device Device
void fp_setup_outputs() override
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
void write_specific_proto(lbann_data::Layer &proto) const final
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
data_layout
Data layout that is optimized for different modes of parallelism.
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
sum_layer(lbann_comm *comm)
void bp_setup_gradient_wrt_inputs() override
Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w...
sum_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
virtual void setup_pointers()
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
std::string get_type() const override
Get the layer type's name.
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
dc::TensorDev< OutputTensorDataType > TensorDevType