27 #ifndef LBANN_LAYER_SPLIT_HPP_INCLUDED 28 #define LBANN_LAYER_SPLIT_HPP_INCLUDED 32 #include "lbann/proto/lbann.pb.h" 39 #ifdef LBANN_HAS_DISTCONV 40 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
41 class split_distconv_adapter :
public data_type_distconv_adapter<TensorDataType>
46 split_distconv_adapter(Layer& layer)
47 : data_type_distconv_adapter<TensorDataType>(layer)
49 virtual ~split_distconv_adapter() =
default;
50 void setup_distributions(tensor_overlap_constraints& constraints)
override;
51 dc::Shape get_activations_local_shape(
int index)
const override;
52 std::unique_ptr<TensorDevType> setup_activations_i(
int index)
const override;
55 #endif // LBANN_HAS_DISTCONV 69 template <
typename TensorDataType,
77 this->m_expected_num_child_layers = -1;
85 template <
typename ArchiveT>
89 std::string
get_type()
const override {
return "split"; }
96 void fill_onnx_node(onnx::GraphProto& graph)
const override;
97 #endif // LBANN_HAS_ONNX 101 void write_specific_proto(lbann_data::Layer& proto)
const final;
103 El::SyncInfo<Dev> syncSubGridCommunication = El::SyncInfo<Dev>();
105 friend class cereal::access;
111 for (
int i = 0; i < this->get_num_children(); ++i) {
112 this->set_output_dims(this->get_input_dims(), i);
119 const auto& input = this->get_prev_activations();
120 auto mini_batch_size =
121 this->infer_mini_batch_size_from_parents_or_default_to_current();
123 if (this->subgraph_parallelism_execution()) {
126 auto const* ptr_input =
dynamic_cast<El::DistMatrix<TensorDataType,
130 Dev
> const*>(&input);
132 auto childs = this->get_child_layers();
133 if (this->get_communication_flag() ==
COLL_OPT) {
134 El::copy::TranslateBetweenGridsBroadcast<TensorDataType, Dev, Dev>(
136 this->get_branch_tag_input_vector(),
137 this->get_subgrid_comm(),
138 syncSubGridCommunication);
140 else if (this->get_communication_flag() ==
COLL) {
141 El::copy::TranslateBetweenGridsBroadcast<TensorDataType, Dev, Dev>(
143 this->get_branch_tag_input_vector());
146 for (
int i = 0; i < childs[0]->get_num_spliting_groups(); i++) {
148 this->get_branch_tag_input(i).Resize(ptr_input->Height(),
150 El::Copy(input, this->get_branch_tag_input(i));
153 for (
int i = 0; i < this->get_num_children(); ++i) {
154 tag = childs[i]->get_grid_tag();
156 El::LockedView(this->get_activations(i),
157 this->get_branch_tag_input(tag - 1));
162 for (
int i = 0; i < this->get_num_children(); ++i) {
163 El::LockedView(this->get_activations(i), input);
173 #ifdef LBANN_HAS_DISTCONV 174 if (this->distconv_enabled()) {
175 get_distconv_adapter().bp_compute();
178 #endif // LBANN_HAS_DISTCONV 180 auto& gradient_wrt_input = this->get_error_signals();
181 auto childs = this->get_child_layers();
183 if (this->subgraph_parallelism_execution()) {
186 std::vector<bool> is_initialized_tensor(
187 childs[0]->get_num_spliting_groups(),
191 for (
int i = 0; i < this->get_num_children(); ++i) {
192 tag = childs[i]->get_grid_tag();
194 if (is_initialized_tensor[tag - 1]) {
195 El::Axpy(DataType(1),
196 this->get_prev_error_signals(i),
197 this->get_branch_tag_input(tag - 1));
200 El::Copy(this->get_prev_error_signals(i),
201 this->get_branch_tag_input(tag - 1));
202 is_initialized_tensor[tag - 1] =
true;
208 if (this->get_communication_flag() ==
COLL_OPT)
212 auto* ptr_gradient =
dynamic_cast< 213 El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>*
>(
214 &gradient_wrt_input);
216 El::copy::TranslateBetweenGridsAllreduce<TensorDataType, Dev, Dev>(
218 this->get_branch_tag_input_vector(),
219 this->get_subgrid_comm(),
220 syncSubGridCommunication);
222 else if (this->get_communication_flag() ==
COLL) {
223 auto* ptr_gradient =
dynamic_cast< 224 El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Dev>*
>(
225 &gradient_wrt_input);
227 El::copy::TranslateBetweenGridsAllreduce<TensorDataType, Dev, Dev>(
229 this->get_branch_tag_input_vector(),
233 if (this->get_num_children() > 0) {
234 El::Copy(this->get_branch_tag_input(0), gradient_wrt_input);
237 El::Zero(gradient_wrt_input);
240 for (
int i = 1; i < childs[0]->get_num_spliting_groups(); i++) {
242 El::Copy(this->get_branch_tag_input(i), this->get_temp_grad());
243 El::Axpy(DataType(1), this->get_temp_grad(), gradient_wrt_input);
249 if (this->get_num_children() > 0) {
250 El::Copy(this->get_prev_error_signals(0), gradient_wrt_input);
253 El::Zero(gradient_wrt_input);
255 for (
int i = 1; i < this->get_num_children(); ++i) {
256 El::Axpy(DataType(1),
257 this->get_prev_error_signals(i),
263 #ifdef LBANN_HAS_DISTCONV 265 bool is_distconv_supported()
const override 269 void setup_distconv_adapter()
override 271 this->get_distconv_adapter_ptr() =
272 std::make_unique<split_distconv_adapter<TensorDataType, T_layout, Dev>>(
275 split_distconv_adapter<TensorDataType, T_layout, Dev>&
276 get_distconv_adapter()
override;
277 const split_distconv_adapter<TensorDataType, T_layout, Dev>&
278 get_distconv_adapter()
const override;
279 #endif // LBANN_HAS_DISTCONV 282 template <
typename T, data_layout L, El::Device D>
285 proto.set_datatype(proto::ProtoDataType<T>);
286 proto.mutable_split();
289 #ifdef LBANN_HAS_ONNX 290 template <
typename T, data_layout L, El::Device D>
293 const auto& parent = this->get_parent_layer();
294 const size_t idx_in_parent = parent.find_child_layer_index(*
this);
295 for (
auto const* child : this->get_child_layers()) {
297 identity->add_input(parent.get_name() +
"_" +
299 size_t idx = this->find_child_layer_index(*child);
304 identity->set_doc_string(this->get_type());
307 #endif // LBANN_HAS_ONNX 309 #ifdef LBANN_HAS_DISTCONV 310 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
311 split_distconv_adapter<TensorDataType, T_layout, Dev>&
314 return const_cast<split_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
316 .get_distconv_adapter());
319 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
320 const split_distconv_adapter<TensorDataType, T_layout, Dev>&
324 const split_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
328 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
329 void split_distconv_adapter<TensorDataType, T_layout, Dev>::setup_distributions(
334 auto& x = this->get_prev_activations_dist();
335 auto& y = this->get_activations_dist();
336 auto& dx = this->get_error_signals_dist();
337 auto& dy = this->get_prev_error_signals_dist();
343 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
344 dc::Shape split_distconv_adapter<TensorDataType, T_layout, Dev>::
345 get_activations_local_shape(
int index)
const 348 TensorDataType>::get_activations_local_shape(0);
351 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
353 typename split_distconv_adapter<TensorDataType, T_layout, Dev>::TensorDevType>
354 split_distconv_adapter<TensorDataType, T_layout, Dev>::setup_activations_i(
357 return std::make_unique<TensorDevType>(this->get_prev_activations(0));
359 #endif // LBANN_HAS_DISTCONV 361 #ifndef LBANN_SPLIT_LAYER_INSTANTIATE 362 #define PROTO_DEVICE(T, Device) \ 363 extern template class split_layer<T, data_layout::DATA_PARALLEL, Device>; \ 364 extern template class split_layer<T, data_layout::MODEL_PARALLEL, Device> 368 #ifdef LBANN_HAS_DISTCONV 369 #define PROTO_DEVICE(T, Device) \ 370 extern template class split_distconv_adapter<T, \ 371 data_layout::DATA_PARALLEL, \ 373 extern template class split_distconv_adapter<T, \ 374 data_layout::MODEL_PARALLEL, \ 379 #endif // LBANN_HAS_DISTCONV 380 #endif // LBANN_SPLIT_LAYER_INSTANTIATE 384 #endif // LBANN_LAYER_SPLIT_HPP_INCLUDED
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
Present input tensor to multiple outputs.
void write_specific_proto(lbann_data::Layer &proto) const final
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
split_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
constexpr El::Device Device
virtual void setup_distributions(tensor_overlap_constraints &constraints)
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
void mark_equivalent(dc::Dist &d1, dc::Dist &d2)
std::string to_string(El::Device const &d)
std::string get_type() const override
Get the layer type's name.
void identity(El::Matrix< uint8_t > &mat, El::Int height, El::Int width, El::Int channels=1)
::distconv::tensor::Shape Shape
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
void fp_setup_outputs() override
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
data_layout
Data layout that is optimized for different modes of parallelism.
split_layer(lbann_comm *comm)
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
dc::TensorDev< OutputTensorDataType > TensorDevType