27 #ifndef LBANN_LAYERS_TRANSFORM_CONCATENATE_HPP_INCLUDED 28 #define LBANN_LAYERS_TRANSFORM_CONCATENATE_HPP_INCLUDED 37 #include "lbann/proto/layers.pb.h" 42 #ifdef LBANN_HAS_DISTCONV 43 template <
typename TensorDataType, data_layout Layout, El::Device Device>
44 class concatenate_distconv_adapter
45 :
public data_type_distconv_adapter<TensorDataType>
50 concatenate_distconv_adapter(Layer& layer)
51 : data_type_distconv_adapter<TensorDataType>(layer)
53 virtual ~concatenate_distconv_adapter() =
default;
54 dc::Shape get_activations_local_shape(
int index = 0)
const override;
58 #endif // LBANN_HAS_DISTCONV 65 template <
typename TensorDataType,
80 template <
typename ArchiveT>
85 std::string get_type()
const override;
87 El::Device get_device_allocation()
const override;
95 void write_specific_proto(lbann_data::Layer& proto)
const final;
97 El::SyncInfo<Device> syncSubGridCommunication = El::SyncInfo<Device>();
99 friend class cereal::access;
102 void setup_pointers()
override;
103 void setup_dims()
override;
105 void fp_setup_outputs()
override;
106 void bp_setup_gradient_wrt_inputs()
override;
107 void fp_compute()
override;
108 void bp_compute()
override;
120 std::vector<unsigned char> m_workspace;
126 gpu_lib::event_wrapper m_workspace_event;
127 #endif // LBANN_HAS_GPU 129 template <
typename U>
131 template <
typename U, El::Device D>
134 template <
typename U>
137 void fp_compute_subgrid();
139 void bp_compute_subgrid();
141 #ifdef LBANN_HAS_DISTCONV 142 friend class concatenate_distconv_adapter<TensorDataType, Layout, Device>;
145 bool is_distconv_supported()
const override 151 void setup_distconv_adapter()
override 153 this->get_distconv_adapter_ptr() = std::make_unique<
154 concatenate_distconv_adapter<TensorDataType, Layout, Device>>(*this);
156 concatenate_distconv_adapter<TensorDataType, Layout, Device>&
157 get_distconv_adapter()
override;
158 const concatenate_distconv_adapter<TensorDataType, Layout, Device>&
159 get_distconv_adapter()
const override;
160 #endif // LBANN_HAS_DISTCONV 167 template <
typename T, data_layout L, El::Device D>
169 lbann_data::Layer& proto)
const 171 proto.set_datatype(proto::ProtoDataType<T>);
172 auto* msg = proto.mutable_concatenation();
173 msg->set_axis(m_concat_dim);
176 template <
typename TensorDataType, data_layout Layout, El::Device Device>
185 template <
typename TensorDataType, data_layout Layout, El::Device Device>
192 template <
typename TensorDataType, data_layout Layout, El::Device Device>
195 return "concatenate";
198 template <
typename TensorDataType, data_layout Layout, El::Device Device>
205 template <
typename TensorDataType, data_layout Layout, El::Device Device>
212 template <
typename TensorDataType, data_layout Layout, El::Device Device>
221 template <
typename TensorDataType, data_layout Layout, El::Device Device>
234 template <
typename TensorDataType, data_layout Layout, El::Device Device>
242 std::ostringstream err;
244 <<
"is concatenating along dimension " <<
m_concat_dim <<
", " 245 <<
"but it has a " << output_dims.size() <<
"-D input tensor " 248 <<
"outputs with dimensions ";
249 for (
size_t d = 0; d < output_dims.size(); ++d) {
250 err << (d > 0 ?
" x " :
"") << output_dims[d];
259 if (input_dims.size() != output_dims.size() ||
260 !std::equal(input_dims.begin(),
262 output_dims.begin()) ||
266 std::ostringstream err;
268 <<
"expects input tensors with dimensions ";
269 for (
size_t d = 0; d < output_dims.size(); ++d) {
270 err << (d > 0 ?
" x " :
"");
275 err << output_dims[d];
278 err <<
", but parent layer " 280 <<
"outputs with dimensions ";
281 for (
size_t d = 0; d < input_dims.size(); ++d) {
282 err << (d > 0 ?
" x " :
"") << input_dims[d];
296 "attempted to concatenate along dimension ",
299 "but model-parallel concatenate layer " 300 "only supports flat data");
307 template <
typename TensorDataType, data_layout Layout, El::Device Device>
310 #ifdef LBANN_HAS_DISTCONV 311 if (!this->keep_original_outputs(0))
313 #endif // LBANN_HAS_DISTCONV 318 El::LockedView(output, input0);
322 output.AlignWith(input0);
329 template <
typename TensorDataType, data_layout Layout, El::Device Device>
338 auto* ptr_input =
dynamic_cast< 339 El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Device>*
>(
342 El::copy::TranslateBetweenGridsGather<TensorDataType, Device, Device>(
350 template <
typename TensorDataType, data_layout Layout, El::Device Device>
359 auto const* ptr_input_grad =
dynamic_cast< 361 DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, Device> const*
>(
365 El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
374 El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
383 El::copy::TranslateBetweenGridsScatter<TensorDataType, Device, Device>(
393 template <
typename TensorDataType, data_layout Layout, El::Device Device>
397 const size_t num_dims = input_dims.size();
398 #ifdef LBANN_HAS_DISTCONV 400 get_distconv_adapter().fp_compute();
420 template <
typename TensorDataType, El::Device Device>
424 #ifdef LBANN_HAS_DISTCONV 426 LBANN_ERROR(
"Model-parallel LBANN matrix not supported in distconv");
428 #endif // LBANN_HAS_DISTCONV 435 for (
size_t j = 0; j < num_inputs; ++j) {
438 El::LockedView(input_grad,
440 El::IR(offset, offset + input_size),
442 offset += input_size;
446 template <
typename TensorDataType, El::Device Device>
453 if (num_inputs == 1) {
454 #ifdef LBANN_HAS_DISTCONV 455 if (!l.keep_original_gradient_wrt_inputs(0))
461 for (
size_t j = 0; j < num_inputs; ++j) {
462 #ifdef LBANN_HAS_DISTCONV 463 if (!l.keep_original_gradient_wrt_inputs(j))
468 input_grad.AlignWith(output_grad);
475 template <
typename TensorDataType, data_layout Layout, El::Device Device>
482 template <
typename TensorDataType, data_layout Layout, El::Device Device>
487 const size_t num_dims = input_dims.size();
489 #ifdef LBANN_HAS_DISTCONV 491 get_distconv_adapter().bp_compute();
511 #ifdef LBANN_HAS_DISTCONV 512 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
513 concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&
517 concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
519 .get_distconv_adapter());
522 template <
typename TensorDataType, data_layout T_layout, El::Device Dev>
523 const concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&
527 const concatenate_distconv_adapter<TensorDataType, T_layout, Dev>&
>(
531 template <
typename TensorDataType, data_layout Layout, El::Device Device>
532 dc::Shape concatenate_distconv_adapter<TensorDataType, Layout, Device>::
533 get_activations_local_shape(
int index)
const 537 shape[-2] = this->get_activations_shape()[-2];
541 template <
typename TensorDataType, data_layout Layout, El::Device Device>
542 void concatenate_distconv_adapter<TensorDataType, Layout, Device>::fp_compute()
548 default_hydrogen_stream());
551 template <
typename TensorDataType, data_layout Layout, El::Device Device>
552 void concatenate_distconv_adapter<TensorDataType, Layout, Device>::bp_compute()
557 default_hydrogen_stream());
559 #endif // LBANN_HAS_DISTCONV 561 #ifndef LBANN_CONCATENATE_LAYER_INSTANTIATE 563 #define PROTO_DEVICE(T, Device) \ 564 extern template class concatenate_layer<T, \ 565 data_layout::DATA_PARALLEL, \ 567 extern template class concatenate_layer<T, \ 568 data_layout::MODEL_PARALLEL, \ 574 #endif // LBANN_CONCATENATE_LAYER_INSTANTIATE 578 #endif // LBANN_LAYERS_TRANSFORM_CONCATENATE_HPP_INCLUDED void fp_setup_outputs() override
Setup output tensors. Called by the 'forward_prop' function. Each output tensor is resized to match t...
bool distconv_enabled() const
Indicate whether distconv is enabled.
virtual void setup_dims()
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
data_layout get_data_layout() const override
Get data layout of the data tensors. We assume that the data layouts of the previous activations...
void setup_dims() override
Setup tensor dimensions. Called by the 'setup' function. If there are any input tensors, the base method sets all uninitialized output tensor dimensions equal to the first input tensor dimensions.
std::vector< std::unique_ptr< InputAbsDistMatrixType > > & get_all_error_signals()
void write_specific_proto(lbann_data::Layer &proto) const final
void bp_compute() override
Compute objective function gradients. Called by the 'back_prop' function. Given the input...
size_t m_concat_dim
Tensor dimension to concatenate along.
El::Device get_device_allocation() const override
Get the device allocation for the data tensors. We assume that the device allocation of the previous ...
auto get_linear_size(std::vector< T > const &dims)
friend void fp_compute_impl(concatenate_layer< U, Layout, Device > &, size_t)
El::SyncInfo< Device > syncSubGridCommunication
int get_output_size(size_t output_index=0) const
Get output tensor size.
int get_num_parents() const noexcept
Get number of parent layers.
void setup_pointers() override
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
std::vector< int > get_input_dims(size_t input_index=0) const
Get input tensor dimensions.
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
Generates nicely formatted description messages.
std::vector< std::unique_ptr< InputAbsDistMatrixType > > & get_all_prev_activations()
virtual description get_description() const
Human-readable description.
constexpr El::Device Device
OutputAbsDistMatrixType & get_prev_error_signals(int child_index=0)
InputAbsDistMatrixType & get_prev_activations(int parent_index=0)
void fp_compute() override
Apply layer operation. Called by the 'forward_prop' function. Given the input tensors, the output tensors are populated with computed values.
const OutputAbsDistMatrixType & get_activations(const Layer &child) const override
int get_backprop_requirements() const override
Returns the necessary tensors for computing backpropagation.
Concatenate tensors along specified dimension.
int get_input_size(size_t input_index=0) const
Get input tensor size.
void set_output_dims(std::vector< int > dims, size_t output_index=0)
Set output tensor dimensions.
std::vector< const Layer * > get_parent_layers() const
::distconv::tensor::Shape Shape
std::string get_name() const
Get the layer instance's name.
void bp_compute_subgrid()
void fp_compute_subgrid()
concatenate_layer * copy() const override
Copy function. This function dynamically allocates memory for a layer instance and instantiates a cop...
void bp_setup_gradient_wrt_inputs() override
Setup gradient w.r.t. input tensors. Called by the 'back_prop' function. Each gradient w...
void bp_setup_gradient_wrt_inputs_impl(concatenate_layer< TensorDataType, data_layout::MODEL_PARALLEL, Device > &l)
data_layout
Data layout that is optimized for different modes of parallelism.
friend void bp_compute_impl(concatenate_layer< U, Layout, Device > &, size_t)
bool can_run_inplace() const override
If True, the computation can run in-place (feeding each input activations tensor as the corresponding...
SubGraphCommunication get_communication_flag()
El::mpi::Comm & get_subgrid_comm()
virtual void setup_pointers()
Setup layer pointers. Called by the 'setup' function. Pointers to parent/child layers are assumed to ...
std::string get_type() const override
Get the layer type's name.
friend void bp_setup_gradient_wrt_inputs_impl(concatenate_layer< U, Layout, D > &)
bool subgraph_parallelism_execution() const noexcept
int m_expected_num_parent_layers
description get_description() const override
Human-readable description.
const InputAbsDistMatrixType & get_error_signals(const Layer &parent) const override
dc::TensorDev< OutputTensorDataType > TensorDevType