|
LBANN
0.103.0
Livermore Big Artificial Neural Network Toolkit
|
#include <comm.hpp>
Public Member Functions | |
| lbann_comm (int procs_per_trainer=0, El::mpi::Comm world=El::mpi::COMM_WORLD.GetMPIComm()) | |
| lbann_comm (const lbann_comm &)=delete | |
| lbann_comm & | operator= (const lbann_comm &)=delete |
| ~lbann_comm () | |
| void | split_trainers (int procs_per_trainer=-1, int trainer_grid_height=-1) |
| Construct communicators for trainers. More... | |
| void | split_trainer_grid (int num_process_primary_grid=0, bool create_two_models=false, bool enable_async_comm=false, bool enable_topo_aware=false) |
| GridType | get_grid_type () const noexcept |
| int | get_trainer_rank () const noexcept |
| int | get_rank_in_trainer () const noexcept |
| int | get_rank_in_world () const |
| int | get_world_rank (int trainer, int rank) const noexcept |
| int | map_world_rank_to_trainer_rank (int world_rank) const noexcept |
| int | map_world_rank_to_rank_in_trainer (int world_rank) const noexcept |
| int | get_trainer_master () const noexcept |
| int | get_intertrainer_master () const noexcept |
| int | get_world_master () const noexcept |
| bool | am_trainer_master () const noexcept |
| bool | am_world_master () const noexcept |
| El::Grid & | get_trainer_grid () |
| const El::Grid & | get_trainer_grid () const |
| El::Grid & | get_secondary_grid () |
| const El::Grid & | get_secondary_grid () const |
| El::Grid & | get_subset_grid () |
| const El::Grid & | get_subset_grid () const |
| int | get_num_trainers () const noexcept |
| int | get_procs_per_trainer () const noexcept |
| int | get_procs_per_node () const noexcept |
| int | get_procs_in_world () const |
| int | get_rank_in_node () const noexcept |
| bool | is_world_rank_on_node (int rank) const |
| int | get_default_threads_per_proc () const noexcept |
| void | reset_threads () const noexcept |
| void | intertrainer_sum_matrix (AbsMat &mat) const |
| void | intertrainer_sum_matrix (AbsDistMat &mat) const |
| void | intertrainer_broadcast_matrix (AbsMat &mat, int root) const |
| void | intertrainer_broadcast_matrix (AbsDistMat &mat, int root) const |
| template<typename T , bool S = is_instantiated_El_mpi_type<T>::value> | |
| void | broadcast (int root, T &val, const El::mpi::Comm &c) const |
| Broadcast a scalar value over an arbitrary communicator. More... | |
| template<typename T > | |
| void | broadcast_custom (int root, T &val, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | broadcast_native (int root, T &val, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | world_broadcast (int root, T &val) const |
| World broadcast of a scalar. More... | |
| template<typename T > | |
| void | intertrainer_broadcast (int root, T &val) const |
| Inter-trainer broadcast of a scalar. More... | |
| template<typename T > | |
| void | trainer_broadcast (int root, T &val) const |
| Within-trainer broadcast of a scalar. More... | |
| template<typename T > | |
| void | broadcast (const int root, T *data, const int count, const El::mpi::Comm &c) const |
| template<typename T , El::Device D, bool S = is_instantiated_El_mpi_type<T>::value> | |
| void | broadcast (const int root, T *data, const int count, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| void | world_broadcast (const int root, T *data, const int count) const |
| World broadcast of a buffer. More... | |
| template<typename T , El::Device D> | |
| void | world_broadcast (const int root, T *data, const int count, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| void | intertrainer_broadcast (const int root, T *data, const int count) const |
| Inter-trainer broadcast of a buffer. More... | |
| template<typename T , El::Device D> | |
| void | intertrainer_broadcast (const int root, T *data, const int count, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| void | trainer_broadcast (const int root, T *data, const int count) const |
| Within-trainer broadcast of a buffer. More... | |
| template<typename T , El::Device D> | |
| void | trainer_broadcast (const int root, T *data, const int count, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| size_t | resize (const int root, std::vector< T > &data, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | broadcast (const int root, std::vector< T > &data, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | world_broadcast (int root, std::vector< T > &data) const |
| Broadcast vector<> to world. More... | |
| template<typename T > | |
| void | intertrainer_broadcast (int root, std::vector< T > &data) const |
| Broadcast vector<> across trainers. More... | |
| template<typename T > | |
| void | trainer_broadcast (int root, std::vector< T > &data) const |
| Broadcast vector<> within trainer. More... | |
| template<typename T > | |
| void | all_gather (const T *src, int src_count, T *rcv, int rcv_count, const El::mpi::Comm &c) const |
| template<typename T , El::Device D> | |
| void | all_gather (const T *src, int src_count, T *rcv, int rcv_count, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| void | all_gather (std::vector< T > const &src, std::vector< T > &rcs, std::vector< int > const &rcv_counts, std::vector< int > const &rcv_disp, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | trainer_all_gather (std::vector< T > const &src, std::vector< T > &rcs, std::vector< int > const &rcv_counts, std::vector< int > const &rcv_disp) const |
| template<typename T > | |
| void | all_gather (T const &src, std::vector< T > &data, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | world_all_gather (T const &src, std::vector< T > &data) const |
| template<typename T > | |
| void | trainer_all_gather (T const &src, std::vector< T > &data) const |
| template<typename T > | |
| void | trainer_gather (T snd, int root) const |
| template<typename T > | |
| void | trainer_gather (T snd, T *rcv) const |
| template<typename T > | |
| void | trainer_gather (T const *snd, int count, int root) const |
| template<typename T > | |
| void | trainer_gather (T const *snd, int count, T *rcv) const |
| template<typename T > | |
| void | trainer_gatherv (T const *snd, int count, int root) const |
| template<typename T > | |
| void | trainer_gatherv (T const *snd, int count, T *rcv, int const *rcv_counts, int const *rcv_displacements) const |
| template<typename T > | |
| void | intertrainer_gather (T snd, int root) const |
| template<typename T > | |
| void | intertrainer_gather (T snd, std::vector< T > &rcv) const |
| template<typename T > | |
| void | intertrainer_gather (T const *snd, int count, int root) const |
| template<typename T > | |
| void | intertrainer_gather (T const *snd, int count, T *rcv) const |
| template<typename T > | |
| void | gather (T snd, int root, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | gather (T snd, T *rcv, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | gather (T snd, std::vector< T > &rcv, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | gather (T const *snd, int count, int root, const El::mpi::Comm &c) const |
| template<typename T , El::Device D> | |
| void | gather (T const *snd, int count, int root, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| void | gather (T const *snd, int count, T *rcv, const El::mpi::Comm &c) const |
| template<typename T , El::Device D> | |
| void | gather (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| T | scatter (int root, const El::mpi::Comm &c) const |
| template<typename T > | |
| T | scatter (T const *snd, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | intertrainer_reduce (T snd, int root, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| T | intertrainer_reduce (T snd, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | trainer_reduce (T snd, int root, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| T | trainer_reduce (T snd, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | trainer_reduce (T const *snd, int count, int root, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | trainer_reduce (T const *snd, int count, T *rcv, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | reduce (T snd, int root, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| T | reduce (T snd, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | reduce (T const *snd, int count, int root, const El::mpi::Comm &c) const |
| template<typename T , El::Device D> | |
| void | reduce (T const *snd, int count, int root, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| void | reduce (T const *snd, int count, int root, const El::mpi::Comm &c, El::mpi::Op op) const |
| template<typename T , El::Device D> | |
| void | reduce (T const *snd, int count, int root, const El::mpi::Comm &c, El::mpi::Op op, El::SyncInfo< D > const &syncInfo) const |
| template<typename T , El::Device D> | |
| void | reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| void | reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::mpi::Op op) const |
| template<typename T , El::Device D> | |
| void | reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::mpi::Op op, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| T | intertrainer_allreduce (T snd, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| T | trainer_allreduce (T snd, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | trainer_allreduce (T const *snd, int count, T *rcv, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| T | allreduce (T snd, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | allreduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | allreduce (T *data, int count, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const |
| template<typename TensorDataType > | |
| void | allreduce (El::AbstractMatrix< TensorDataType > &m, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const |
| template<typename TensorDataType > | |
| void | allreduce (El::AbstractDistMatrix< TensorDataType > &m, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const |
| template<typename TensorDataType > | |
| void | nb_allreduce (El::AbstractMatrix< TensorDataType > &m, const El::mpi::Comm &c, Al::request &req, El::mpi::Op op=El::mpi::SUM) const |
| template<typename TensorDataType > | |
| void | nb_allreduce (El::AbstractDistMatrix< TensorDataType > &m, const El::mpi::Comm &c, Al::request &req, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | nb_allreduce (T *data, int count, const El::mpi::Comm &c, Al::request &req, El::mpi::Op op=El::mpi::SUM) const |
| template<typename T > | |
| void | wait_all (std::vector< El::mpi::Request< T >> &req) const |
| template<typename T > | |
| void | wait (El::mpi::Request< T > &req) const |
| void | wait (Al::request &req) const |
| bool | test (Al::request &req) const |
| void | intertrainer_barrier () const |
| void | trainer_barrier () const |
| void | global_barrier () const |
| void | barrier (const El::mpi::Comm &c) const |
| template<typename T > | |
| void | send (const T *data, int count, int trainer, int rank) const |
| template<typename T , El::Device D> | |
| void | send (const T *data, int count, int trainer, int rank, El::SyncInfo< D > const &syncInfo) const |
| template<typename T , El::Device D> | |
| void | send (const T *data, int count, int trainer, El::SyncInfo< D > const &syncInfo) const |
| void | send (const AbsMat &mat, int trainer, int rank) const |
| void | send (const DistMat &mat, int trainer, int rank) const |
| void | send (const AbsMat &mat, int trainer) const |
| void | send (const DistMat &mat, int trainer) const |
| template<typename T > | |
| void | nb_send (const T *data, int count, int trainer, int rank, El::mpi::Request< T > &req) const |
| template<typename T > | |
| void | nb_tagged_send (const T *data, int count, int rank, int tag, El::mpi::Request< T > &req, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | nb_send (const T *data, int count, int trainer, El::mpi::Request< T > &req) const |
| void | nb_send (const AbsMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const |
| void | nb_send (const DistMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const |
| void | nb_send (const AbsMat &mat, int trainer, El::mpi::Request< DataType > &req) const |
| void | nb_send (const DistMat &mat, int trainer, El::mpi::Request< DataType > &req) const |
| template<typename T > | |
| void | recv (T *data, int count, int trainer, int rank) const |
| template<typename T > | |
| void | recv (T *data, int count, int trainer) const |
| template<typename T > | |
| void | recv (T *data, int count) const |
| template<typename T , El::Device D> | |
| void | recv (T *data, int count, int trainer, int rank, El::SyncInfo< D > const &syncInfo) const |
| template<typename T , El::Device D> | |
| void | recv (T *data, int count, int trainer, El::SyncInfo< D > const &syncInfo) const |
| void | recv (AbsMat &mat, int trainer, int rank) const |
| void | recv (DistMat &mat, int trainer, int rank) const |
| void | recv (AbsMat &mat, int trainer) const |
| void | recv (DistMat &mat, int trainer) const |
| template<typename T , El::Device D> | |
| void | recv (T *data, int count, El::SyncInfo< D > const &syncInfo) const |
| void | recv (AbsMat &mat) const |
| void | recv (DistMat &mat) const |
| template<typename T > | |
| void | nb_recv (T *data, int count, int trainer, int rank, El::mpi::Request< T > &req) const |
| template<typename T > | |
| void | nb_tagged_recv (T *data, int count, int rank, int tag, El::mpi::Request< T > &req, const El::mpi::Comm &c) const |
| template<typename T > | |
| void | nb_recv (T *data, int count, int trainer, El::mpi::Request< T > &req) const |
| void | nb_recv (AbsMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const |
| void | nb_recv (DistMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const |
| void | nb_recv (AbsMat &mat, int trainer, El::mpi::Request< DataType > &req) const |
| void | nb_recv (DistMat &mat, int trainer, El::mpi::Request< DataType > &req) const |
| template<typename T > | |
| void | nb_recv (T *data, int count, El::mpi::Request< T > &req) const |
| void | nb_recv (AbsMat &mat, El::mpi::Request< DataType > &req) const |
| void | nb_recv (DistMat &mat, El::mpi::Request< DataType > &req) const |
| template<typename T , El::Device D> | |
| void | sendrecv (const T *snd, int send_count, int send_trainer, int send_rank, T *rcv, int recv_count, int recv_trainer, int recv_rank) const |
| template<typename T , El::Device D> | |
| void | sendrecv (const T *snd, int send_count, int send_trainer, T *rcv, int recv_count, int recv_trainer) const |
| template<typename T , El::Device D> | |
| void | sendrecv (const T *snd, int send_count, int send_trainer, int send_rank, T *rcv, int recv_count, int recv_trainer, int recv_rank, El::SyncInfo< D > const &syncInfo) const |
| template<typename T , El::Device D> | |
| void | sendrecv (const T *snd, int send_count, int send_trainer, T *rcv, int recv_count, int recv_trainer, El::SyncInfo< D > const &syncInfo) const |
| template<typename T > | |
| int | get_count (int trainer, int rank) const |
| template<typename T > | |
| int | get_count (int trainer) const |
| size_t | get_num_trainer_barriers () const noexcept |
| size_t | get_num_intertrainer_barriers () const noexcept |
| size_t | get_num_global_barriers () const noexcept |
| size_t | get_bytes_sent () const noexcept |
| size_t | get_bytes_received () const noexcept |
| void | reset_stats_counters () noexcept |
| const El::mpi::Comm & | get_intertrainer_comm () const noexcept |
| const El::mpi::Comm & | get_trainer_comm () const noexcept |
| const El::mpi::Comm & | get_combined_grid_comm () const noexcept |
| const El::mpi::Comm & | get_world_comm () const noexcept |
| const El::mpi::Comm & | get_node_comm () const noexcept |
| const El::mpi::Comm & | get_KFAC_comm () const noexcept |
| std::vector< int > | get_primary_grid_ranks () |
| std::vector< int > | get_secondary_grid_ranks () |
| bool | get_KFAC_subgrid_create_two_models () |
| bool | enable_subgrid_async_communication () |
| const El::mpi::Comm & | get_packed_group_comm (int num_per_group) const |
| bool | is_rank_node_local (int rank, const El::mpi::Comm &comm) const |
| void | lbann_comm_abort (std::string msg) const |
Static Public Member Functions | |
| static bool | is_sendable (const AbsMat &mat) noexcept |
| static bool | is_sendable (const AbsDistMat &dist_mat) noexcept |
Private Member Functions | |
| void | setup_node_comm () |
| void | setup_threads () |
| void | count_bytes_broadcast (const size_t bytes, const int rank, const int root) const noexcept |
Private Attributes | |
| const El::mpi::Comm | m_world_comm |
| El::mpi::Comm | m_trainer_comm |
| El::mpi::Comm | m_intertrainer_comm |
| El::mpi::Comm | m_node_comm |
| El::mpi::Comm | m_primary_grid_comm |
| El::mpi::Comm | m_secondary_grid_comm |
| El::mpi::Comm | m_combined_grid_comm |
| std::unordered_map< int, El::mpi::Comm > | m_group_communicators |
| std::unique_ptr< El::Grid > | m_grid |
| int | m_num_trainers |
| int | m_procs_per_trainer |
| int | m_trainer_rank |
| int | m_rank_in_trainer |
| int | m_procs_per_node |
| int | m_rank_in_node |
| std::vector< int > | m_world_ranks_on_node |
| int | m_threads_per_proc |
| GridType | m_grid_type = GridType::NO_GRID |
| bool | m_create_two_models = false |
| bool | m_subgrid_async_progress = false |
| std::unique_ptr< El::Grid > | m_secondary_grid |
| std::unique_ptr< El::Grid > | m_subset_grid |
| std::vector< int > | m_primary_grid_ranks |
| std::vector< int > | m_secondary_grid_ranks |
| size_t | m_num_trainer_barriers |
| size_t | m_num_intertrainer_barriers |
| size_t | m_num_global_barriers |
| size_t | m_bytes_sent |
| size_t | m_bytes_received |
Manage communication. This supports separate trainers, each of which is split over potentially several processes. Every trainer is split over the same number of processes. The corresponding processes between trainers are on the "inter-trainer communicator". You can also do point-to-point or broadcast communication to arbitrary sets of processes.
| lbann::lbann_comm::lbann_comm | ( | int | procs_per_trainer = 0, |
| El::mpi::Comm | world = El::mpi::COMM_WORLD.GetMPIComm() |
||
| ) |
Init communicators for trainers each with procs_per_trainer processes, defaulting to every process in one trainer.
|
delete |
Don't allow copying; it doesn't make sense for the communicator.
| lbann::lbann_comm::~lbann_comm | ( | ) |
| void lbann::lbann_comm::all_gather | ( | const T * | src, |
| int | src_count, | ||
| T * | rcv, | ||
| int | rcv_count, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Allgather over an arbitrary communicator
Definition at line 176 of file comm_impl.hpp.
| void lbann::lbann_comm::all_gather | ( | const T * | src, |
| int | src_count, | ||
| T * | rcv, | ||
| int | rcv_count, | ||
| const El::mpi::Comm & | c, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
Definition at line 190 of file comm_impl.hpp.
| void lbann::lbann_comm::all_gather | ( | std::vector< T > const & | src, |
| std::vector< T > & | rcs, | ||
| std::vector< int > const & | rcv_counts, | ||
| std::vector< int > const & | rcv_disp, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Allgatherv over an arbitrary communicator; all vectors must be correctly sized prior to entry.
Definition at line 205 of file comm_impl.hpp.
| void lbann::lbann_comm::all_gather | ( | T const & | src, |
| std::vector< T > & | data, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Allgather for a single element over an arbitrary communicator; std::vector<T> &data must be correctly sized prior to entry.
Definition at line 243 of file comm_impl.hpp.
| T lbann::lbann_comm::allreduce | ( | T | snd, |
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Scalar allreduce.
Definition at line 643 of file comm_impl.hpp.
| void lbann::lbann_comm::allreduce | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Scalar-array allreduce.
Definition at line 658 of file comm_impl.hpp.
| void lbann::lbann_comm::allreduce | ( | T * | data, |
| int | count, | ||
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
In-place scalar-array allreduce.
Definition at line 687 of file comm_impl.hpp.
| void lbann::lbann_comm::allreduce | ( | El::AbstractMatrix< TensorDataType > & | m, |
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Matrix allreduce.
| void lbann::lbann_comm::allreduce | ( | El::AbstractDistMatrix< TensorDataType > & | m, |
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Matrix allreduce.
|
inlinenoexcept |
|
inlinenoexcept |
| void lbann::lbann_comm::barrier | ( | const El::mpi::Comm & | c | ) | const |
Barrier on an arbitrary communicator.
| void lbann::lbann_comm::broadcast | ( | int | root, |
| T & | val, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Broadcast a scalar value over an arbitrary communicator.
Definition at line 1021 of file comm_impl.hpp.
| void lbann::lbann_comm::broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Broadcast a buffer over an arbitrary communicator assuming that the buffer space is already allocated.
Definition at line 60 of file comm_impl.hpp.
| void lbann::lbann_comm::broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count, | ||
| const El::mpi::Comm & | c, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::broadcast | ( | const int | root, |
| std::vector< T > & | data, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Broadcast vector<> over an arbitrary communicator; vector<> for non-root processes will be resized as needed.
Definition at line 140 of file comm_impl.hpp.
| void lbann::lbann_comm::broadcast_custom | ( | int | root, |
| T & | val, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
| void lbann::lbann_comm::broadcast_native | ( | int | root, |
| T & | val, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Definition at line 1037 of file comm_impl.hpp.
|
inlineprivatenoexcept |
|
inline |
| void lbann::lbann_comm::gather | ( | T | snd, |
| int | root, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar gather (for non-root processes).
Definition at line 359 of file comm_impl.hpp.
| void lbann::lbann_comm::gather | ( | T | snd, |
| T * | rcv, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar gather (for root processes).
Definition at line 374 of file comm_impl.hpp.
| void lbann::lbann_comm::gather | ( | T | snd, |
| std::vector< T > & | rcv, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar gather (for root processes).
Definition at line 383 of file comm_impl.hpp.
| void lbann::lbann_comm::gather | ( | T const * | snd, |
| int | count, | ||
| int | root, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar-array gather (for non-root processes).
Definition at line 391 of file comm_impl.hpp.
| void lbann::lbann_comm::gather | ( | T const * | snd, |
| int | count, | ||
| int | root, | ||
| const El::mpi::Comm & | c, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
Definition at line 399 of file comm_impl.hpp.
| void lbann::lbann_comm::gather | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar-array gather (for root processes).
Definition at line 410 of file comm_impl.hpp.
| void lbann::lbann_comm::gather | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| const El::mpi::Comm & | c, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
Definition at line 418 of file comm_impl.hpp.
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
| int lbann::lbann_comm::get_count | ( | int | trainer, |
| int | rank | ||
| ) | const |
Determine the size (count) of an incoming message.
Definition at line 1005 of file comm_impl.hpp.
| int lbann::lbann_comm::get_count | ( | int | trainer | ) | const |
Definition at line 1015 of file comm_impl.hpp.
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
Developer's note: to get the raw MPI communicator, which may be needed when working with external libraries, use, for example: comm->get_intertrainer_comm().GetMPIComm(). Return the intertrainer communicator.
Definition at line 883 of file comm.hpp.
|
inlinenoexcept |
|
inlinenoexcept |
|
inline |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
| const El::mpi::Comm& lbann::lbann_comm::get_packed_group_comm | ( | int | num_per_group | ) | const |
Return a communicator containing num_per_group processors.
This will attempt to pack processes so that the processes in each group are physically close together on the system.
num_per_group must evenly divide the number of processors in the world.
|
inline |
|
inline |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inlinenoexcept |
|
inline |
|
inline |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
|
inlinenoexcept |
| void lbann::lbann_comm::global_barrier | ( | ) | const |
Barrier among all processes.
| T lbann::lbann_comm::intertrainer_allreduce | ( | T | snd, |
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Inter-trainer all-reduce.
Definition at line 622 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_barrier | ( | ) | const |
Barrier among the inter-trainer processes.
| void lbann::lbann_comm::intertrainer_broadcast | ( | int | root, |
| T & | val | ||
| ) | const |
Inter-trainer broadcast of a scalar.
Definition at line 42 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count | ||
| ) | const |
Inter-trainer broadcast of a buffer.
Definition at line 87 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::intertrainer_broadcast | ( | int | root, |
| std::vector< T > & | data | ||
| ) | const |
Broadcast vector<> across trainers.
Broadcast vector<> across trainers; vector<> for non-root processes will be resized as needed.
Definition at line 162 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_broadcast_matrix | ( | AbsMat & | mat, |
| int | root | ||
| ) | const |
Broadcast mat over the inter-trainer communicator starting from root.
| void lbann::lbann_comm::intertrainer_broadcast_matrix | ( | AbsDistMat & | mat, |
| int | root | ||
| ) | const |
| void lbann::lbann_comm::intertrainer_gather | ( | T | snd, |
| int | root | ||
| ) | const |
Inter-trainer gather (for non-root processes).
Definition at line 331 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_gather | ( | T | snd, |
| std::vector< T > & | rcv | ||
| ) | const |
Inter-trainer gather (for root processes).
Definition at line 337 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_gather | ( | T const * | snd, |
| int | count, | ||
| int | root | ||
| ) | const |
Inter-trainer scalar-array gather (for non-root processes).
Definition at line 343 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_gather | ( | T const * | snd, |
| int | count, | ||
| T * | rcv | ||
| ) | const |
Inter-trainer scalar-array gather (for root processes).
Definition at line 351 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_reduce | ( | T | snd, |
| int | root, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Inter-trainer reduce (for non-root processes).
Definition at line 456 of file comm_impl.hpp.
| T lbann::lbann_comm::intertrainer_reduce | ( | T | snd, |
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Inter-trainer reduce (for root processes).
Definition at line 464 of file comm_impl.hpp.
| void lbann::lbann_comm::intertrainer_sum_matrix | ( | AbsMat & | mat | ) | const |
Perform a sum reduction of mat over the inter-trainer communicator.
| void lbann::lbann_comm::intertrainer_sum_matrix | ( | AbsDistMat & | mat | ) | const |
|
inline |
|
inlinestaticnoexcept |
|
inlinestaticnoexcept |
|
inline |
| void lbann::lbann_comm::lbann_comm_abort | ( | std::string | msg | ) | const |
Throws an lbann_exception.
|
inlinenoexcept |
|
inlinenoexcept |
| void lbann::lbann_comm::nb_allreduce | ( | El::AbstractMatrix< TensorDataType > & | m, |
| const El::mpi::Comm & | c, | ||
| Al::request & | req, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Non-blocking matrix allreduce. If LBANN has not been built with Aluminum, then this calls a blocking matrix allreduce.
| void lbann::lbann_comm::nb_allreduce | ( | El::AbstractDistMatrix< TensorDataType > & | m, |
| const El::mpi::Comm & | c, | ||
| Al::request & | req, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Non-blocking matrix allreduce. If LBANN has not been built with Aluminum, then this calls a blocking matrix allreduce.
| void lbann::lbann_comm::nb_allreduce | ( | T * | data, |
| int | count, | ||
| const El::mpi::Comm & | c, | ||
| Al::request & | req, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Non-blocking in-place scalar-array allreduce. If LBANN has not been built with Aluminum, then this calls a blocking allreduce. This currently only supports host pointers (i.e. the MPI backend).
Definition at line 718 of file comm_impl.hpp.
| void lbann::lbann_comm::nb_recv | ( | T * | data, |
| int | count, | ||
| int | trainer, | ||
| int | rank, | ||
| El::mpi::Request< T > & | req | ||
| ) | const |
Corresponding non-blocking receives.
Definition at line 879 of file comm_impl.hpp.
| void lbann::lbann_comm::nb_recv | ( | T * | data, |
| int | count, | ||
| int | trainer, | ||
| El::mpi::Request< T > & | req | ||
| ) | const |
| void lbann::lbann_comm::nb_recv | ( | AbsMat & | mat, |
| int | trainer, | ||
| int | rank, | ||
| El::mpi::Request< DataType > & | req | ||
| ) | const |
| void lbann::lbann_comm::nb_recv | ( | DistMat & | mat, |
| int | trainer, | ||
| int | rank, | ||
| El::mpi::Request< DataType > & | req | ||
| ) | const |
|
inline |
|
inline |
| void lbann::lbann_comm::nb_recv | ( | T * | data, |
| int | count, | ||
| El::mpi::Request< T > & | req | ||
| ) | const |
| void lbann::lbann_comm::nb_recv | ( | AbsMat & | mat, |
| El::mpi::Request< DataType > & | req | ||
| ) | const |
| void lbann::lbann_comm::nb_recv | ( | DistMat & | mat, |
| El::mpi::Request< DataType > & | req | ||
| ) | const |
| void lbann::lbann_comm::nb_send | ( | const T * | data, |
| int | count, | ||
| int | trainer, | ||
| int | rank, | ||
| El::mpi::Request< T > & | req | ||
| ) | const |
Corresponding non-blocking sends.
Definition at line 793 of file comm_impl.hpp.
| void lbann::lbann_comm::nb_send | ( | const T * | data, |
| int | count, | ||
| int | trainer, | ||
| El::mpi::Request< T > & | req | ||
| ) | const |
| void lbann::lbann_comm::nb_send | ( | const AbsMat & | mat, |
| int | trainer, | ||
| int | rank, | ||
| El::mpi::Request< DataType > & | req | ||
| ) | const |
| void lbann::lbann_comm::nb_send | ( | const DistMat & | mat, |
| int | trainer, | ||
| int | rank, | ||
| El::mpi::Request< DataType > & | req | ||
| ) | const |
|
inline |
|
inline |
| void lbann::lbann_comm::nb_tagged_recv | ( | T * | data, |
| int | count, | ||
| int | rank, | ||
| int | tag, | ||
| El::mpi::Request< T > & | req, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Definition at line 893 of file comm_impl.hpp.
| void lbann::lbann_comm::nb_tagged_send | ( | const T * | data, |
| int | count, | ||
| int | rank, | ||
| int | tag, | ||
| El::mpi::Request< T > & | req, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Definition at line 807 of file comm_impl.hpp.
|
delete |
Don't allow assignment; it doesn't make sense for the communicator.
| void lbann::lbann_comm::recv | ( | T * | data, |
| int | count, | ||
| int | trainer, | ||
| int | rank | ||
| ) | const |
Corresponding receive to send.
Definition at line 828 of file comm_impl.hpp.
| void lbann::lbann_comm::recv | ( | T * | data, |
| int | count, | ||
| int | trainer | ||
| ) | const |
| void lbann::lbann_comm::recv | ( | T * | data, |
| int | count | ||
| ) | const |
| void lbann::lbann_comm::recv | ( | T * | data, |
| int | count, | ||
| int | trainer, | ||
| int | rank, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::recv | ( | T * | data, |
| int | count, | ||
| int | trainer, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::recv | ( | AbsMat & | mat, |
| int | trainer, | ||
| int | rank | ||
| ) | const |
| void lbann::lbann_comm::recv | ( | DistMat & | mat, |
| int | trainer, | ||
| int | rank | ||
| ) | const |
|
inline |
|
inline |
| void lbann::lbann_comm::recv | ( | T * | data, |
| int | count, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
As above, but receive from anyone.
Definition at line 869 of file comm_impl.hpp.
| void lbann::lbann_comm::recv | ( | AbsMat & | mat | ) | const |
| void lbann::lbann_comm::recv | ( | DistMat & | mat | ) | const |
| void lbann::lbann_comm::reduce | ( | T | snd, |
| int | root, | ||
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Scalar reduce (for non-root processes).
Definition at line 502 of file comm_impl.hpp.
| T lbann::lbann_comm::reduce | ( | T | snd, |
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Scalar reduce (for root processes).
Definition at line 518 of file comm_impl.hpp.
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| int | root, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar-array reduce (for non-root processes).
Definition at line 539 of file comm_impl.hpp.
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| int | root, | ||
| const El::mpi::Comm & | c, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| int | root, | ||
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op | ||
| ) | const |
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| int | root, | ||
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
Definition at line 566 of file comm_impl.hpp.
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| const El::mpi::Comm & | c, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
Scalar-array reduce (for root processes).
Definition at line 578 of file comm_impl.hpp.
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op | ||
| ) | const |
| void lbann::lbann_comm::reduce | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| const El::mpi::Comm & | c, | ||
| El::mpi::Op | op, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
Definition at line 605 of file comm_impl.hpp.
|
inlinenoexcept |
|
noexcept |
Reset the number of threads per process to the default.
| size_t lbann::lbann_comm::resize | ( | const int | root, |
| std::vector< T > & | data, | ||
| const El::mpi::Comm & | c | ||
| ) | const |
Resize vector<> over an arbitrary communicator to match the one on root.
Definition at line 123 of file comm_impl.hpp.
| T lbann::lbann_comm::scatter | ( | int | root, |
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar scatter (for non-root processes).
Definition at line 431 of file comm_impl.hpp.
| T lbann::lbann_comm::scatter | ( | T const * | snd, |
| const El::mpi::Comm & | c | ||
| ) | const |
Scalar scatter (for root processes).
Definition at line 446 of file comm_impl.hpp.
| void lbann::lbann_comm::send | ( | const T * | data, |
| int | count, | ||
| int | trainer, | ||
| int | rank | ||
| ) | const |
Send a buffer to rank in trainer.
Definition at line 761 of file comm_impl.hpp.
| void lbann::lbann_comm::send | ( | const T * | data, |
| int | count, | ||
| int | trainer, | ||
| int | rank, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::send | ( | const T * | data, |
| int | count, | ||
| int | trainer, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::send | ( | const AbsMat & | mat, |
| int | trainer, | ||
| int | rank | ||
| ) | const |
| void lbann::lbann_comm::send | ( | const DistMat & | mat, |
| int | trainer, | ||
| int | rank | ||
| ) | const |
|
inline |
|
inline |
| void lbann::lbann_comm::sendrecv | ( | const T * | snd, |
| int | send_count, | ||
| int | send_trainer, | ||
| int | send_rank, | ||
| T * | rcv, | ||
| int | recv_count, | ||
| int | recv_trainer, | ||
| int | recv_rank | ||
| ) | const |
Send/recv to/from ranks.
Definition at line 923 of file comm_impl.hpp.
| void lbann::lbann_comm::sendrecv | ( | const T * | snd, |
| int | send_count, | ||
| int | send_trainer, | ||
| T * | rcv, | ||
| int | recv_count, | ||
| int | recv_trainer | ||
| ) | const |
| void lbann::lbann_comm::sendrecv | ( | const T * | snd, |
| int | send_count, | ||
| int | send_trainer, | ||
| int | send_rank, | ||
| T * | rcv, | ||
| int | recv_count, | ||
| int | recv_trainer, | ||
| int | recv_rank, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::sendrecv | ( | const T * | snd, |
| int | send_count, | ||
| int | send_trainer, | ||
| T * | rcv, | ||
| int | recv_count, | ||
| int | recv_trainer, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
|
private |
Setup communicator for processes in the same compute node.
|
private |
Initialize the default number of threads per process. This is the number of OpenMP threads to use for parallel regions, provided omp_set_num_threads has not been called or the num_threads directive has not been provided. If the environment variable OMP_NUM_THREADS is defined, its value is used for the default. Otherwise, the default is the number of hardware cores per node divided by the number of processes per node.
| void lbann::lbann_comm::split_trainer_grid | ( | int | num_process_primary_grid = 0, |
| bool | create_two_models = false, |
||
| bool | enable_async_comm = false, |
||
| bool | enable_topo_aware = false |
||
| ) |
Split the communicator for the given trainer into primary and secondary grids.
| num_process_primary_grid | Absolute number of MPI ranks assigned to the primary grid |
| create_two_models | Create a secondary copy of the model on the secondary grid to perform redundant computation and minimize communication. |
| enable_async_comm | Use non-blocking sends and receives |
| enable_topo_aware | Assign primary and secondary grid resources so that they are interleaved and thus should be allocated to the same compute node, assuming that there is always an even number of accelerators per node. |
| void lbann::lbann_comm::split_trainers | ( | int | procs_per_trainer = -1, |
| int | trainer_grid_height = -1 |
||
| ) |
Construct communicators for trainers.
Invalidates any existing trainer communicators.
| procs_per_trainer | Number of MPI ranks in a trainer. Default is size of world communicator. |
| trainer_grid_height | Height of 2D process grid for each trainer. Must divide procs_per_trainer. Default grid is approximately square. |
| bool lbann::lbann_comm::test | ( | Al::request & | req | ) | const |
Test whether a non-blocking request has completed; true if it has.
| void lbann::lbann_comm::trainer_all_gather | ( | std::vector< T > const & | src, |
| std::vector< T > & | rcs, | ||
| std::vector< int > const & | rcv_counts, | ||
| std::vector< int > const & | rcv_disp | ||
| ) | const |
Allgatherv over a trainer communicator; all vectors must be correctly sized prior to entry.
Definition at line 231 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_all_gather | ( | T const & | src, |
| std::vector< T > & | data | ||
| ) | const |
Allgather for a single element over the trainer communicator; std::vector<T> &data must be correctly sized prior to entry.
Definition at line 268 of file comm_impl.hpp.
| T lbann::lbann_comm::trainer_allreduce | ( | T | snd, |
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Within-trainer all-reduce.
Definition at line 628 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_allreduce | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Scalar array within-trainer all-reduce.
Definition at line 634 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_barrier | ( | ) | const |
Barrier among processes in this trainer.
| void lbann::lbann_comm::trainer_broadcast | ( | int | root, |
| T & | val | ||
| ) | const |
Within-trainer broadcast of a scalar.
Definition at line 48 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count | ||
| ) | const |
Within-trainer broadcast of a buffer.
Definition at line 103 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::trainer_broadcast | ( | int | root, |
| std::vector< T > & | data | ||
| ) | const |
Broadcast vector<> within trainer.
Definition at line 169 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_gather | ( | T | snd, |
| int | root | ||
| ) | const |
Within-trainer scalar gather (for non-root processes).
Definition at line 275 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_gather | ( | T | snd, |
| T * | rcv | ||
| ) | const |
Within-trainer scalar gather (for root processes).
Definition at line 281 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_gather | ( | T const * | snd, |
| int | count, | ||
| int | root | ||
| ) | const |
Within-trainer scalar-array gather (for non-root processes).
Definition at line 287 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_gather | ( | T const * | snd, |
| int | count, | ||
| T * | rcv | ||
| ) | const |
Within-trainer scalar-array gather (for root processes).
Definition at line 295 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_gatherv | ( | T const * | snd, |
| int | count, | ||
| int | root | ||
| ) | const |
Within-trainer variable-length-array gather (for non-root processes).
Definition at line 303 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_gatherv | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| int const * | rcv_counts, | ||
| int const * | rcv_displacements | ||
| ) | const |
| void lbann::lbann_comm::trainer_reduce | ( | T | snd, |
| int | root, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Within-trainer reduce (for non-root processes).
Definition at line 470 of file comm_impl.hpp.
| T lbann::lbann_comm::trainer_reduce | ( | T | snd, |
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Within-trainer reduce (for root processes).
Definition at line 478 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_reduce | ( | T const * | snd, |
| int | count, | ||
| int | root, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Within-trainer scalar array reduce (for non-root processes).
Definition at line 484 of file comm_impl.hpp.
| void lbann::lbann_comm::trainer_reduce | ( | T const * | snd, |
| int | count, | ||
| T * | rcv, | ||
| El::mpi::Op | op = El::mpi::SUM |
||
| ) | const |
Within-trainer scalar array reduce (for root processes).
Definition at line 493 of file comm_impl.hpp.
| void lbann::lbann_comm::wait | ( | El::mpi::Request< T > & | req | ) | const |
Wait for a non-blocking request to complete.
Definition at line 754 of file comm_impl.hpp.
| void lbann::lbann_comm::wait | ( | Al::request & | req | ) | const |
Wait for a non-blocking request to complete.
| void lbann::lbann_comm::wait_all | ( | std::vector< El::mpi::Request< T >> & | req | ) | const |
Wait for all non-blocking requests to complete.
Definition at line 747 of file comm_impl.hpp.
| void lbann::lbann_comm::world_all_gather | ( | T const & | src, |
| std::vector< T > & | data | ||
| ) | const |
Allgather for a single element over the world communicator; std::vector<T> &data must be correctly sized prior to entry.
Definition at line 259 of file comm_impl.hpp.
| void lbann::lbann_comm::world_broadcast | ( | int | root, |
| T & | val | ||
| ) | const |
World broadcast of a scalar.
Definition at line 36 of file comm_impl.hpp.
| void lbann::lbann_comm::world_broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count | ||
| ) | const |
World broadcast of a buffer.
Definition at line 70 of file comm_impl.hpp.
| void lbann::lbann_comm::world_broadcast | ( | const int | root, |
| T * | data, | ||
| const int | count, | ||
| El::SyncInfo< D > const & | syncInfo | ||
| ) | const |
| void lbann::lbann_comm::world_broadcast | ( | int | root, |
| std::vector< T > & | data | ||
| ) | const |
Broadcast vector<> to world.
Definition at line 152 of file comm_impl.hpp.
|
private |
|
private |
|
private |
|
private |
|
mutableprivate |
|
private |
|
private |
|
mutableprivate |
|
mutableprivate |
|
mutableprivate |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |
|
private |