LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
lbann::lbann_comm Class Reference

#include <comm.hpp>

Public Member Functions

 lbann_comm (int procs_per_trainer=0, El::mpi::Comm world=El::mpi::COMM_WORLD.GetMPIComm())
 
 lbann_comm (const lbann_comm &)=delete
 
lbann_comm & operator= (const lbann_comm &)=delete
 
 ~lbann_comm ()
 
void split_trainers (int procs_per_trainer=-1, int trainer_grid_height=-1)
 Construct communicators for trainers. More...
 
void split_trainer_grid (int num_process_primary_grid=0, bool create_two_models=false, bool enable_async_comm=false, bool enable_topo_aware=false)
 
GridType get_grid_type () const noexcept
 
int get_trainer_rank () const noexcept
 
int get_rank_in_trainer () const noexcept
 
int get_rank_in_world () const
 
int get_world_rank (int trainer, int rank) const noexcept
 
int map_world_rank_to_trainer_rank (int world_rank) const noexcept
 
int map_world_rank_to_rank_in_trainer (int world_rank) const noexcept
 
int get_trainer_master () const noexcept
 
int get_intertrainer_master () const noexcept
 
int get_world_master () const noexcept
 
bool am_trainer_master () const noexcept
 
bool am_world_master () const noexcept
 
El::Grid & get_trainer_grid ()
 
const El::Grid & get_trainer_grid () const
 
El::Grid & get_secondary_grid ()
 
const El::Grid & get_secondary_grid () const
 
El::Grid & get_subset_grid ()
 
const El::Grid & get_subset_grid () const
 
int get_num_trainers () const noexcept
 
int get_procs_per_trainer () const noexcept
 
int get_procs_per_node () const noexcept
 
int get_procs_in_world () const
 
int get_rank_in_node () const noexcept
 
bool is_world_rank_on_node (int rank) const
 
int get_default_threads_per_proc () const noexcept
 
void reset_threads () const noexcept
 
void intertrainer_sum_matrix (AbsMat &mat) const
 
void intertrainer_sum_matrix (AbsDistMat &mat) const
 
void intertrainer_broadcast_matrix (AbsMat &mat, int root) const
 
void intertrainer_broadcast_matrix (AbsDistMat &mat, int root) const
 
template<typename T , bool S = is_instantiated_El_mpi_type<T>::value>
void broadcast (int root, T &val, const El::mpi::Comm &c) const
 Broadcast a scalar value over an arbitrary communicator. More...
 
template<typename T >
void broadcast_custom (int root, T &val, const El::mpi::Comm &c) const
 
template<typename T >
void broadcast_native (int root, T &val, const El::mpi::Comm &c) const
 
template<typename T >
void world_broadcast (int root, T &val) const
 World broadcast of a scalar. More...
 
template<typename T >
void intertrainer_broadcast (int root, T &val) const
 Inter-trainer broadcast of a scalar. More...
 
template<typename T >
void trainer_broadcast (int root, T &val) const
 Within-trainer broadcast of a scalar. More...
 
template<typename T >
void broadcast (const int root, T *data, const int count, const El::mpi::Comm &c) const
 
template<typename T , El::Device D, bool S = is_instantiated_El_mpi_type<T>::value>
void broadcast (const int root, T *data, const int count, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
void world_broadcast (const int root, T *data, const int count) const
 World broadcast of a buffer. More...
 
template<typename T , El::Device D>
void world_broadcast (const int root, T *data, const int count, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
void intertrainer_broadcast (const int root, T *data, const int count) const
 Inter-trainer broadcast of a buffer. More...
 
template<typename T , El::Device D>
void intertrainer_broadcast (const int root, T *data, const int count, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
void trainer_broadcast (const int root, T *data, const int count) const
 Within-trainer broadcast of a buffer. More...
 
template<typename T , El::Device D>
void trainer_broadcast (const int root, T *data, const int count, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
size_t resize (const int root, std::vector< T > &data, const El::mpi::Comm &c) const
 
template<typename T >
void broadcast (const int root, std::vector< T > &data, const El::mpi::Comm &c) const
 
template<typename T >
void world_broadcast (int root, std::vector< T > &data) const
 Broadcast vector<> to world. More...
 
template<typename T >
void intertrainer_broadcast (int root, std::vector< T > &data) const
 Broadcast vector<> across trainers. More...
 
template<typename T >
void trainer_broadcast (int root, std::vector< T > &data) const
 Broadcast vector<> within trainer. More...
 
template<typename T >
void all_gather (const T *src, int src_count, T *rcv, int rcv_count, const El::mpi::Comm &c) const
 
template<typename T , El::Device D>
void all_gather (const T *src, int src_count, T *rcv, int rcv_count, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
void all_gather (std::vector< T > const &src, std::vector< T > &rcs, std::vector< int > const &rcv_counts, std::vector< int > const &rcv_disp, const El::mpi::Comm &c) const
 
template<typename T >
void trainer_all_gather (std::vector< T > const &src, std::vector< T > &rcs, std::vector< int > const &rcv_counts, std::vector< int > const &rcv_disp) const
 
template<typename T >
void all_gather (T const &src, std::vector< T > &data, const El::mpi::Comm &c) const
 
template<typename T >
void world_all_gather (T const &src, std::vector< T > &data) const
 
template<typename T >
void trainer_all_gather (T const &src, std::vector< T > &data) const
 
template<typename T >
void trainer_gather (T snd, int root) const
 
template<typename T >
void trainer_gather (T snd, T *rcv) const
 
template<typename T >
void trainer_gather (T const *snd, int count, int root) const
 
template<typename T >
void trainer_gather (T const *snd, int count, T *rcv) const
 
template<typename T >
void trainer_gatherv (T const *snd, int count, int root) const
 
template<typename T >
void trainer_gatherv (T const *snd, int count, T *rcv, int const *rcv_counts, int const *rcv_displacements) const
 
template<typename T >
void intertrainer_gather (T snd, int root) const
 
template<typename T >
void intertrainer_gather (T snd, std::vector< T > &rcv) const
 
template<typename T >
void intertrainer_gather (T const *snd, int count, int root) const
 
template<typename T >
void intertrainer_gather (T const *snd, int count, T *rcv) const
 
template<typename T >
void gather (T snd, int root, const El::mpi::Comm &c) const
 
template<typename T >
void gather (T snd, T *rcv, const El::mpi::Comm &c) const
 
template<typename T >
void gather (T snd, std::vector< T > &rcv, const El::mpi::Comm &c) const
 
template<typename T >
void gather (T const *snd, int count, int root, const El::mpi::Comm &c) const
 
template<typename T , El::Device D>
void gather (T const *snd, int count, int root, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
void gather (T const *snd, int count, T *rcv, const El::mpi::Comm &c) const
 
template<typename T , El::Device D>
void gather (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
T scatter (int root, const El::mpi::Comm &c) const
 
template<typename T >
T scatter (T const *snd, const El::mpi::Comm &c) const
 
template<typename T >
void intertrainer_reduce (T snd, int root, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
T intertrainer_reduce (T snd, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void trainer_reduce (T snd, int root, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
T trainer_reduce (T snd, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void trainer_reduce (T const *snd, int count, int root, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void trainer_reduce (T const *snd, int count, T *rcv, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void reduce (T snd, int root, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
T reduce (T snd, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void reduce (T const *snd, int count, int root, const El::mpi::Comm &c) const
 
template<typename T , El::Device D>
void reduce (T const *snd, int count, int root, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
void reduce (T const *snd, int count, int root, const El::mpi::Comm &c, El::mpi::Op op) const
 
template<typename T , El::Device D>
void reduce (T const *snd, int count, int root, const El::mpi::Comm &c, El::mpi::Op op, El::SyncInfo< D > const &syncInfo) const
 
template<typename T , El::Device D>
void reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
void reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c) const
 
template<typename T >
void reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::mpi::Op op) const
 
template<typename T , El::Device D>
void reduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::mpi::Op op, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
T intertrainer_allreduce (T snd, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
T trainer_allreduce (T snd, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void trainer_allreduce (T const *snd, int count, T *rcv, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
T allreduce (T snd, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void allreduce (T const *snd, int count, T *rcv, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void allreduce (T *data, int count, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const
 
template<typename TensorDataType >
void allreduce (El::AbstractMatrix< TensorDataType > &m, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const
 
template<typename TensorDataType >
void allreduce (El::AbstractDistMatrix< TensorDataType > &m, const El::mpi::Comm &c, El::mpi::Op op=El::mpi::SUM) const
 
template<typename TensorDataType >
void nb_allreduce (El::AbstractMatrix< TensorDataType > &m, const El::mpi::Comm &c, Al::request &req, El::mpi::Op op=El::mpi::SUM) const
 
template<typename TensorDataType >
void nb_allreduce (El::AbstractDistMatrix< TensorDataType > &m, const El::mpi::Comm &c, Al::request &req, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void nb_allreduce (T *data, int count, const El::mpi::Comm &c, Al::request &req, El::mpi::Op op=El::mpi::SUM) const
 
template<typename T >
void wait_all (std::vector< El::mpi::Request< T >> &req) const
 
template<typename T >
void wait (El::mpi::Request< T > &req) const
 
void wait (Al::request &req) const
 
bool test (Al::request &req) const
 
void intertrainer_barrier () const
 
void trainer_barrier () const
 
void global_barrier () const
 
void barrier (const El::mpi::Comm &c) const
 
template<typename T >
void send (const T *data, int count, int trainer, int rank) const
 
template<typename T , El::Device D>
void send (const T *data, int count, int trainer, int rank, El::SyncInfo< D > const &syncInfo) const
 
template<typename T , El::Device D>
void send (const T *data, int count, int trainer, El::SyncInfo< D > const &syncInfo) const
 
void send (const AbsMat &mat, int trainer, int rank) const
 
void send (const DistMat &mat, int trainer, int rank) const
 
void send (const AbsMat &mat, int trainer) const
 
void send (const DistMat &mat, int trainer) const
 
template<typename T >
void nb_send (const T *data, int count, int trainer, int rank, El::mpi::Request< T > &req) const
 
template<typename T >
void nb_tagged_send (const T *data, int count, int rank, int tag, El::mpi::Request< T > &req, const El::mpi::Comm &c) const
 
template<typename T >
void nb_send (const T *data, int count, int trainer, El::mpi::Request< T > &req) const
 
void nb_send (const AbsMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const
 
void nb_send (const DistMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const
 
void nb_send (const AbsMat &mat, int trainer, El::mpi::Request< DataType > &req) const
 
void nb_send (const DistMat &mat, int trainer, El::mpi::Request< DataType > &req) const
 
template<typename T >
void recv (T *data, int count, int trainer, int rank) const
 
template<typename T >
void recv (T *data, int count, int trainer) const
 
template<typename T >
void recv (T *data, int count) const
 
template<typename T , El::Device D>
void recv (T *data, int count, int trainer, int rank, El::SyncInfo< D > const &syncInfo) const
 
template<typename T , El::Device D>
void recv (T *data, int count, int trainer, El::SyncInfo< D > const &syncInfo) const
 
void recv (AbsMat &mat, int trainer, int rank) const
 
void recv (DistMat &mat, int trainer, int rank) const
 
void recv (AbsMat &mat, int trainer) const
 
void recv (DistMat &mat, int trainer) const
 
template<typename T , El::Device D>
void recv (T *data, int count, El::SyncInfo< D > const &syncInfo) const
 
void recv (AbsMat &mat) const
 
void recv (DistMat &mat) const
 
template<typename T >
void nb_recv (T *data, int count, int trainer, int rank, El::mpi::Request< T > &req) const
 
template<typename T >
void nb_tagged_recv (T *data, int count, int rank, int tag, El::mpi::Request< T > &req, const El::mpi::Comm &c) const
 
template<typename T >
void nb_recv (T *data, int count, int trainer, El::mpi::Request< T > &req) const
 
void nb_recv (AbsMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const
 
void nb_recv (DistMat &mat, int trainer, int rank, El::mpi::Request< DataType > &req) const
 
void nb_recv (AbsMat &mat, int trainer, El::mpi::Request< DataType > &req) const
 
void nb_recv (DistMat &mat, int trainer, El::mpi::Request< DataType > &req) const
 
template<typename T >
void nb_recv (T *data, int count, El::mpi::Request< T > &req) const
 
void nb_recv (AbsMat &mat, El::mpi::Request< DataType > &req) const
 
void nb_recv (DistMat &mat, El::mpi::Request< DataType > &req) const
 
template<typename T , El::Device D>
void sendrecv (const T *snd, int send_count, int send_trainer, int send_rank, T *rcv, int recv_count, int recv_trainer, int recv_rank) const
 
template<typename T , El::Device D>
void sendrecv (const T *snd, int send_count, int send_trainer, T *rcv, int recv_count, int recv_trainer) const
 
template<typename T , El::Device D>
void sendrecv (const T *snd, int send_count, int send_trainer, int send_rank, T *rcv, int recv_count, int recv_trainer, int recv_rank, El::SyncInfo< D > const &syncInfo) const
 
template<typename T , El::Device D>
void sendrecv (const T *snd, int send_count, int send_trainer, T *rcv, int recv_count, int recv_trainer, El::SyncInfo< D > const &syncInfo) const
 
template<typename T >
int get_count (int trainer, int rank) const
 
template<typename T >
int get_count (int trainer) const
 
size_t get_num_trainer_barriers () const noexcept
 
size_t get_num_intertrainer_barriers () const noexcept
 
size_t get_num_global_barriers () const noexcept
 
size_t get_bytes_sent () const noexcept
 
size_t get_bytes_received () const noexcept
 
void reset_stats_counters () noexcept
 
const El::mpi::Comm & get_intertrainer_comm () const noexcept
 
const El::mpi::Comm & get_trainer_comm () const noexcept
 
const El::mpi::Comm & get_combined_grid_comm () const noexcept
 
const El::mpi::Comm & get_world_comm () const noexcept
 
const El::mpi::Comm & get_node_comm () const noexcept
 
const El::mpi::Comm & get_KFAC_comm () const noexcept
 
std::vector< int > get_primary_grid_ranks ()
 
std::vector< int > get_secondary_grid_ranks ()
 
bool get_KFAC_subgrid_create_two_models ()
 
bool enable_subgrid_async_communication ()
 
const El::mpi::Comm & get_packed_group_comm (int num_per_group) const
 
bool is_rank_node_local (int rank, const El::mpi::Comm &comm) const
 
void lbann_comm_abort (std::string msg) const
 

Static Public Member Functions

static bool is_sendable (const AbsMat &mat) noexcept
 
static bool is_sendable (const AbsDistMat &dist_mat) noexcept
 

Private Member Functions

void setup_node_comm ()
 
void setup_threads ()
 
void count_bytes_broadcast (const size_t bytes, const int rank, const int root) const noexcept
 

Private Attributes

const El::mpi::Comm m_world_comm
 
El::mpi::Comm m_trainer_comm
 
El::mpi::Comm m_intertrainer_comm
 
El::mpi::Comm m_node_comm
 
El::mpi::Comm m_primary_grid_comm
 
El::mpi::Comm m_secondary_grid_comm
 
El::mpi::Comm m_combined_grid_comm
 
std::unordered_map< int, El::mpi::Comm > m_group_communicators
 
std::unique_ptr< El::Grid > m_grid
 
int m_num_trainers
 
int m_procs_per_trainer
 
int m_trainer_rank
 
int m_rank_in_trainer
 
int m_procs_per_node
 
int m_rank_in_node
 
std::vector< int > m_world_ranks_on_node
 
int m_threads_per_proc
 
GridType m_grid_type = GridType::NO_GRID
 
bool m_create_two_models = false
 
bool m_subgrid_async_progress = false
 
std::unique_ptr< El::Grid > m_secondary_grid
 
std::unique_ptr< El::Grid > m_subset_grid
 
std::vector< int > m_primary_grid_ranks
 
std::vector< int > m_secondary_grid_ranks
 
size_t m_num_trainer_barriers
 
size_t m_num_intertrainer_barriers
 
size_t m_num_global_barriers
 
size_t m_bytes_sent
 
size_t m_bytes_received
 

Detailed Description

Manage communication. This supports separate trainers, each of which is split over potentially several processes. Every trainer is split over the same number of processes. The corresponding processes between trainers are on the "inter-trainer communicator". You can also do point-to-point or broadcast communication to arbitrary sets of processes.

Definition at line 105 of file comm.hpp.

Constructor & Destructor Documentation

◆ lbann_comm() [1/2]

lbann::lbann_comm::lbann_comm ( int  procs_per_trainer = 0,
El::mpi::Comm  world = El::mpi::COMM_WORLD.GetMPIComm() 
)

Init communicators for trainers each with procs_per_trainer processes, defaulting to every process in one trainer.

◆ lbann_comm() [2/2]

lbann::lbann_comm::lbann_comm ( const lbann_comm & )
delete

Don't allow copying; it doesn't make sense for the communicator.

◆ ~lbann_comm()

lbann::lbann_comm::~lbann_comm ( )

Member Function Documentation

◆ all_gather() [1/4]

template<typename T >
void lbann::lbann_comm::all_gather ( const T *  src,
int  src_count,
T *  rcv,
int  rcv_count,
const El::mpi::Comm &  c 
) const

Allgather over an arbitrary communicator

Definition at line 176 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ all_gather() [2/4]

template<typename T , El::Device D>
void lbann::lbann_comm::all_gather ( const T *  src,
int  src_count,
T *  rcv,
int  rcv_count,
const El::mpi::Comm &  c,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 190 of file comm_impl.hpp.

◆ all_gather() [3/4]

template<typename T >
void lbann::lbann_comm::all_gather ( std::vector< T > const &  src,
std::vector< T > &  rcs,
std::vector< int > const &  rcv_counts,
std::vector< int > const &  rcv_disp,
const El::mpi::Comm &  c 
) const

Allgatherv over an arbitrary communicator; all vectors must be correctly sized prior to entry.

Definition at line 205 of file comm_impl.hpp.

Here is the call graph for this function:

◆ all_gather() [4/4]

template<typename T >
void lbann::lbann_comm::all_gather ( T const &  src,
std::vector< T > &  data,
const El::mpi::Comm &  c 
) const

Allgather for a single element over an arbitrary communicator; std::vector<T> &data must be correctly sized prior to entry.

Definition at line 243 of file comm_impl.hpp.

◆ allreduce() [1/5]

template<typename T >
T lbann::lbann_comm::allreduce ( T  snd,
const El::mpi::Comm &  c,
El::mpi::Op  op = El::mpi::SUM 
) const

Scalar allreduce.

Definition at line 643 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ allreduce() [2/5]

template<typename T >
void lbann::lbann_comm::allreduce ( T const *  snd,
int  count,
T *  rcv,
const El::mpi::Comm &  c,
El::mpi::Op  op = El::mpi::SUM 
) const

Scalar-array allreduce.

Definition at line 658 of file comm_impl.hpp.

◆ allreduce() [3/5]

template<typename T >
void lbann::lbann_comm::allreduce ( T *  data,
int  count,
const El::mpi::Comm &  c,
El::mpi::Op  op = El::mpi::SUM 
) const

In-place scalar-array allreduce.

Definition at line 687 of file comm_impl.hpp.

Here is the call graph for this function:

◆ allreduce() [4/5]

template<typename TensorDataType >
void lbann::lbann_comm::allreduce ( El::AbstractMatrix< TensorDataType > &  m,
const El::mpi::Comm &  c,
El::mpi::Op  op = El::mpi::SUM 
) const

Matrix allreduce.

◆ allreduce() [5/5]

template<typename TensorDataType >
void lbann::lbann_comm::allreduce ( El::AbstractDistMatrix< TensorDataType > &  m,
const El::mpi::Comm &  c,
El::mpi::Op  op = El::mpi::SUM 
) const

Matrix allreduce.

◆ am_trainer_master()

bool lbann::lbann_comm::am_trainer_master ( ) const
inline noexcept

Return true if this process is the master process in its trainer.

Definition at line 192 of file comm.hpp.

Here is the caller graph for this function:

◆ am_world_master()

bool lbann::lbann_comm::am_world_master ( ) const
inline noexcept

Return true if this process is the world master process.

Definition at line 197 of file comm.hpp.

Here is the call graph for this function:

◆ barrier()

void lbann::lbann_comm::barrier ( const El::mpi::Comm &  c) const

Barrier on an arbitrary communicator.

◆ broadcast() [1/4]

template<typename T , bool S>
void lbann::lbann_comm::broadcast ( int  root,
T &  val,
const El::mpi::Comm &  c 
) const

Broadcast a scalar value over an arbitrary communicator.

Definition at line 1021 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ broadcast() [2/4]

template<typename T >
void lbann::lbann_comm::broadcast ( const int  root,
T *  data,
const int  count,
const El::mpi::Comm &  c 
) const

Broadcast a buffer over an arbitrary communicator assuming that the buffer space is already allocated.

Definition at line 60 of file comm_impl.hpp.

Here is the call graph for this function:

◆ broadcast() [3/4]

template<typename T , El::Device D, bool S>
void lbann::lbann_comm::broadcast ( const int  root,
T *  data,
const int  count,
const El::mpi::Comm &  c,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 1058 of file comm_impl.hpp.

Here is the call graph for this function:

◆ broadcast() [4/4]

template<typename T >
void lbann::lbann_comm::broadcast ( const int  root,
std::vector< T > &  data,
const El::mpi::Comm &  c 
) const

Broadcast vector<> over an arbitrary communicator; vector<> for non-root processes will be resized as needed.

Definition at line 140 of file comm_impl.hpp.

Here is the call graph for this function:

◆ broadcast_custom()

template<typename T >
void lbann::lbann_comm::broadcast_custom ( int  root,
T &  val,
const El::mpi::Comm &  c 
) const

Definition at line 1045 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ broadcast_native()

template<typename T >
void lbann::lbann_comm::broadcast_native ( int  root,
T &  val,
const El::mpi::Comm &  c 
) const

Definition at line 1037 of file comm_impl.hpp.

◆ count_bytes_broadcast()

void lbann::lbann_comm::count_bytes_broadcast ( const size_t  bytes,
const int  rank,
const int  root 
) const
inline private noexcept

Keep track of the number of broadcast bytes transmitted and received

Definition at line 1017 of file comm.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ enable_subgrid_async_communication()

bool lbann::lbann_comm::enable_subgrid_async_communication ( )
inline

Return asynchronous flag for sub-grid parallelism

Definition at line 918 of file comm.hpp.

◆ gather() [1/7]

template<typename T >
void lbann::lbann_comm::gather ( T  snd,
int  root,
const El::mpi::Comm &  c 
) const

Scalar gather (for non-root processes).

Definition at line 359 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ gather() [2/7]

template<typename T >
void lbann::lbann_comm::gather ( T  snd,
T *  rcv,
const El::mpi::Comm &  c 
) const

Scalar gather (for root processes).

Definition at line 374 of file comm_impl.hpp.

◆ gather() [3/7]

template<typename T >
void lbann::lbann_comm::gather ( T  snd,
std::vector< T > &  rcv,
const El::mpi::Comm &  c 
) const

Scalar gather (for root processes).

Definition at line 383 of file comm_impl.hpp.

Here is the call graph for this function:

◆ gather() [4/7]

template<typename T >
void lbann::lbann_comm::gather ( T const *  snd,
int  count,
int  root,
const El::mpi::Comm &  c 
) const

Scalar-array gather (for non-root processes).

Definition at line 391 of file comm_impl.hpp.

Here is the call graph for this function:

◆ gather() [5/7]

template<typename T , El::Device D>
void lbann::lbann_comm::gather ( T const *  snd,
int  count,
int  root,
const El::mpi::Comm &  c,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 399 of file comm_impl.hpp.

◆ gather() [6/7]

template<typename T >
void lbann::lbann_comm::gather ( T const *  snd,
int  count,
T *  rcv,
const El::mpi::Comm &  c 
) const

Scalar-array gather (for root processes).

Definition at line 410 of file comm_impl.hpp.

Here is the call graph for this function:

◆ gather() [7/7]

template<typename T , El::Device D>
void lbann::lbann_comm::gather ( T const *  snd,
int  count,
T *  rcv,
const El::mpi::Comm &  c,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 418 of file comm_impl.hpp.

◆ get_bytes_received()

size_t lbann::lbann_comm::get_bytes_received ( ) const
inline noexcept

Return the number of bytes received.

Definition at line 851 of file comm.hpp.

◆ get_bytes_sent()

size_t lbann::lbann_comm::get_bytes_sent ( ) const
inline noexcept

Return the number of bytes sent.

Definition at line 849 of file comm.hpp.

◆ get_combined_grid_comm()

const El::mpi::Comm& lbann::lbann_comm::get_combined_grid_comm ( ) const
inline noexcept

Return the combined grid communicator for a trainer.

Definition at line 895 of file comm.hpp.

◆ get_count() [1/2]

template<typename T >
int lbann::lbann_comm::get_count ( int  trainer,
int  rank 
) const

Determine the size (count) of an incoming message.

Definition at line 1005 of file comm_impl.hpp.

Here is the call graph for this function:

◆ get_count() [2/2]

template<typename T >
int lbann::lbann_comm::get_count ( int  trainer) const

Definition at line 1015 of file comm_impl.hpp.

◆ get_default_threads_per_proc()

int lbann::lbann_comm::get_default_threads_per_proc ( ) const
inline noexcept

Get default number of threads per process. This is the number of OpenMP threads to use for parallel regions, provided omp_set_num_threads has not been called or the num_threads directive has not been provided.

Definition at line 248 of file comm.hpp.

Here is the call graph for this function:

◆ get_grid_type()

GridType lbann::lbann_comm::get_grid_type ( ) const
inline noexcept

Get trainer grid number (0: no primary/secondary grid, 1: part of primary grid, 2: part of secondary grid).

Definition at line 152 of file comm.hpp.

◆ get_intertrainer_comm()

const El::mpi::Comm& lbann::lbann_comm::get_intertrainer_comm ( ) const
inline noexcept

Developer's note: to get the raw MPI communicator, which may be needed when working with external libraries, use, for example: comm->get_intertrainer_comm().GetMPIComm()

Return the intertrainer communicator.

Definition at line 883 of file comm.hpp.

Here is the caller graph for this function:

◆ get_intertrainer_master()

int lbann::lbann_comm::get_intertrainer_master ( ) const
inline noexcept

Return the rank of the inter-trainer master process.

Definition at line 188 of file comm.hpp.

◆ get_KFAC_comm()

const El::mpi::Comm& lbann::lbann_comm::get_KFAC_comm ( ) const
inline noexcept

Return the communicator for this grid in sub-grid parallelism.

Definition at line 907 of file comm.hpp.

◆ get_KFAC_subgrid_create_two_models()

bool lbann::lbann_comm::get_KFAC_subgrid_create_two_models ( )
inline

Definition at line 915 of file comm.hpp.

◆ get_node_comm()

const El::mpi::Comm& lbann::lbann_comm::get_node_comm ( ) const
inline noexcept

Return the communicator for this node.

Definition at line 904 of file comm.hpp.

◆ get_num_global_barriers()

size_t lbann::lbann_comm::get_num_global_barriers ( ) const
inline noexcept

Return the number of global barriers performed.

Definition at line 844 of file comm.hpp.

◆ get_num_intertrainer_barriers()

size_t lbann::lbann_comm::get_num_intertrainer_barriers ( ) const
inline noexcept

Return the number of inter-trainer barriers performed.

Definition at line 839 of file comm.hpp.

◆ get_num_trainer_barriers()

size_t lbann::lbann_comm::get_num_trainer_barriers ( ) const
inline noexcept

Return the number of trainer barriers performed.

Definition at line 834 of file comm.hpp.

◆ get_num_trainers()

int lbann::lbann_comm::get_num_trainers ( ) const
inline noexcept

Return the total number of trainers.

Definition at line 220 of file comm.hpp.

◆ get_packed_group_comm()

const El::mpi::Comm& lbann::lbann_comm::get_packed_group_comm ( int  num_per_group) const

Return a communicator containing num_per_group processors.

This will attempt to pack processes so that the processes in each group are physically close together on the system.

num_per_group must evenly divide the number of processors in the world.

◆ get_primary_grid_ranks()

std::vector<int> lbann::lbann_comm::get_primary_grid_ranks ( )
inline

Return the ranks of the primary grid in the trainer.

Definition at line 910 of file comm.hpp.

◆ get_procs_in_world()

int lbann::lbann_comm::get_procs_in_world ( ) const
inline

Return the total number of ranks.

Definition at line 229 of file comm.hpp.

◆ get_procs_per_node()

int lbann::lbann_comm::get_procs_per_node ( ) const
inline noexcept

Return the number of processes in a compute node.

Definition at line 227 of file comm.hpp.

◆ get_procs_per_trainer()

int lbann::lbann_comm::get_procs_per_trainer ( ) const
inline noexcept

Definition at line 222 of file comm.hpp.

Here is the caller graph for this function:

◆ get_rank_in_node()

int lbann::lbann_comm::get_rank_in_node ( ) const
inline noexcept

Return the rank of this process within its compute node.

Definition at line 234 of file comm.hpp.

◆ get_rank_in_trainer()

int lbann::lbann_comm::get_rank_in_trainer ( ) const
inline noexcept

Get the rank of this process in its trainer.

Definition at line 157 of file comm.hpp.

Here is the caller graph for this function:

◆ get_rank_in_world()

int lbann::lbann_comm::get_rank_in_world ( ) const
inline

Get my rank in COMM_WORLD.

Definition at line 159 of file comm.hpp.

◆ get_secondary_grid() [1/2]

El::Grid& lbann::lbann_comm::get_secondary_grid ( )
inline

Return secondary grid to use for this trainer when sub-grid parallelism is enabled.

Definition at line 207 of file comm.hpp.

◆ get_secondary_grid() [2/2]

const El::Grid& lbann::lbann_comm::get_secondary_grid ( ) const
inline

Return read-only secondary grid to use for this trainer.

Definition at line 209 of file comm.hpp.

◆ get_secondary_grid_ranks()

std::vector<int> lbann::lbann_comm::get_secondary_grid_ranks ( )
inline

Return the ranks of the secondary grid in the trainer.

Definition at line 913 of file comm.hpp.

◆ get_subset_grid() [1/2]

El::Grid& lbann::lbann_comm::get_subset_grid ( )
inline

Return subset grid to use for this trainer when sub-grid parallelism is enabled.

Definition at line 215 of file comm.hpp.

◆ get_subset_grid() [2/2]

const El::Grid& lbann::lbann_comm::get_subset_grid ( ) const
inline

Return read-only subset grid to use for this trainer when sub-grid parallelism is enabled.

Definition at line 218 of file comm.hpp.

◆ get_trainer_comm()

const El::mpi::Comm& lbann::lbann_comm::get_trainer_comm ( ) const
inline noexcept

Return the trainer communicator.

Definition at line 889 of file comm.hpp.

Here is the caller graph for this function:

◆ get_trainer_grid() [1/2]

El::Grid& lbann::lbann_comm::get_trainer_grid ( )
inline

Return a grid to use for this trainer.

Definition at line 202 of file comm.hpp.

Here is the caller graph for this function:

◆ get_trainer_grid() [2/2]

const El::Grid& lbann::lbann_comm::get_trainer_grid ( ) const
inline

Return a read-only grid to use for this trainer.

Definition at line 204 of file comm.hpp.

◆ get_trainer_master()

int lbann::lbann_comm::get_trainer_master ( ) const
inline noexcept

Return the rank of the master process in this trainer.

Definition at line 186 of file comm.hpp.

◆ get_trainer_rank()

int lbann::lbann_comm::get_trainer_rank ( ) const
inline noexcept

Get which trainer this process is in.

Definition at line 155 of file comm.hpp.

◆ get_world_comm()

const El::mpi::Comm& lbann::lbann_comm::get_world_comm ( ) const
inline noexcept

Return the world communicator.

Definition at line 901 of file comm.hpp.

Here is the caller graph for this function:

◆ get_world_master()

int lbann::lbann_comm::get_world_master ( ) const
inline noexcept

Return the rank of the world master process.

Definition at line 190 of file comm.hpp.

◆ get_world_rank()

int lbann::lbann_comm::get_world_rank ( int  trainer,
int  rank 
) const
inline noexcept

Return the COMM_WORLD rank of the rank'th processor in trainer.

Definition at line 164 of file comm.hpp.

Here is the caller graph for this function:

◆ global_barrier()

void lbann::lbann_comm::global_barrier ( ) const

Barrier among all processes.

◆ intertrainer_allreduce()

template<typename T >
T lbann::lbann_comm::intertrainer_allreduce ( T  snd,
El::mpi::Op  op = El::mpi::SUM 
) const

Inter-trainer all-reduce.

Definition at line 622 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_barrier()

void lbann::lbann_comm::intertrainer_barrier ( ) const

Barrier among the inter-trainer processes.

◆ intertrainer_broadcast() [1/4]

template<typename T >
void lbann::lbann_comm::intertrainer_broadcast ( int  root,
T &  val 
) const

Inter-trainer broadcast of a scalar.

Definition at line 42 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ intertrainer_broadcast() [2/4]

template<typename T >
void lbann::lbann_comm::intertrainer_broadcast ( const int  root,
T *  data,
const int  count 
) const

Inter-trainer broadcast of a buffer.

Definition at line 87 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_broadcast() [3/4]

template<typename T , El::Device D>
void lbann::lbann_comm::intertrainer_broadcast ( const int  root,
T *  data,
const int  count,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 94 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_broadcast() [4/4]

template<typename T >
void lbann::lbann_comm::intertrainer_broadcast ( int  root,
std::vector< T > &  data 
) const

Broadcast vector<> across trainers.

Broadcast vector<> across trainers; vector<> for non-root processes will be resized as needed.

Definition at line 162 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_broadcast_matrix() [1/2]

void lbann::lbann_comm::intertrainer_broadcast_matrix ( AbsMat &  mat,
int  root 
) const

Broadcast mat over the inter-trainer communicator starting from root.

◆ intertrainer_broadcast_matrix() [2/2]

void lbann::lbann_comm::intertrainer_broadcast_matrix ( AbsDistMat &  mat,
int  root 
) const

◆ intertrainer_gather() [1/4]

template<typename T >
void lbann::lbann_comm::intertrainer_gather ( T  snd,
int  root 
) const

Inter-trainer gather (for non-root processes).

Definition at line 331 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_gather() [2/4]

template<typename T >
void lbann::lbann_comm::intertrainer_gather ( T  snd,
std::vector< T > &  rcv 
) const

Inter-trainer gather (for root processes).

Definition at line 337 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_gather() [3/4]

template<typename T >
void lbann::lbann_comm::intertrainer_gather ( T const *  snd,
int  count,
int  root 
) const

Inter-trainer scalar-array gather (for non-root processes).

Definition at line 343 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_gather() [4/4]

template<typename T >
void lbann::lbann_comm::intertrainer_gather ( T const *  snd,
int  count,
T *  rcv 
) const

Inter-trainer scalar-array gather (for root processes).

Definition at line 351 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_reduce() [1/2]

template<typename T >
void lbann::lbann_comm::intertrainer_reduce ( T  snd,
int  root,
El::mpi::Op  op = El::mpi::SUM 
) const

Inter-trainer reduce (for non-root processes).

Definition at line 456 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_reduce() [2/2]

template<typename T >
T lbann::lbann_comm::intertrainer_reduce ( T  snd,
El::mpi::Op  op = El::mpi::SUM 
) const

Inter-trainer reduce (for root processes).

Definition at line 464 of file comm_impl.hpp.

Here is the call graph for this function:

◆ intertrainer_sum_matrix() [1/2]

void lbann::lbann_comm::intertrainer_sum_matrix ( AbsMat &  mat) const

Perform a sum reduction of mat over the inter-trainer communicator.

◆ intertrainer_sum_matrix() [2/2]

void lbann::lbann_comm::intertrainer_sum_matrix ( AbsDistMat &  mat) const

◆ is_rank_node_local()

bool lbann::lbann_comm::is_rank_node_local ( int  rank,
const El::mpi::Comm &  comm 
) const
inline

Return true if rank (in comm) is on the local node.

Definition at line 931 of file comm.hpp.

◆ is_sendable() [1/2]

static bool lbann::lbann_comm::is_sendable ( const AbsMat mat)
inline static noexcept

Return true if mat can be transmitted.

Definition at line 863 of file comm.hpp.

◆ is_sendable() [2/2]

static bool lbann::lbann_comm::is_sendable ( const AbsDistMat dist_mat)
inline static noexcept

Return true if the local portion of dist_mat can be transmitted.

Definition at line 872 of file comm.hpp.

◆ is_world_rank_on_node()

bool lbann::lbann_comm::is_world_rank_on_node ( int  rank) const
inline

Return true if rank (in COMM_WORLD) is on this compute node.

Definition at line 236 of file comm.hpp.

◆ lbann_comm_abort()

void lbann::lbann_comm::lbann_comm_abort ( std::string  msg) const

Throws an lbann_exception.

Here is the caller graph for this function:

◆ map_world_rank_to_rank_in_trainer()

int lbann::lbann_comm::map_world_rank_to_rank_in_trainer ( int  world_rank) const
inline noexcept

Return the "rank" within the trainer that this rank is in

Definition at line 181 of file comm.hpp.

◆ map_world_rank_to_trainer_rank()

int lbann::lbann_comm::map_world_rank_to_trainer_rank ( int  world_rank) const
inline noexcept

Return the "rank" of the trainer that this rank is in

Definition at line 176 of file comm.hpp.

◆ nb_allreduce() [1/3]

template<typename TensorDataType >
void lbann::lbann_comm::nb_allreduce ( El::AbstractMatrix< TensorDataType > &  m,
const El::mpi::Comm &  c,
Al::request req,
El::mpi::Op  op = El::mpi::SUM 
) const

Non-blocking matrix allreduce. If LBANN has not been built with Aluminum, then this calls a blocking matrix allreduce.

Here is the caller graph for this function:

◆ nb_allreduce() [2/3]

template<typename TensorDataType >
void lbann::lbann_comm::nb_allreduce ( El::AbstractDistMatrix< TensorDataType > &  m,
const El::mpi::Comm &  c,
Al::request req,
El::mpi::Op  op = El::mpi::SUM 
) const

Non-blocking matrix allreduce. If LBANN has not been built with Aluminum, then this calls a blocking matrix allreduce.

◆ nb_allreduce() [3/3]

template<typename T >
void lbann::lbann_comm::nb_allreduce ( T *  data,
int  count,
const El::mpi::Comm &  c,
Al::request req,
El::mpi::Op  op = El::mpi::SUM 
) const

Non-blocking in-place scalar-array allreduce. If LBANN has not been built with Aluminum, then this calls a blocking allreduce. This currently only supports host pointers (i.e. the MPI backend).

Definition at line 718 of file comm_impl.hpp.

Here is the call graph for this function:

◆ nb_recv() [1/9]

template<typename T >
void lbann::lbann_comm::nb_recv ( T *  data,
int  count,
int  trainer,
int  rank,
El::mpi::Request< T > &  req 
) const

Corresponding non-blocking receives.

Definition at line 879 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nb_recv() [2/9]

template<typename T >
void lbann::lbann_comm::nb_recv ( T *  data,
int  count,
int  trainer,
El::mpi::Request< T > &  req 
) const

Definition at line 905 of file comm_impl.hpp.

Here is the call graph for this function:

◆ nb_recv() [3/9]

void lbann::lbann_comm::nb_recv ( AbsMat mat,
int  trainer,
int  rank,
El::mpi::Request< DataType > &  req 
) const

◆ nb_recv() [4/9]

void lbann::lbann_comm::nb_recv ( DistMat mat,
int  trainer,
int  rank,
El::mpi::Request< DataType > &  req 
) const

◆ nb_recv() [5/9]

void lbann::lbann_comm::nb_recv ( AbsMat mat,
int  trainer,
El::mpi::Request< DataType > &  req 
) const
inline

Definition at line 776 of file comm.hpp.

◆ nb_recv() [6/9]

void lbann::lbann_comm::nb_recv ( DistMat mat,
int  trainer,
El::mpi::Request< DataType > &  req 
) const
inline

Definition at line 780 of file comm.hpp.

◆ nb_recv() [7/9]

template<typename T >
void lbann::lbann_comm::nb_recv ( T *  data,
int  count,
El::mpi::Request< T > &  req 
) const

Definition at line 913 of file comm_impl.hpp.

Here is the call graph for this function:

◆ nb_recv() [8/9]

void lbann::lbann_comm::nb_recv ( AbsMat mat,
El::mpi::Request< DataType > &  req 
) const

◆ nb_recv() [9/9]

void lbann::lbann_comm::nb_recv ( DistMat mat,
El::mpi::Request< DataType > &  req 
) const

◆ nb_send() [1/6]

template<typename T >
void lbann::lbann_comm::nb_send ( const T *  data,
int  count,
int  trainer,
int  rank,
El::mpi::Request< T > &  req 
) const

Corresponding non-blocking sends.

Definition at line 793 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nb_send() [2/6]

template<typename T >
void lbann::lbann_comm::nb_send ( const T *  data,
int  count,
int  trainer,
El::mpi::Request< T > &  req 
) const

Definition at line 818 of file comm_impl.hpp.

Here is the call graph for this function:

◆ nb_send() [3/6]

void lbann::lbann_comm::nb_send ( const AbsMat mat,
int  trainer,
int  rank,
El::mpi::Request< DataType > &  req 
) const

◆ nb_send() [4/6]

void lbann::lbann_comm::nb_send ( const DistMat mat,
int  trainer,
int  rank,
El::mpi::Request< DataType > &  req 
) const

◆ nb_send() [5/6]

void lbann::lbann_comm::nb_send ( const AbsMat mat,
int  trainer,
El::mpi::Request< DataType > &  req 
) const
inline

Definition at line 708 of file comm.hpp.

◆ nb_send() [6/6]

void lbann::lbann_comm::nb_send ( const DistMat mat,
int  trainer,
El::mpi::Request< DataType > &  req 
) const
inline

Definition at line 712 of file comm.hpp.

◆ nb_tagged_recv()

template<typename T >
void lbann::lbann_comm::nb_tagged_recv ( T *  data,
int  count,
int  rank,
int  tag,
El::mpi::Request< T > &  req,
const El::mpi::Comm &  c 
) const

Definition at line 893 of file comm_impl.hpp.

◆ nb_tagged_send()

template<typename T >
void lbann::lbann_comm::nb_tagged_send ( const T *  data,
int  count,
int  rank,
int  tag,
El::mpi::Request< T > &  req,
const El::mpi::Comm &  c 
) const

Definition at line 807 of file comm_impl.hpp.

◆ operator=()

lbann_comm& lbann::lbann_comm::operator= ( const lbann_comm & )
delete

Don't allow assignment; it doesn't make sense for the communicator.

◆ recv() [1/12]

template<typename T >
void lbann::lbann_comm::recv ( T *  data,
int  count,
int  trainer,
int  rank 
) const

Corresponding receive to send.

Definition at line 828 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ recv() [2/12]

template<typename T >
void lbann::lbann_comm::recv ( T *  data,
int  count,
int  trainer 
) const

Definition at line 836 of file comm_impl.hpp.

Here is the call graph for this function:

◆ recv() [3/12]

template<typename T >
void lbann::lbann_comm::recv ( T *  data,
int  count 
) const

Definition at line 841 of file comm_impl.hpp.

Here is the call graph for this function:

◆ recv() [4/12]

template<typename T , El::Device D>
void lbann::lbann_comm::recv ( T *  data,
int  count,
int  trainer,
int  rank,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 846 of file comm_impl.hpp.

Here is the call graph for this function:

◆ recv() [5/12]

template<typename T , El::Device D>
void lbann::lbann_comm::recv ( T *  data,
int  count,
int  trainer,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 860 of file comm_impl.hpp.

Here is the call graph for this function:

◆ recv() [6/12]

void lbann::lbann_comm::recv ( AbsMat mat,
int  trainer,
int  rank 
) const

◆ recv() [7/12]

void lbann::lbann_comm::recv ( DistMat mat,
int  trainer,
int  rank 
) const

◆ recv() [8/12]

void lbann::lbann_comm::recv ( AbsMat mat,
int  trainer 
) const
inline

Definition at line 737 of file comm.hpp.

◆ recv() [9/12]

void lbann::lbann_comm::recv ( DistMat mat,
int  trainer 
) const
inline

Definition at line 741 of file comm.hpp.

◆ recv() [10/12]

template<typename T , El::Device D>
void lbann::lbann_comm::recv ( T *  data,
int  count,
El::SyncInfo< D > const &  syncInfo 
) const

As above, but receive from anyone.

Definition at line 869 of file comm_impl.hpp.

Here is the call graph for this function:

◆ recv() [11/12]

void lbann::lbann_comm::recv ( AbsMat mat) const

◆ recv() [12/12]

void lbann::lbann_comm::recv ( DistMat mat) const

◆ reduce() [1/10]

template<typename T >
void lbann::lbann_comm::reduce ( T  snd,
int  root,
const El::mpi::Comm &  c,
El::mpi::Op  op = El::mpi::SUM 
) const

Scalar reduce (for non-root processes).

Definition at line 502 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ reduce() [2/10]

template<typename T >
T lbann::lbann_comm::reduce ( T  snd,
const El::mpi::Comm &  c,
El::mpi::Op  op = El::mpi::SUM 
) const

Scalar reduce (for root processes).

Definition at line 518 of file comm_impl.hpp.

◆ reduce() [3/10]

template<typename T >
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
int  root,
const El::mpi::Comm &  c 
) const

Scalar-array reduce (for non-root processes).

Definition at line 539 of file comm_impl.hpp.

Here is the call graph for this function:

◆ reduce() [4/10]

template<typename T , El::Device D>
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
int  root,
const El::mpi::Comm &  c,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 547 of file comm_impl.hpp.

Here is the call graph for this function:

◆ reduce() [5/10]

template<typename T >
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
int  root,
const El::mpi::Comm &  c,
El::mpi::Op  op 
) const

Definition at line 557 of file comm_impl.hpp.

Here is the call graph for this function:

◆ reduce() [6/10]

template<typename T , El::Device D>
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
int  root,
const El::mpi::Comm &  c,
El::mpi::Op  op,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 566 of file comm_impl.hpp.

◆ reduce() [7/10]

template<typename T , El::Device D>
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
T *  rcv,
const El::mpi::Comm &  c,
El::SyncInfo< D > const &  syncInfo 
) const

Scalar-array reduce (for root processes).

Definition at line 578 of file comm_impl.hpp.

Here is the call graph for this function:

◆ reduce() [8/10]

template<typename T >
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
T *  rcv,
const El::mpi::Comm &  c 
) const

Definition at line 587 of file comm_impl.hpp.

Here is the call graph for this function:

◆ reduce() [9/10]

template<typename T >
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
T *  rcv,
const El::mpi::Comm &  c,
El::mpi::Op  op 
) const

Definition at line 596 of file comm_impl.hpp.

Here is the call graph for this function:

◆ reduce() [10/10]

template<typename T , El::Device D>
void lbann::lbann_comm::reduce ( T const *  snd,
int  count,
T *  rcv,
const El::mpi::Comm &  c,
El::mpi::Op  op,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 605 of file comm_impl.hpp.

◆ reset_stats_counters()

void lbann::lbann_comm::reset_stats_counters ( )
inline noexcept

Definition at line 853 of file comm.hpp.

◆ reset_threads()

void lbann::lbann_comm::reset_threads ( ) const
noexcept

Reset the number of threads per process to the default.

◆ resize()

template<typename T >
size_t lbann::lbann_comm::resize ( const int  root,
std::vector< T > &  data,
const El::mpi::Comm &  c 
) const

Resize vector<> over an arbitrary communicator to match the one on root.

Definition at line 123 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ scatter() [1/2]

template<typename T >
T lbann::lbann_comm::scatter ( int  root,
const El::mpi::Comm &  c 
) const

Scalar scatter (for non-root processes).

Definition at line 431 of file comm_impl.hpp.

◆ scatter() [2/2]

template<typename T >
T lbann::lbann_comm::scatter ( T const *  snd,
const El::mpi::Comm &  c 
) const

Scalar scatter (for root processes).

Definition at line 446 of file comm_impl.hpp.

◆ send() [1/7]

template<typename T >
void lbann::lbann_comm::send ( const T *  data,
int  count,
int  trainer,
int  rank 
) const

Send a buffer to rank in trainer.

Definition at line 761 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ send() [2/7]

template<typename T , El::Device D>
void lbann::lbann_comm::send ( const T *  data,
int  count,
int  trainer,
int  rank,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 769 of file comm_impl.hpp.

Here is the call graph for this function:

◆ send() [3/7]

template<typename T , El::Device D>
void lbann::lbann_comm::send ( const T *  data,
int  count,
int  trainer,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 783 of file comm_impl.hpp.

Here is the call graph for this function:

◆ send() [4/7]

void lbann::lbann_comm::send ( const AbsMat mat,
int  trainer,
int  rank 
) const

◆ send() [5/7]

void lbann::lbann_comm::send ( const DistMat mat,
int  trainer,
int  rank 
) const

◆ send() [6/7]

void lbann::lbann_comm::send ( const AbsMat mat,
int  trainer 
) const
inline

Definition at line 671 of file comm.hpp.

◆ send() [7/7]

void lbann::lbann_comm::send ( const DistMat mat,
int  trainer 
) const
inline

Definition at line 675 of file comm.hpp.

◆ sendrecv() [1/4]

template<typename T , El::Device D>
void lbann::lbann_comm::sendrecv ( const T *  snd,
int  send_count,
int  send_trainer,
int  send_rank,
T *  rcv,
int  recv_count,
int  recv_trainer,
int  recv_rank 
) const

Send/recv to/from ranks.

Definition at line 923 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ sendrecv() [2/4]

template<typename T , El::Device D>
void lbann::lbann_comm::sendrecv ( const T *  snd,
int  send_count,
int  send_trainer,
T *  rcv,
int  recv_count,
int  recv_trainer 
) const

Definition at line 943 of file comm_impl.hpp.

Here is the call graph for this function:

◆ sendrecv() [3/4]

template<typename T , El::Device D>
void lbann::lbann_comm::sendrecv ( const T *  snd,
int  send_count,
int  send_trainer,
int  send_rank,
T *  rcv,
int  recv_count,
int  recv_trainer,
int  recv_rank,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 962 of file comm_impl.hpp.

Here is the call graph for this function:

◆ sendrecv() [4/4]

template<typename T , El::Device D>
void lbann::lbann_comm::sendrecv ( const T *  snd,
int  send_count,
int  send_trainer,
T *  rcv,
int  recv_count,
int  recv_trainer,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 984 of file comm_impl.hpp.

Here is the call graph for this function:

◆ setup_node_comm()

void lbann::lbann_comm::setup_node_comm ( )
private

Setup communicator for processes in the same compute node.

◆ setup_threads()

void lbann::lbann_comm::setup_threads ( )
private

Initialize the default number of threads per process. This is the number of OpenMP threads to use for parallel regions, provided omp_set_num_threads has not been called or the num_threads directive has not been provided. If the environment variable OMP_NUM_THREADS is defined, its value is used for the default. Otherwise, the default is the number of hardware cores per node divided by the number of processes per node.

◆ split_trainer_grid()

void lbann::lbann_comm::split_trainer_grid ( int  num_process_primary_grid = 0,
bool  create_two_models = false,
bool  enable_async_comm = false,
bool  enable_topo_aware = false 
)

Split the communicator for the given trainer into primary and secondary grids.

Parameters
num_process_primary_grid: Absolute number of MPI ranks assigned to the primary grid.
create_two_models: Create a secondary copy of the model on the secondary grid to perform redundant computation and minimize communication.
enable_async_comm: Use non-blocking sends and receives.
enable_topo_aware: Assign primary and secondary grid resources so that they are interleaved and thus should be allocated to the same compute node, assuming that there is always an even number of accelerators per node.

◆ split_trainers()

void lbann::lbann_comm::split_trainers ( int  procs_per_trainer = -1,
int  trainer_grid_height = -1 
)

Construct communicators for trainers.

Invalidates any existing trainer communicators.

Parameters
procs_per_trainer: Number of MPI ranks in a trainer. Default is size of world communicator.
trainer_grid_height: Height of 2D process grid for each trainer. Must divide procs_per_trainer. Default grid is approximately square.

◆ test()

bool lbann::lbann_comm::test ( Al::request req) const

Test whether a non-blocking request has completed; true if it has.

◆ trainer_all_gather() [1/2]

template<typename T >
void lbann::lbann_comm::trainer_all_gather ( std::vector< T > const &  src,
std::vector< T > &  rcs,
std::vector< int > const &  rcv_counts,
std::vector< int > const &  rcv_disp 
) const

Allgatherv over a trainer communicator; all vectors must be correctly sized prior to entry.

Definition at line 231 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ trainer_all_gather() [2/2]

template<typename T >
void lbann::lbann_comm::trainer_all_gather ( T const &  src,
std::vector< T > &  data 
) const

Allgather for a single element over the trainer communicator; std::vector<T> &data must be correctly sized prior to entry.

Definition at line 268 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_allreduce() [1/2]

template<typename T >
T lbann::lbann_comm::trainer_allreduce ( T  snd,
El::mpi::Op  op = El::mpi::SUM 
) const

Within-trainer all-reduce.

Definition at line 628 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_allreduce() [2/2]

template<typename T >
void lbann::lbann_comm::trainer_allreduce ( T const *  snd,
int  count,
T *  rcv,
El::mpi::Op  op = El::mpi::SUM 
) const

Scalar array within-trainer all-reduce.

Definition at line 634 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_barrier()

void lbann::lbann_comm::trainer_barrier ( ) const

Barrier among processes in this trainer.

Here is the caller graph for this function:

◆ trainer_broadcast() [1/4]

template<typename T >
void lbann::lbann_comm::trainer_broadcast ( int  root,
T &  val 
) const

Within-trainer broadcast of a scalar.

Definition at line 48 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ trainer_broadcast() [2/4]

template<typename T >
void lbann::lbann_comm::trainer_broadcast ( const int  root,
T *  data,
const int  count 
) const

Within-trainer broadcast of a buffer.

Definition at line 103 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_broadcast() [3/4]

template<typename T , El::Device D>
void lbann::lbann_comm::trainer_broadcast ( const int  root,
T *  data,
const int  count,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 111 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_broadcast() [4/4]

template<typename T >
void lbann::lbann_comm::trainer_broadcast ( int  root,
std::vector< T > &  data 
) const

Broadcast vector<> within trainer.

Definition at line 169 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_gather() [1/4]

template<typename T >
void lbann::lbann_comm::trainer_gather ( T  snd,
int  root 
) const

Within-trainer scalar gather (for non-root processes).

Definition at line 275 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_gather() [2/4]

template<typename T >
void lbann::lbann_comm::trainer_gather ( T  snd,
T *  rcv 
) const

Within-trainer scalar gather (for root processes).

Definition at line 281 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_gather() [3/4]

template<typename T >
void lbann::lbann_comm::trainer_gather ( T const *  snd,
int  count,
int  root 
) const

Within-trainer scalar-array gather (for non-root processes).

Definition at line 287 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_gather() [4/4]

template<typename T >
void lbann::lbann_comm::trainer_gather ( T const *  snd,
int  count,
T *  rcv 
) const

Within-trainer scalar-array gather (for root processes).

Definition at line 295 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_gatherv() [1/2]

template<typename T >
void lbann::lbann_comm::trainer_gatherv ( T const *  snd,
int  count,
int  root 
) const

Within-trainer variable-length-array gather (for non-root processes).

Definition at line 303 of file comm_impl.hpp.

◆ trainer_gatherv() [2/2]

template<typename T >
void lbann::lbann_comm::trainer_gatherv ( T const *  snd,
int  count,
T *  rcv,
int const *  rcv_counts,
int const *  rcv_displacements 
) const

Definition at line 311 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_reduce() [1/4]

template<typename T >
void lbann::lbann_comm::trainer_reduce ( T  snd,
int  root,
El::mpi::Op  op = El::mpi::SUM 
) const

Within-trainer reduce (for non-root processes).

Definition at line 470 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_reduce() [2/4]

template<typename T >
T lbann::lbann_comm::trainer_reduce ( T  snd,
El::mpi::Op  op = El::mpi::SUM 
) const

Within-trainer reduce (for root processes).

Definition at line 478 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_reduce() [3/4]

template<typename T >
void lbann::lbann_comm::trainer_reduce ( T const *  snd,
int  count,
int  root,
El::mpi::Op  op = El::mpi::SUM 
) const

Within-trainer scalar array reduce (for non-root processes).

Definition at line 484 of file comm_impl.hpp.

Here is the call graph for this function:

◆ trainer_reduce() [4/4]

template<typename T >
void lbann::lbann_comm::trainer_reduce ( T const *  snd,
int  count,
T *  rcv,
El::mpi::Op  op = El::mpi::SUM 
) const

Within-trainer scalar array reduce (for root processes).

Definition at line 493 of file comm_impl.hpp.

Here is the call graph for this function:

◆ wait() [1/2]

template<typename T >
void lbann::lbann_comm::wait ( El::mpi::Request< T > &  req) const

Wait for a non-blocking request to complete.

Definition at line 754 of file comm_impl.hpp.

Here is the caller graph for this function:

◆ wait() [2/2]

void lbann::lbann_comm::wait ( Al::request req) const

Wait for a non-blocking request to complete.

◆ wait_all()

template<typename T >
void lbann::lbann_comm::wait_all ( std::vector< El::mpi::Request< T >> &  req) const

Wait for all non-blocking requests to complete.

Definition at line 747 of file comm_impl.hpp.

◆ world_all_gather()

template<typename T >
void lbann::lbann_comm::world_all_gather ( T const &  src,
std::vector< T > &  data 
) const

Allgather for a single element over the world communicator; std::vector<T> &data must be correctly sized prior to entry.

Definition at line 259 of file comm_impl.hpp.

Here is the call graph for this function:

◆ world_broadcast() [1/4]

template<typename T >
void lbann::lbann_comm::world_broadcast ( int  root,
T &  val 
) const

World broadcast of a scalar.

Definition at line 36 of file comm_impl.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ world_broadcast() [2/4]

template<typename T >
void lbann::lbann_comm::world_broadcast ( const int  root,
T *  data,
const int  count 
) const

World broadcast of a buffer.

Definition at line 70 of file comm_impl.hpp.

Here is the call graph for this function:

◆ world_broadcast() [3/4]

template<typename T , El::Device D>
void lbann::lbann_comm::world_broadcast ( const int  root,
T *  data,
const int  count,
El::SyncInfo< D > const &  syncInfo 
) const

Definition at line 78 of file comm_impl.hpp.

Here is the call graph for this function:

◆ world_broadcast() [4/4]

template<typename T >
void lbann::lbann_comm::world_broadcast ( int  root,
std::vector< T > &  data 
) const

Broadcast vector<> to world.

Definition at line 152 of file comm_impl.hpp.

Here is the call graph for this function:

Member Data Documentation

◆ m_bytes_received

size_t lbann::lbann_comm::m_bytes_received
mutable private

Definition at line 999 of file comm.hpp.

◆ m_bytes_sent

size_t lbann::lbann_comm::m_bytes_sent
mutable private

Definition at line 998 of file comm.hpp.

◆ m_combined_grid_comm

El::mpi::Comm lbann::lbann_comm::m_combined_grid_comm
private

Combined communicator for primary and secondary grid in each trainer

Definition at line 955 of file comm.hpp.

◆ m_create_two_models

bool lbann::lbann_comm::m_create_two_models = false
private

Definition at line 984 of file comm.hpp.

◆ m_grid

std::unique_ptr<El::Grid> lbann::lbann_comm::m_grid
private

Grid for this trainer.

Definition at line 959 of file comm.hpp.

◆ m_grid_type

GridType lbann::lbann_comm::m_grid_type = GridType::NO_GRID
private

Grid type for current process when sub-grid parallelism is enabled

Definition at line 982 of file comm.hpp.

◆ m_group_communicators

std::unordered_map<int, El::mpi::Comm> lbann::lbann_comm::m_group_communicators
mutable private

Packed group communicators.

Definition at line 957 of file comm.hpp.

◆ m_intertrainer_comm

El::mpi::Comm lbann::lbann_comm::m_intertrainer_comm
private

Communicator for every process with the same trainer rank.

Definition at line 947 of file comm.hpp.

◆ m_node_comm

El::mpi::Comm lbann::lbann_comm::m_node_comm
private

Communicator for every process in the same compute node.

Definition at line 949 of file comm.hpp.

◆ m_num_global_barriers

size_t lbann::lbann_comm::m_num_global_barriers
mutable private

Definition at line 997 of file comm.hpp.

◆ m_num_intertrainer_barriers

size_t lbann::lbann_comm::m_num_intertrainer_barriers
mutable private

Definition at line 996 of file comm.hpp.

◆ m_num_trainer_barriers

size_t lbann::lbann_comm::m_num_trainer_barriers
mutable private

Definition at line 995 of file comm.hpp.

◆ m_num_trainers

int lbann::lbann_comm::m_num_trainers
private

Number of trainers.

Definition at line 961 of file comm.hpp.

◆ m_primary_grid_comm

El::mpi::Comm lbann::lbann_comm::m_primary_grid_comm
private

Communicator for primary grid in each trainer

Definition at line 951 of file comm.hpp.

◆ m_primary_grid_ranks

std::vector<int> lbann::lbann_comm::m_primary_grid_ranks
private

Ranks in primary and secondary grids

Definition at line 991 of file comm.hpp.

◆ m_procs_per_node

int lbann::lbann_comm::m_procs_per_node
private

Number of processors per compute node.

Definition at line 969 of file comm.hpp.

◆ m_procs_per_trainer

int lbann::lbann_comm::m_procs_per_trainer
private

Number of processors per trainer.

Definition at line 963 of file comm.hpp.

◆ m_rank_in_node

int lbann::lbann_comm::m_rank_in_node
private

Rank of this process within its compute node.

Definition at line 971 of file comm.hpp.

◆ m_rank_in_trainer

int lbann::lbann_comm::m_rank_in_trainer
private

Rank of this process within its trainer.

Definition at line 967 of file comm.hpp.

◆ m_secondary_grid

std::unique_ptr<El::Grid> lbann::lbann_comm::m_secondary_grid
private

Definition at line 986 of file comm.hpp.

◆ m_secondary_grid_comm

El::mpi::Comm lbann::lbann_comm::m_secondary_grid_comm
private

Communicator for secondary grid in each trainer

Definition at line 953 of file comm.hpp.

◆ m_secondary_grid_ranks

std::vector<int> lbann::lbann_comm::m_secondary_grid_ranks
private

Definition at line 992 of file comm.hpp.

◆ m_subgrid_async_progress

bool lbann::lbann_comm::m_subgrid_async_progress = false
private

Definition at line 984 of file comm.hpp.

◆ m_subset_grid

std::unique_ptr<El::Grid> lbann::lbann_comm::m_subset_grid
private

Definition at line 986 of file comm.hpp.

◆ m_threads_per_proc

int lbann::lbann_comm::m_threads_per_proc
private

Default number of threads per process. This is the number of OpenMP threads to use for parallel regions, provided omp_set_num_threads has not been called or the num_threads directive has not been provided.

Definition at line 979 of file comm.hpp.

◆ m_trainer_comm

El::mpi::Comm lbann::lbann_comm::m_trainer_comm
private

Communicator for every process in this trainer.

Definition at line 945 of file comm.hpp.

◆ m_trainer_rank

int lbann::lbann_comm::m_trainer_rank
private

Rank of the trainer this process is in.

Definition at line 965 of file comm.hpp.

◆ m_world_comm

const El::mpi::Comm lbann::lbann_comm::m_world_comm
private

World communicator.

Definition at line 943 of file comm.hpp.

◆ m_world_ranks_on_node

std::vector<int> lbann::lbann_comm::m_world_ranks_on_node
private

The list of world ranks that are on this compute node.

Definition at line 973 of file comm.hpp.


The documentation for this class was generated from the following files: