|
LBANN
0.103.0
Livermore Big Artificial Neural Network Toolkit
|
#include <data_store_conduit.hpp>
Public Types | |
| using | map_ii_t = std::unordered_map< int, int > |
| using | map_is_t = std::unordered_map< int, size_t > |
| using | map_pssi_t = std::unordered_map< std::pair< size_t, size_t >, int, size_t_pair_hash > |
| using | map_ss_t = std::unordered_map< size_t, size_t > |
Public Member Functions | |
| data_store_conduit (generic_data_reader *reader) | |
| ctor More... | |
| data_store_conduit (const data_store_conduit &) | |
| copy ctor More... | |
| data_store_conduit (const data_store_conduit &, const std::vector< int > &) | |
| copy / split ctor More... | |
| data_store_conduit & | operator= (const data_store_conduit &) |
| operator= More... | |
| data_store_conduit * | copy () const |
| ~data_store_conduit () | |
| dtor More... | |
| void | set_data_reader_ptr (generic_data_reader *reader) |
| void | set_shuffled_indices (const std::vector< int > *indices) |
| convenience handle More... | |
| size_t | get_num_global_indices () const |
| Returns the number of samples summed over all ranks. More... | |
| void | setup (int mini_batch_size) |
| void | check_mem_capacity (lbann_comm *comm, const std::string sample_list_file, size_t stride, size_t offset) |
| const conduit::Node & | get_conduit_node (int data_id) const |
| Returns the conduit Node associated with the data_id. More... | |
| void | set_conduit_node (int data_id, const conduit::Node &node, bool already_have=false) |
| Set a conduit node in the data store. More... | |
| void | set_preloaded_conduit_node (int data_id, const conduit::Node &node) |
| void | spill_preloaded_conduit_node (int data_id, const conduit::Node &node) |
| const conduit::Node & | get_random_node () const |
| const conduit::Node & | get_random_node (const std::string &field) const |
| conduit::Node & | get_empty_node (int data_id) |
| returns an empty node More... | |
| bool | is_preloading () const |
| Returns true if preloading is turned on. More... | |
| bool | is_explicitly_loading () const |
| Returns true if explicitly loading is turned on. More... | |
| bool | is_fully_loaded () const |
| Returns true if all loading has been completed. More... | |
| bool | is_local_cache () const |
| Returns "true" if running in local cache mode. More... | |
| void | set_is_preloading (bool flag) |
| Turn preloading on or off. More... | |
| void | set_is_explicitly_loading (bool flag) |
| Turn on explicit loading. More... | |
| void | set_loading_is_complete () |
| Marks the data_store as fully loaded. More... | |
| void | set_is_local_cache (bool flag=true) |
| turns local cache mode on or off More... | |
| void | check_query_flags () const |
| Check that explicit loading, preloading, and fully loaded flags are consistent. More... | |
| void | exchange_owner_maps () |
| fills in m_owner, which maps index -> owning processor More... | |
| void | build_preloaded_owner_map (const std::vector< int > &per_rank_list_sizes) |
| fills in m_owner, which maps index -> owning processor More... | |
| void | set_preloaded_owner_map (const std::unordered_map< int, int > &owner) |
| fills in m_owner, which maps index -> owning processor More... | |
| void | clear_owner_map () |
| Special handling for ras_lipid_conduit_data_reader; may go away in the future. More... | |
| void | set_owner_map (const std::unordered_map< int, int > &m) |
| void | add_owner (int data_id, int owner) |
| Special handling for ras_lipid_conduit_data_reader; may go away in the future. More... | |
| void | set_finished_building_map () |
| Special handling for ras_lipid_conduit_data_reader; may go away in the future. More... | |
| void | compact_nodes () |
| int | get_index_owner (int idx) |
| void | preload_local_cache () |
| Read the data set into memory. More... | |
| void | start_exchange_mini_batch_data (size_t current_pos, size_t mb_size) |
| void | finish_exchange_mini_batch_data () |
| void | set_node_sizes_vary () |
| bool | has_conduit_node (int data_id) const |
| int | get_data_size () |
| for use during development and debugging More... | |
| void | copy_members (const data_store_conduit &rhs) |
| made public for debugging during development More... | |
| void | flush_debug_file () |
| Closes then reopens the debug logging file. More... | |
| void | flush_profile_file () const |
| Closes then reopens the profile logging file. More... | |
| void | write_checkpoint (std::string dir_name) |
| Writes object's state to file. More... | |
| void | load_checkpoint (std::string dir_name, generic_data_reader *reader=nullptr) |
| Loads object's state from file. More... | |
| void | set_profile_msg (std::string) |
| Add text to the profiling file, if it's opened. More... | |
| bool | test_local_cache_imagenet (int n) |
| Runs an internal test to ensure the locally cached conduit data is correct. More... | |
| void | test_imagenet_node (int sample_id, bool dereference=true) |
| size_t | get_mem_usage () |
Public Attributes | |
| std::ofstream * | m_debug = nullptr |
| std::ofstream * | m_profile = nullptr |
Private Member Functions | |
| void | start_exchange_data_by_sample (size_t current_pos, size_t mb_size) |
| void | finish_exchange_data_by_sample () |
| void | setup_data_store_buffers () |
| void | build_node_for_sending (const conduit::Node &node_in, conduit::Node &node_out) |
| called by exchange_data More... | |
| void | exchange_sample_sizes () |
| for use when conduit Nodes have non-uniform size, e.g, imagenet More... | |
| int | build_indices_i_will_send (int current_pos, int mb_size) |
| int | build_indices_i_will_recv (int current_pos, int mb_size) |
| void | error_check_compacted_node (const conduit::Node &nd, int data_id) |
| void | exchange_local_caches () |
| All ranks exchange their cached data. More... | |
| void | get_image_sizes (map_is_t &sizes, std::vector< std::vector< int >> &indices) |
| void | allocate_shared_segment (map_is_t &sizes, std::vector< std::vector< int >> &indices) |
| for use in local cache mode More... | |
| void | read_files (std::vector< char > &work, map_is_t &sizes, std::vector< int > &indices) |
| for use in local cache mode More... | |
| void | compute_image_offsets (map_is_t &image_sizes, std::vector< std::vector< int >> &indices) |
| fills in m_image_offsets for use in local cache mode More... | |
| void | exchange_images (std::vector< char > &work, map_is_t &image_sizes, std::vector< std::vector< int >> &indices) |
| for use in local cache mode More... | |
| void | build_conduit_nodes (map_is_t &sizes) |
| void | fillin_shared_images (char *images, size_t size, size_t offset) |
| for use in local cache mode More... | |
| void | test_checkpoint (const std::string &) |
| For testing during development. More... | |
| void | print_variables () |
| Called by test_checkpoint. More... | |
| void | print_partial_owner_map (int n) |
| Called by test_checkpoint. More... | |
| std::string | get_conduit_dir () const |
| std::string | get_cereal_fn () const |
| std::string | get_metadata_fn () const |
| void | make_dir_if_it_doesnt_exist (const std::string &dir) |
| Creates the directory if it does not already exist. More... | |
| void | spill_conduit_node (const conduit::Node &node, int data_id) |
| Writes conduit node to file. More... | |
| void | load_spilled_conduit_nodes () |
| Loads conduit nodes from file into m_data. More... | |
| void | setup_spill (std::string dir) |
| Creates directory structure, opens metadata file for output, etc. More... | |
| void | save_state () |
| Saves this object's state to file. More... | |
| void | open_informational_files () |
| Optionally open debug and profiling files. More... | |
| void | open_next_conduit_spill_directory () |
| Creates a directory for spilling conduit nodes. More... | |
| void | profile_timing () |
| Write timing data for data exchange to the profile file, if it's opened. More... | |
| void | setup_checkpoint_test () |
| std::string | get_lassen_spill_dir () |
| void | verify_sample_size () |
| void | PROFILE () const |
| template<typename T , typename... Types> | |
| void | PROFILE (T var1, Types... var2) const |
| void | DEBUG_DS () |
| template<typename T , typename... Types> | |
| void | DEBUG_DS (T var1, Types... var2) |
Private Attributes | |
| bool | m_bcast_sample_size = true |
| data_store_conduit * | m_other = nullptr |
| bool | m_owner_maps_were_exchanged = false |
| bool | m_run_checkpoint_test = false |
| size_t | m_my_num_indices = 0 |
| The number of samples that this processor owns. More... | |
| bool | m_spill = false |
| if true, then we are spilling (offloading) samples to disk More... | |
| bool | m_is_spilled = false |
| if true, then all samples have been spilled More... | |
| std::ofstream | m_metadata |
| std::string | m_spill_dir_base |
| Base directory for spilling (offloading) conduit nodes. More... | |
| int | m_cur_spill_dir_integer = -1 |
| Used to form the directory path for spilling conduit nodes. More... | |
| std::string | m_cur_spill_dir |
| Current directory for spilling (writing to file) conduit nodes. More... | |
| std::string | m_test_dir |
| The directory to use for testing checkpointing. More... | |
| int | m_num_files_in_cur_spill_dir |
| Contains the number of conduit nodes that have been written to m_cur_spill_dir. More... | |
| map_ii_t | m_spilled_nodes |
| maps data_id to m_cur_spill_dir_integer. More... | |
| std::mutex | m_mutex |
| used in set_conduit_node(...) More... | |
| std::mutex | m_mutex_2 |
| char * | m_mem_seg = 0 |
| for use in local cache mode More... | |
| size_t | m_mem_seg_length = 0 |
| std::string | m_seg_name |
| const std::string | m_debug_filename_base = "debug" |
| std::string | m_debug_filename |
| const std::string | m_profile_filename_base = "data_store_profile" |
| std::string | m_profile_filename |
| bool | m_was_loaded_from_file = false |
| const std::string | m_cereal_fn = "data_store_cereal" |
| const int | m_max_files_per_directory = 500 |
| double | m_exchange_sample_sizes_time = 0 |
| double | m_start_snd_rcv_time = 0 |
| double | m_wait_all_time = 0 |
| double | m_rebuild_time = 0 |
| double | m_exchange_time = 0 |
| bool | m_is_setup = false |
| bool | m_loading_is_complete = false |
| set to true if data_store is preloaded More... | |
| bool | m_preloading = false |
| True, if we are in preload mode. More... | |
| bool | m_explicitly_loading = false |
| True, if we are in explicit loading mode. More... | |
| int | m_owner_map_mb_size = 0 |
| int | m_compacted_sample_size = 0 |
| size of a compacted conduit::Node that contains a single sample More... | |
| bool | m_is_local_cache = false |
| bool | m_node_sizes_vary = false |
| bool | m_have_sample_sizes = false |
| used in exchange_data_by_sample, when sample sizes are non-uniform More... | |
| generic_data_reader * | m_reader |
| lbann_comm * | m_comm = nullptr |
| bool | m_world_master |
| convenience handles More... | |
| bool | m_trainer_master |
| int | m_rank_in_trainer |
| int | m_rank_in_world = -1 |
| int | m_partition_in_trainer |
| int | m_offset_in_partition |
| int | m_np_in_trainer |
| number of procs in the trainer; convenience handle More... | |
| int | m_num_partitions_in_trainer |
| bool | m_mini_batch_data_exchange_started = false |
| Flag to indicate if a data exchange has started. More... | |
| map_pssi_t | m_owner |
| Maps an index to the processor that owns the associated data. The first value of the index is the sample ID and the second value is the partition ID. More... | |
| const std::vector< int > * | m_shuffled_indices |
| convenience handle More... | |
| std::unordered_map< int, conduit::Node > | m_data |
| Contains the conduit nodes that are "owned" by this rank. More... | |
| std::unordered_map< int, conduit::Node > | m_data_cache |
| Contains a cache of the conduit nodes that are "owned" by this rank. More... | |
| std::vector< int > | m_recv_data_ids |
| Contains the list of data IDs that will be received. More... | |
| map_ii_t | m_recv_sample_sizes |
| std::unordered_map< int, conduit::Node > | m_minibatch_data |
| std::vector< conduit::Node > | m_send_buffer |
| work space; used in exchange_data More... | |
| std::vector< conduit::Node > | m_send_buffer_2 |
| std::vector< El::mpi::Request< El::byte > > | m_send_requests |
| std::vector< El::mpi::Request< El::byte > > | m_recv_requests |
| std::vector< conduit::Node > | m_recv_buffer |
| std::vector< size_t > | m_outgoing_msg_sizes |
| std::vector< size_t > | m_incoming_msg_sizes |
| map_is_t | m_sample_sizes |
| Maps a data_id to its image size. More... | |
| map_is_t | m_image_offsets |
| Maps a data_id to the image location in a shared memory segment. More... | |
| std::vector< std::unordered_set< int > > | m_indices_to_send |
| std::vector< std::unordered_set< int > > | m_indices_to_recv |
Definition at line 61 of file data_store_conduit.hpp.
| using lbann::data_store_conduit::map_ii_t = std::unordered_map<int, int> |
Definition at line 66 of file data_store_conduit.hpp.
| using lbann::data_store_conduit::map_is_t = std::unordered_map<int, size_t> |
Definition at line 67 of file data_store_conduit.hpp.
| using lbann::data_store_conduit::map_pssi_t = std::unordered_map<std::pair<size_t, size_t>, int, size_t_pair_hash> |
Definition at line 71 of file data_store_conduit.hpp.
| using lbann::data_store_conduit::map_ss_t = std::unordered_map<size_t, size_t> |
Definition at line 74 of file data_store_conduit.hpp.
| lbann::data_store_conduit::data_store_conduit | ( | generic_data_reader * | reader | ) |
ctor
| lbann::data_store_conduit::data_store_conduit | ( | const data_store_conduit & | ) |
copy ctor
| lbann::data_store_conduit::data_store_conduit | ( | const data_store_conduit & | , |
| const std::vector< int > & | |||
| ) |
copy / split ctor
| lbann::data_store_conduit::~data_store_conduit | ( | ) |
dtor
|
inline |
Special handling for ras_lipid_conduit_data_reader; may go away in the future.
Definition at line 226 of file data_store_conduit.hpp.
|
private |
for use in local cache mode
|
private |
|
private |
fills in m_indices_to_recv and returns the number of samples that will be received
|
private |
fills in m_indices_to_send and returns the number of samples that will be sent
|
private |
called by exchange_data
| void lbann::data_store_conduit::build_preloaded_owner_map | ( | const std::vector< int > & | per_rank_list_sizes | ) |
fills in m_owner, which maps index -> owning processor
| void lbann::data_store_conduit::check_mem_capacity | ( | lbann_comm * | comm, |
| const std::string | sample_list_file, | ||
| size_t | stride, | ||
| size_t | offset | ||
| ) |
| void lbann::data_store_conduit::check_query_flags | ( | ) | const |
Check that explicit loading, preloading, and fully loaded flags are consistent.
| void lbann::data_store_conduit::clear_owner_map | ( | ) |
Special handling for ras_lipid_conduit_data_reader; may go away in the future.
| void lbann::data_store_conduit::compact_nodes | ( | ) |
Recompact the nodes because they are not copied properly when instantiating using the copy constructor
|
private |
fills in m_image_offsets for use in local cache mode
|
inline |
Definition at line 88 of file data_store_conduit.hpp.
| void lbann::data_store_conduit::copy_members | ( | const data_store_conduit & | rhs | ) |
made public for debugging during development
|
inline private |
Definition at line 688 of file data_store_conduit.hpp.
|
inline private |
Definition at line 698 of file data_store_conduit.hpp.
|
private |
|
private |
for use in local cache mode
|
private |
All ranks exchange their cached data.
| void lbann::data_store_conduit::exchange_owner_maps | ( | ) |
fills in m_owner, which maps index -> owning processor
|
private |
for use when conduit Nodes have non-uniform size, e.g, imagenet
|
private |
for use in local cache mode
|
private |
| void lbann::data_store_conduit::finish_exchange_mini_batch_data | ( | ) |
| void lbann::data_store_conduit::flush_debug_file | ( | ) |
Closes then reopens the debug logging file.
Debug logging is enabled on all ranks via the cmd line flag: --data_store_debug
| void lbann::data_store_conduit::flush_profile_file | ( | ) | const |
Closes then reopens the profile logging file.
Profile logging is enabled on P_0 via the cmd line flag: --data_store_profile
|
private |
|
private |
| const conduit::Node& lbann::data_store_conduit::get_conduit_node | ( | int | data_id | ) | const |
Returns the conduit Node associated with the data_id.
|
inline |
for use during development and debugging
Definition at line 264 of file data_store_conduit.hpp.
| conduit::Node& lbann::data_store_conduit::get_empty_node | ( | int | data_id | ) |
returns an empty node
|
private |
Currently only used for imagenet. On return, 'sizes' maps a sample_id to image size, and indices[p] contains the sample_ids that P_p owns. For use in local cache mode.
| int lbann::data_store_conduit::get_index_owner | ( | int | idx | ) |
returns the processor that owns the data associated with the index
|
private |
| size_t lbann::data_store_conduit::get_mem_usage | ( | ) |
|
private |
| size_t lbann::data_store_conduit::get_num_global_indices | ( | ) | const |
Returns the number of samples summed over all ranks.
| const conduit::Node& lbann::data_store_conduit::get_random_node | ( | ) | const |
| const conduit::Node& lbann::data_store_conduit::get_random_node | ( | const std::string & | field | ) | const |
| bool lbann::data_store_conduit::has_conduit_node | ( | int | data_id | ) | const |
|
inline |
Returns true if explicitly loading is turned on.
'explicitly loading' means that the data that will be owned by each rank is passed into the data store during the first epoch. This is in contrast to preloading, in which the data is passed into the data store prior to the first epoch. Explicit loading and preloading are mutually exclusive: at most one may be true; however, both will be set to false when all loading is complete.
Definition at line 150 of file data_store_conduit.hpp.
| bool lbann::data_store_conduit::is_fully_loaded | ( | ) | const |
Returns true if all loading has been completed.
See notes in: set_loading_is_complete()
|
inline |
Returns "true" if running in local cache mode.
In local cache mode, each node contains a complete copy of the data set. This is stored in a shared memory segment, but part of the set may be spilled to disk if memory is insufficient. Local cache mode is activated via the cmd line flag: --data_store_cache
Definition at line 166 of file data_store_conduit.hpp.
|
inline |
Returns true if preloading is turned on.
See notes in: is_explicitly_loading()
Definition at line 139 of file data_store_conduit.hpp.
| void lbann::data_store_conduit::load_checkpoint | ( | std::string | dir_name, |
| generic_data_reader * | reader = nullptr |
||
| ) |
Loads object's state from file.
|
private |
Loads conduit nodes from file into m_data.
|
private |
Creates the directory if it does not already exist.
|
private |
Optionally open debug and profiling files.
A debug file is opened for every <rank, data reader role> pair; files are opened if the cmd flag --data_store_debug is passed. A profiling file is opened only by <world_master, data reader role> pairs; files are opened if the cmd flag --data_store_profile is passed.
|
private |
Creates a directory for spilling conduit nodes.
| data_store_conduit& lbann::data_store_conduit::operator= | ( | const data_store_conduit & | ) |
operator=
| void lbann::data_store_conduit::preload_local_cache | ( | ) |
Read the data set into memory.
Each rank reads a portion of the data set, then bcasts to all other ranks.
|
private |
Called by test_checkpoint.
For testing and development. Prints the first 'n' entries from the owner map (which maps sample_id -> owning rank) to std::cout
|
private |
Called by test_checkpoint.
|
inline private |
Definition at line 665 of file data_store_conduit.hpp.
|
inline private |
Definition at line 675 of file data_store_conduit.hpp.
|
private |
Write timing data for data exchange to the profile file, if it's opened.
|
private |
for use in local cache mode
|
private |
Saves this object's state to file.
Here, "state" is all data, except for conduit nodes, that is needed to reload from checkpoint
| void lbann::data_store_conduit::set_conduit_node | ( | int | data_id, |
| const conduit::Node & | node, | ||
| bool | already_have = false |
||
| ) |
Set a conduit node in the data store.
if 'already_have = true' then the passed 'node' was obtained by a call to get_empty_node(); note, we do this to prevent copying the node
| void lbann::data_store_conduit::set_data_reader_ptr | ( | generic_data_reader * | reader | ) |
|
inline |
Special handling for ras_lipid_conduit_data_reader; may go away in the future.
Definition at line 233 of file data_store_conduit.hpp.
| void lbann::data_store_conduit::set_is_explicitly_loading | ( | bool | flag | ) |
Turn on explicit loading.
|
inline |
turns local cache mode on or off
Definition at line 188 of file data_store_conduit.hpp.
| void lbann::data_store_conduit::set_is_preloading | ( | bool | flag | ) |
Turn preloading on or off.
| void lbann::data_store_conduit::set_loading_is_complete | ( | ) |
Marks the data_store as fully loaded.
Fully loaded means that each rank has all the data that it is intended to own. When not running in local cache mode, this occurs (1) at the conclusion of preloading, prior to the beginning of the first epoch, or (2) at the conclusion of the first epoch, if explicitly loading. When running in local cache mode, this occurs (1) at the conclusion of preload_local_cache(), which is called prior to the first epoch, or (2) at the conclusion of exchange_local_caches(), at the conclusion of the first epoch, if explicitly loading.
|
inline |
Definition at line 253 of file data_store_conduit.hpp.
|
inline |
Definition at line 217 of file data_store_conduit.hpp.
| void lbann::data_store_conduit::set_preloaded_conduit_node | ( | int | data_id, |
| const conduit::Node & | node | ||
| ) |
|
inline |
fills in m_owner, which maps index -> owning processor
Definition at line 206 of file data_store_conduit.hpp.
| void lbann::data_store_conduit::set_profile_msg | ( | std::string | ) |
Add text to the profiling file, if it's opened.
| void lbann::data_store_conduit::set_shuffled_indices | ( | const std::vector< int > * | indices | ) |
convenience handle
| void lbann::data_store_conduit::setup | ( | int | mini_batch_size | ) |
|
private |
|
private |
|
private |
Creates directory structure, opens metadata file for output, etc.
This method is called for both --data_store_spill and --data_store_test_checkpoint
|
private |
Writes conduit node to file.
| void lbann::data_store_conduit::spill_preloaded_conduit_node | ( | int | data_id, |
| const conduit::Node & | node | ||
| ) |
|
private |
| void lbann::data_store_conduit::start_exchange_mini_batch_data | ( | size_t | current_pos, |
| size_t | mb_size | ||
| ) |
|
private |
For testing during development.
At the beginning of the 2nd epoch, calls write_checkpoint(), clears some variables, calls load_checkpoint(), then continues. To activate this test use cmd flag: --data_store_test_checkpoint=<dir>
| void lbann::data_store_conduit::test_imagenet_node | ( | int | sample_id, |
| bool | dereference = true |
||
| ) |
| bool lbann::data_store_conduit::test_local_cache_imagenet | ( | int | n | ) |
Runs an internal test to ensure the locally cached conduit data is correct.
For use during development and testing. This test is activated via the cmd line flag: --data_store_test_cache. Output may be written to cout, and the profile and debug files (if they are opened).
| n | is the maximum number of samples to test; set to -1 to test all |
|
private |
| void lbann::data_store_conduit::write_checkpoint | ( | std::string | dir_name | ) |
Writes object's state to file.
|
private |
Definition at line 314 of file data_store_conduit.hpp.
|
private |
Definition at line 386 of file data_store_conduit.hpp.
|
private |
Definition at line 456 of file data_store_conduit.hpp.
|
private |
size of a compacted conduit::Node that contains a single sample
Definition at line 445 of file data_store_conduit.hpp.
|
private |
Current directory for spilling (writing to file) conduit nodes.
m_cur_spill_dir = m_spill_dir_base/m_cur_spill_dir_integer
Definition at line 348 of file data_store_conduit.hpp.
|
private |
Used to form the directory path for spilling conduit nodes.
Definition at line 341 of file data_store_conduit.hpp.
|
mutable private |
Contains the conduit nodes that are "owned" by this rank.
Map data_id -> conduit::Node. Must be mutable since rhs.m_data may be modified in copy_members, in which rhs is const.
Definition at line 490 of file data_store_conduit.hpp.
|
private |
Contains a cache of the conduit nodes that are "owned" by this rank.
This differs from m_data in that this holds temporarily, during the first epoch, if we're running in local cache mode and explicitly loading
Definition at line 499 of file data_store_conduit.hpp.
| std::ofstream* lbann::data_store_conduit::m_debug = nullptr |
only used for debugging; pass --debug on cmd line to get each data store to print to a different file. This is made public so data readers can also print to the file
Definition at line 260 of file data_store_conduit.hpp.
|
private |
Definition at line 380 of file data_store_conduit.hpp.
|
private |
Definition at line 379 of file data_store_conduit.hpp.
|
private |
Definition at line 398 of file data_store_conduit.hpp.
|
private |
Definition at line 410 of file data_store_conduit.hpp.
|
private |
True, if we are in explicit loading mode.
There is some redundancy here: m_preloading and m_explicitly_loading can not both be true, but both may be false. When m_loading_is_complete is true, both m_preloading and m_explicitly_loading should be false.
Definition at line 436 of file data_store_conduit.hpp.
|
private |
used in exchange_data_by_sample, when sample sizes are non-uniform
Definition at line 452 of file data_store_conduit.hpp.
|
private |
Maps a data_id to the image location in a shared memory segment.
Definition at line 526 of file data_store_conduit.hpp.
|
private |
Definition at line 516 of file data_store_conduit.hpp.
|
private |
maps processor id -> set of indices (whose associated samples) this proc needs to recv from others. (formerly called "needed")
Definition at line 535 of file data_store_conduit.hpp.
|
private |
maps processor id -> set of indices (whose associated samples) this proc needs to send. (formerly called "proc_to_indices"); this is filled in by build_indices_i_will_send()
Definition at line 531 of file data_store_conduit.hpp.
|
private |
Definition at line 447 of file data_store_conduit.hpp.
|
private |
Definition at line 422 of file data_store_conduit.hpp.
|
private |
if true, then all samples have been spilled
Definition at line 332 of file data_store_conduit.hpp.
|
private |
set to true if data_store is preloaded
Definition at line 425 of file data_store_conduit.hpp.
|
private |
used in spill_to_file (actually, conduit::Node.save() writes both a json file and a binary file, so double this number)
Definition at line 391 of file data_store_conduit.hpp.
|
private |
for use in local cache mode
Definition at line 375 of file data_store_conduit.hpp.
|
private |
Definition at line 376 of file data_store_conduit.hpp.
|
private |
During spilling, the conduit file pathnames are written to this file
Definition at line 335 of file data_store_conduit.hpp.
|
private |
Flag to indicate if a data exchange has started.
Definition at line 471 of file data_store_conduit.hpp.
|
private |
This vector contains Nodes that this processor needs for the current minibatch; this is filled in by exchange_data()
Definition at line 507 of file data_store_conduit.hpp.
|
mutable private |
used in set_conduit_node(...)
Definition at line 370 of file data_store_conduit.hpp.
|
private |
Definition at line 372 of file data_store_conduit.hpp.
|
private |
The number of samples that this processor owns.
Definition at line 326 of file data_store_conduit.hpp.
|
private |
Definition at line 449 of file data_store_conduit.hpp.
|
private |
number of procs in the trainer; convenience handle
Definition at line 467 of file data_store_conduit.hpp.
|
private |
Contains the number of conduit nodes that have been written to m_cur_spill_dir.
When m_num_files_in_cur_spill_dir == m_max_files_per_directory, m_cur_spill_dir_integer is incremented and a new m_cur_spill_dir is created
Definition at line 363 of file data_store_conduit.hpp.
|
private |
Definition at line 468 of file data_store_conduit.hpp.
|
private |
Definition at line 464 of file data_store_conduit.hpp.
|
private |
Definition at line 319 of file data_store_conduit.hpp.
|
private |
Definition at line 515 of file data_store_conduit.hpp.
|
mutable private |
Maps an index to the processor that owns the associated data. The first value of the index is the sample ID and the second value is the partition ID.
Must be mutable since rhs.m_owner may be modified in copy_members, in which rhs is const.
Definition at line 479 of file data_store_conduit.hpp.
|
private |
The size of the mini-batch that was used to calculate ownership of samples when building the owner map. This size has to be used consistently when computing the indices that will be sent and received.
Definition at line 442 of file data_store_conduit.hpp.
|
private |
Definition at line 321 of file data_store_conduit.hpp.
|
private |
Definition at line 463 of file data_store_conduit.hpp.
|
private |
True, if we are in preload mode.
Definition at line 428 of file data_store_conduit.hpp.
| std::ofstream* lbann::data_store_conduit::m_profile = nullptr |
Definition at line 261 of file data_store_conduit.hpp.
|
private |
Definition at line 383 of file data_store_conduit.hpp.
|
private |
Definition at line 382 of file data_store_conduit.hpp.
|
private |
Definition at line 461 of file data_store_conduit.hpp.
|
private |
Definition at line 462 of file data_store_conduit.hpp.
|
private |
Definition at line 454 of file data_store_conduit.hpp.
|
private |
Definition at line 407 of file data_store_conduit.hpp.
|
private |
Definition at line 514 of file data_store_conduit.hpp.
|
private |
Contains the list of data IDs that will be received.
Definition at line 502 of file data_store_conduit.hpp.
|
private |
Definition at line 513 of file data_store_conduit.hpp.
|
private |
Definition at line 503 of file data_store_conduit.hpp.
|
private |
Definition at line 323 of file data_store_conduit.hpp.
|
private |
Maps a data_id to its image size.
Used when conduit Nodes have non-uniform size, e.g, imagenet; see: set_node_sizes_vary()
Definition at line 523 of file data_store_conduit.hpp.
|
private |
Definition at line 377 of file data_store_conduit.hpp.
|
private |
work space; used in exchange_data
Definition at line 510 of file data_store_conduit.hpp.
|
private |
Definition at line 511 of file data_store_conduit.hpp.
|
private |
Definition at line 512 of file data_store_conduit.hpp.
|
private |
convenience handle
Definition at line 482 of file data_store_conduit.hpp.
|
private |
if true, then we are spilling (offloading) samples to disk
Definition at line 329 of file data_store_conduit.hpp.
|
private |
Base directory for spilling (offloading) conduit nodes.
Definition at line 338 of file data_store_conduit.hpp.
|
private |
maps data_id to m_cur_spill_dir_integer.
Definition at line 366 of file data_store_conduit.hpp.
|
private |
Definition at line 401 of file data_store_conduit.hpp.
|
private |
The directory to use for testing checkpointing.
Testing is activated by passing the cmd flag: --data_store_test_checkpoint=<dir>
Definition at line 355 of file data_store_conduit.hpp.
|
private |
Definition at line 460 of file data_store_conduit.hpp.
|
private |
Definition at line 404 of file data_store_conduit.hpp.
|
private |
Definition at line 385 of file data_store_conduit.hpp.
|
private |
convenience handles
Definition at line 459 of file data_store_conduit.hpp.