|
LBANN
0.103.0
Livermore Big Artificial Neural Network Toolkit
|
#include <compound_data_reader.hpp>
Public Member Functions | |
| generic_compound_data_reader (std::vector< generic_data_reader *> data_readers, bool shuffle=true) | |
| generic_compound_data_reader (const generic_compound_data_reader &other) | |
| generic_compound_data_reader & | operator= (const generic_compound_data_reader &other) |
| ~generic_compound_data_reader () override | |
| generic_compound_data_reader * | copy () const override=0 |
| void | set_execution_mode_split_fraction (execution_mode m, double s) override |
| Apply operations to subsidiary data readers. More... | |
| void | set_role (std::string role) override |
| std::vector< generic_data_reader * > & | get_data_readers () |
| needed to support data_store_merge_samples More... | |
| bool | has_labels () const override |
| bool | has_responses () const override |
| void | set_has_labels (const bool b) override |
| Whether or not a data reader has labels. More... | |
| void | set_has_responses (const bool b) override |
| Whether or not a data reader has a response field. More... | |
Public Member Functions inherited from lbann::generic_data_reader | |
| generic_data_reader (bool shuffle=true) | |
| generic_data_reader (const generic_data_reader &)=default | |
| generic_data_reader & | operator= (const generic_data_reader &)=default |
| virtual | ~generic_data_reader () |
| template<class Archive > | |
| void | serialize (Archive &ar) |
| void | set_comm (lbann_comm *comm) |
| set the comm object More... | |
| lbann_comm * | get_comm () const |
| returns a (possibly nullptr) pointer to comm More... | |
| virtual bool | has_conduit_output () |
| void | set_file_dir (std::string s) |
| void | set_local_file_dir (std::string s) |
| void | set_max_files_to_load (size_t n) |
| std::string | get_file_dir () const |
| std::string | get_local_file_dir () const |
| void | set_data_sample_list (std::string s) |
| std::string | get_data_sample_list () const |
| void | keep_sample_order (bool same_order=false) |
| void | set_data_filename (std::string s) |
| std::string | get_data_filename () const |
| void | set_label_filename (std::string s) |
| std::string | get_label_filename () const |
| void | set_shuffle (bool b) |
| bool | is_shuffled () const |
| void | set_shuffled_indices (const std::vector< int > &indices) |
| const std::vector< int > & | get_shuffled_indices () const |
| void | set_first_n (int n) |
| void | set_absolute_sample_count (size_t s) |
| void | set_use_fraction (double s) |
| std::string | get_role () const |
| virtual void | load ()=0 |
| virtual void | setup (int num_io_threads, observer_ptr< thread_pool > io_thread_pool) |
| virtual std::string | get_type () const =0 |
| int | fetch (std::map< data_field_type, CPUMat *> &input_buffers, El::Matrix< El::Int > &indices_fetched, size_t mb_size) |
| Fetch a mini-batch worth of data, including samples, labels, responses (as appropriate) More... | |
| int | fetch (std::vector< conduit::Node > &samples, El::Matrix< El::Int > &indices_fetched, size_t mb_size) |
| virtual bool | has_data_field (data_field_type data_field) const |
| Check to see if the data reader supports this specific data field. More... | |
| void | set_has_data_field (data_field_type const data_field, const bool b) |
| Whether or not a data reader has a data field. More... | |
| void | start_data_store_mini_batch_exchange () |
| void | finish_data_store_mini_batch_exchange () |
| virtual bool | update (bool is_active_reader) |
| virtual int | get_num_labels () const |
| Return the number of labels (classes) in this dataset. More... | |
| virtual int | get_num_responses () const |
| Return the number of responses in this dataset. More... | |
| virtual int | get_linearized_data_size () const |
| Get the linearized size (i.e. number of elements) in a sample. More... | |
| virtual int | get_linearized_label_size () const |
| Get the linearized size (i.e. number of elements) in a label. More... | |
| virtual int | get_linearized_response_size () const |
| Get the linearized size (i.e. number of elements) in a response. More... | |
| virtual int | get_linearized_size (data_field_type const &data_field) const |
| Get the linearized size of what is identified by data_field. More... | |
| virtual const std::vector< El::Int > | get_data_dims () const |
| Get the dimensions of the data. More... | |
| virtual std::vector< El::Int > | get_slice_points (const slice_points_mode var_category, bool &is_supported) |
| virtual bool | position_valid () const |
| True if the data reader's current position is valid. More... | |
| virtual bool | position_is_overrun () const |
| bool | at_new_epoch () const |
| True if the data reader is at the start of an epoch. More... | |
| void | set_mini_batch_size (const int s) |
| Set the mini batch size. More... | |
| int | get_mini_batch_size () const |
| Get the mini batch size. More... | |
| int | get_loaded_mini_batch_size () const |
| Get the loaded mini-batch size. More... | |
| int | get_current_mini_batch_size () const |
| Get the current mini-batch size. More... | |
| int | get_mini_batch_max () const |
| Return the full mini_batch_size. More... | |
| void | set_stride_to_next_mini_batch (const int s) |
| Set the mini batch stride. More... | |
| int | get_stride_to_next_mini_batch () const |
| Return the mini batch stride. More... | |
| void | set_sample_stride (const int s) |
| Set the sample stride. More... | |
| int | get_sample_stride () const |
| Return the sample stride. More... | |
| void | set_iteration_stride (const int s) |
| Set the iteration stride. More... | |
| int | get_iteration_stride () const |
| Return the iteration stride. More... | |
| virtual void | set_base_offset (const int s) |
| Set the base offset. More... | |
| int | get_base_offset () const |
| Return the base offset. More... | |
| void | set_last_mini_batch_size (const int s) |
| Set the last mini batch size. More... | |
| int | get_last_mini_batch_size () const |
| Return the last mini batch size. More... | |
| void | set_stride_to_last_mini_batch (const int s) |
| Set the last mini batch stride. More... | |
| int | get_stride_to_last_mini_batch () const |
| Return the last mini batch stride. More... | |
| void | set_num_parallel_readers (const int s) |
| Set the number of parallel readers per model. More... | |
| int | get_num_parallel_readers () const |
| Return the number of parallel readers per model. More... | |
| virtual void | set_reset_mini_batch_index (const int s) |
| Set the starting mini-batch index for the epoch. More... | |
| int | get_reset_mini_batch_index () const |
| Return the starting mini-batch index for the epoch. More... | |
| int | get_loaded_mini_batch_index () const |
| Return the loaded mini-batch index for the epoch. More... | |
| int | get_current_mini_batch_index () const |
| Return the current mini-batch index for the epoch. More... | |
| void | set_initial_position () |
| Set the current position based on the base and model offsets. More... | |
| int | get_position () const |
| Get the current position in the data reader. More... | |
| int | get_next_position () const |
| Get the next position in the data reader. More... | |
| int * | get_indices () |
| Get a pointer to the start of the shuffled indices. More... | |
| virtual int | get_num_data () const |
| Get the number of samples in this dataset. More... | |
| int | get_num_unused_data (execution_mode m) const |
| Get the number of unused samples in this dataset. More... | |
| int * | get_unused_data (execution_mode m) |
| Get a pointer to the start of the unused sample indices. More... | |
| const std::vector< int > & | get_unused_indices (execution_mode m) |
| void | set_num_iterations_per_epoch (int num_iterations_per_epoch) |
| Set the number of iterations in each epoch. More... | |
| int | get_num_iterations_per_epoch () const |
| Get the number of iterations in each epoch. More... | |
| int | get_current_step_in_epoch () const |
| void | resize_shuffled_indices () |
| void | select_subset_of_data () |
| virtual void | use_unused_index_set (execution_mode m) |
| virtual bool | has_list_per_model () const |
| Does the data reader have a unique sample list per model. More... | |
| virtual bool | has_list_per_trainer () const |
| Does the data reader have a unique sample list per trainer. More... | |
| bool | save_to_checkpoint_shared (persist &p, execution_mode mode) |
| Given directory to store checkpoint files, write state to file and add to number of bytes written. More... | |
| bool | load_from_checkpoint_shared (persist &p, execution_mode mode) |
| Given directory to store checkpoint files, read state from file and add to number of bytes read. More... | |
| bool | save_to_checkpoint_distributed (persist &p, execution_mode mode) |
| bool | load_from_checkpoint_distributed (persist &p, execution_mode mode) |
| Given directory to store checkpoint files, read state from file and add to number of bytes read. More... | |
| const data_store_conduit & | get_data_store () const |
| returns a const ref to the data store More... | |
| data_store_conduit & | get_data_store () |
| returns a non-const ref to the data store More... | |
| data_store_conduit * | get_data_store_ptr () const |
| void | setup_data_store (int mini_batch_size) |
| void | instantiate_data_store () |
| virtual void | preload_data_store () |
| void | set_gan_labelling (bool has_gan_labelling) |
| void | set_gan_label_value (int gan_label_value) |
| void | set_data_store (data_store_conduit *g) |
| support of data store functionality More... | |
| virtual bool | data_store_active () const |
| virtual bool | priming_data_store () const |
| virtual void | post_update () |
| void | set_transform_pipeline (transform::transform_pipeline &&tp) |
| void | print_get_methods (const std::string filename) |
| Print the return values from various get_X methods to file. More... | |
| size_t | get_num_indices_to_use () const |
| void | set_use_data_store (bool s) |
Protected Attributes | |
| std::vector< generic_data_reader * > | m_data_readers |
| List of readers providing data. More... | |
Protected Attributes inherited from lbann::generic_data_reader | |
| bool | m_verbose = false |
| std::unordered_set< int > | m_using_random_node |
| data_store_conduit * | m_data_store |
| lbann_comm * | m_comm |
| bool | m_use_data_store = false |
| std::map< data_field_type, bool > | m_supported_input_types |
| Holds a true value for each input data type that is supported. Use an ordered map so that checkpoints are stable. More... | |
| bool | m_gan_labelling |
| int | m_gan_label_value |
| observer_ptr< thread_pool > | m_io_thread_pool |
| bool | m_keep_sample_order |
| transform::transform_pipeline | m_transform_pipeline |
| bool | m_issue_warning |
Additional Inherited Members | |
Public Types inherited from lbann::generic_data_reader | |
| using | unused_index_map_t = std::map< execution_mode, std::vector< int > > |
Public Attributes inherited from lbann::generic_data_reader | |
| int | m_mini_batch_size |
| int | m_current_pos |
| int | m_stride_to_next_mini_batch |
| int | m_base_offset |
| int | m_sample_stride |
| int | m_iteration_stride |
| Stride used by parallel data readers within the model. More... | |
| std::vector< int > | m_shuffled_indices |
| unused_index_map_t | m_unused_indices |
| Record of the indices that are not being used for training. More... | |
| int | m_last_mini_batch_size |
| int | m_stride_to_last_mini_batch |
| int | m_reset_mini_batch_index |
| The index at which this data reader starts its epoch. More... | |
| int | m_loaded_mini_batch_idx |
| The index of the current mini-batch that has been loaded. More... | |
| int | m_current_mini_batch_idx |
| int | m_num_iterations_per_epoch |
| int | m_num_parallel_readers |
| How many iterations all readers will execute. More... | |
| size_t | m_max_files_to_load |
| How many parallel readers are being used. More... | |
| std::string | m_file_dir |
| std::string | m_local_file_dir |
| std::string | m_data_sample_list |
| std::string | m_data_fn |
| std::string | m_label_fn |
| bool | m_shuffle |
| size_t | m_absolute_sample_count |
| std::map< execution_mode, double > | m_execution_mode_split_fraction |
| double | m_use_fraction |
| int | m_first_n |
| std::string | m_role |
Protected Member Functions inherited from lbann::generic_data_reader | |
| size_t | get_absolute_sample_count () const |
| double | get_use_fraction () const |
| double | get_execution_mode_split_fraction (execution_mode m) const |
| virtual bool | fetch_data_block (std::map< data_field_type, CPUMat *> &input_buffers, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched) |
| bool | fetch_data_block_conduit (std::vector< conduit::Node > &samples, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched) |
| virtual bool | fetch_data_field (data_field_type data_field, CPUMat &Y, int data_id, int mb_idx) |
| Called by fetch_data, fetch_label, fetch_response. More... | |
| virtual bool | fetch_conduit_node (conduit::Node &sample, int data_id) |
| virtual bool | fetch_datum (CPUMat &X, int data_id, int mb_idx) |
| virtual bool | fetch_label (CPUMat &Y, int data_id, int mb_idx) |
| virtual bool | fetch_response (CPUMat &Y, int data_id, int mb_idx) |
| CPUMat | create_datum_view (CPUMat &X, const int mb_idx) |
| virtual void | preprocess_data_source (int tid) |
| virtual void | postprocess_data_source (int tid) |
| virtual void | shuffle_indices () |
| Shuffle indices (uses the data_seq_generator) More... | |
| virtual void | shuffle_indices (rng_gen &gen) |
| Shuffle indices and provide a random number generator. More... | |
| void | error_check_counts () const |
Data reader for merging the samples from multiple data readers into a single dataset.
Definition at line 40 of file compound_data_reader.hpp.
|
inline |
Definition at line 43 of file compound_data_reader.hpp.
|
inline |
Definition at line 53 of file compound_data_reader.hpp.
|
inline override |
|
override pure virtual |
Implements lbann::generic_data_reader.
Implemented in lbann::data_reader_merge_features, and lbann::data_reader_merge_samples.
|
inline |
needed to support data_store_merge_samples
Definition at line 103 of file compound_data_reader.hpp.
|
inline override virtual |
Reimplemented from lbann::generic_data_reader.
Definition at line 108 of file compound_data_reader.hpp.
|
inline override virtual |
Reimplemented from lbann::generic_data_reader.
Definition at line 116 of file compound_data_reader.hpp.
|
inline |
|
inline override virtual |
Apply operations to subsidiary data readers.
Don't propagate the validation fraction to subsidiary readers. The fraction is applied at the top level.
Reimplemented from lbann::generic_data_reader.
Definition at line 84 of file compound_data_reader.hpp.
|
inline override virtual |
Whether or not a data reader has labels.
Reimplemented from lbann::generic_data_reader.
Definition at line 124 of file compound_data_reader.hpp.
|
inline override virtual |
Whether or not a data reader has a response field.
Reimplemented from lbann::generic_data_reader.
Definition at line 131 of file compound_data_reader.hpp.
|
inline override virtual |
Set an identifier for the dataset. The role should be one of "train", "test", or "validate".
Reimplemented from lbann::generic_data_reader.
Definition at line 94 of file compound_data_reader.hpp.
|
protected |
List of readers providing data.
Definition at line 142 of file compound_data_reader.hpp.