LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
lbann::generic_compound_data_reader Class Reference (abstract)

#include <compound_data_reader.hpp>

Inheritance diagram for lbann::generic_compound_data_reader:
[legend]
Collaboration diagram for lbann::generic_compound_data_reader:
[legend]

Public Member Functions

 generic_compound_data_reader (std::vector< generic_data_reader *> data_readers, bool shuffle=true)
 
 generic_compound_data_reader (const generic_compound_data_reader &other)
 
generic_compound_data_reader & operator= (const generic_compound_data_reader &other)
 
 ~generic_compound_data_reader () override
 
generic_compound_data_reader * copy () const override=0
 
void set_execution_mode_split_fraction (execution_mode m, double s) override
 Apply operations to subsidiary data readers. More...
 
void set_role (std::string role) override
 
std::vector< generic_data_reader * > & get_data_readers ()
 needed to support data_store_merge_samples More...
 
bool has_labels () const override
 
bool has_responses () const override
 
void set_has_labels (const bool b) override
 Whether or not a data reader has labels. More...
 
void set_has_responses (const bool b) override
 Whether or not a data reader has a response field. More...
 
- Public Member Functions inherited from lbann::generic_data_reader
 generic_data_reader (bool shuffle=true)
 
 generic_data_reader (const generic_data_reader &)=default
 
generic_data_reader & operator= (const generic_data_reader &)=default
 
virtual ~generic_data_reader ()
 
template<class Archive >
void serialize (Archive &ar)
 
void set_comm (lbann_comm *comm)
 set the comm object More...
 
lbann_comm * get_comm () const
 returns a (possibly nullptr) pointer to comm More...
 
virtual bool has_conduit_output ()
 
void set_file_dir (std::string s)
 
void set_local_file_dir (std::string s)
 
void set_max_files_to_load (size_t n)
 
std::string get_file_dir () const
 
std::string get_local_file_dir () const
 
void set_data_sample_list (std::string s)
 
std::string get_data_sample_list () const
 
void keep_sample_order (bool same_order=false)
 
void set_data_filename (std::string s)
 
std::string get_data_filename () const
 
void set_label_filename (std::string s)
 
std::string get_label_filename () const
 
void set_shuffle (bool b)
 
bool is_shuffled () const
 
void set_shuffled_indices (const std::vector< int > &indices)
 
const std::vector< int > & get_shuffled_indices () const
 
void set_first_n (int n)
 
void set_absolute_sample_count (size_t s)
 
void set_use_fraction (double s)
 
std::string get_role () const
 
virtual void load ()=0
 
virtual void setup (int num_io_threads, observer_ptr< thread_pool > io_thread_pool)
 
virtual std::string get_type () const =0
 
int fetch (std::map< data_field_type, CPUMat *> &input_buffers, El::Matrix< El::Int > &indices_fetched, size_t mb_size)
 Fetch a mini-batch worth of data, including samples, labels, responses (as appropriate) More...
 
int fetch (std::vector< conduit::Node > &samples, El::Matrix< El::Int > &indices_fetched, size_t mb_size)
 
virtual bool has_data_field (data_field_type data_field) const
 Check to see if the data reader supports this specific data field. More...
 
void set_has_data_field (data_field_type const data_field, const bool b)
 Whether or not a data reader has a data field. More...
 
void start_data_store_mini_batch_exchange ()
 
void finish_data_store_mini_batch_exchange ()
 
virtual bool update (bool is_active_reader)
 
virtual int get_num_labels () const
 Return the number of labels (classes) in this dataset. More...
 
virtual int get_num_responses () const
 Return the number of responses in this dataset. More...
 
virtual int get_linearized_data_size () const
 Get the linearized size (i.e. number of elements) in a sample. More...
 
virtual int get_linearized_label_size () const
 Get the linearized size (i.e. number of elements) in a label. More...
 
virtual int get_linearized_response_size () const
 Get the linearized size (i.e. number of elements) in a response. More...
 
virtual int get_linearized_size (data_field_type const &data_field) const
 Get the linearized size of what is identified by data_field. More...
 
virtual const std::vector< El::Int > get_data_dims () const
 Get the dimensions of the data. More...
 
virtual std::vector< El::Int > get_slice_points (const slice_points_mode var_category, bool &is_supported)
 
virtual bool position_valid () const
 True if the data reader's current position is valid. More...
 
virtual bool position_is_overrun () const
 
bool at_new_epoch () const
 True if the data reader is at the start of an epoch. More...
 
void set_mini_batch_size (const int s)
 Set the mini batch size. More...
 
int get_mini_batch_size () const
 Get the mini batch size. More...
 
int get_loaded_mini_batch_size () const
 Get the loaded mini-batch size. More...
 
int get_current_mini_batch_size () const
 Get the current mini-batch size. More...
 
int get_mini_batch_max () const
 Return the full mini_batch_size. More...
 
void set_stride_to_next_mini_batch (const int s)
 Set the mini batch stride. More...
 
int get_stride_to_next_mini_batch () const
 Return the mini batch stride. More...
 
void set_sample_stride (const int s)
 Set the sample stride. More...
 
int get_sample_stride () const
 Return the sample stride. More...
 
void set_iteration_stride (const int s)
 Set the iteration stride. More...
 
int get_iteration_stride () const
 Return the iteration stride. More...
 
virtual void set_base_offset (const int s)
 Set the base offset. More...
 
int get_base_offset () const
 Return the base offset. More...
 
void set_last_mini_batch_size (const int s)
 Set the last mini batch size. More...
 
int get_last_mini_batch_size () const
 Return the last mini batch size. More...
 
void set_stride_to_last_mini_batch (const int s)
 Set the last mini batch stride. More...
 
int get_stride_to_last_mini_batch () const
 Return the last mini batch stride. More...
 
void set_num_parallel_readers (const int s)
 Set the number of parallel readers per model. More...
 
int get_num_parallel_readers () const
 Return the number of parallel readers per model. More...
 
virtual void set_reset_mini_batch_index (const int s)
 Set the starting mini-batch index for the epoch. More...
 
int get_reset_mini_batch_index () const
 Return the starting mini-batch index for the epoch. More...
 
int get_loaded_mini_batch_index () const
 Return the current mini-batch index for the epoch. More...
 
int get_current_mini_batch_index () const
 Return the current mini-batch index for the epoch. More...
 
void set_initial_position ()
 Set the current position based on the base and model offsets. More...
 
int get_position () const
 Get the current position in the data reader. More...
 
int get_next_position () const
 Get the next position in the data reader. More...
 
int * get_indices ()
 Get a pointer to the start of the shuffled indices. More...
 
virtual int get_num_data () const
 Get the number of samples in this dataset. More...
 
int get_num_unused_data (execution_mode m) const
 Get the number of unused samples in this dataset. More...
 
int * get_unused_data (execution_mode m)
 Get a pointer to the start of the unused sample indices. More...
 
const std::vector< int > & get_unused_indices (execution_mode m)
 
void set_num_iterations_per_epoch (int num_iterations_per_epoch)
 Set the number of iterations in each epoch. More...
 
int get_num_iterations_per_epoch () const
 Get the number of iterations in each epoch. More...
 
int get_current_step_in_epoch () const
 
void resize_shuffled_indices ()
 
void select_subset_of_data ()
 
virtual void use_unused_index_set (execution_mode m)
 
virtual bool has_list_per_model () const
 Does the data reader have a unique sample list per model. More...
 
virtual bool has_list_per_trainer () const
 Does the data reader have a unique sample list per trainer. More...
 
bool save_to_checkpoint_shared (persist &p, execution_mode mode)
 Given directory to store checkpoint files, write state to file and add to number of bytes written. More...
 
bool load_from_checkpoint_shared (persist &p, execution_mode mode)
 Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
 
bool save_to_checkpoint_distributed (persist &p, execution_mode mode)
 
bool load_from_checkpoint_distributed (persist &p, execution_mode mode)
 Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
 
const data_store_conduit & get_data_store () const
 returns a const ref to the data store More...
 
data_store_conduit & get_data_store ()
 returns a non-const ref to the data store More...
 
data_store_conduit * get_data_store_ptr () const
 
void setup_data_store (int mini_batch_size)
 
void instantiate_data_store ()
 
virtual void preload_data_store ()
 
void set_gan_labelling (bool has_gan_labelling)
 
void set_gan_label_value (int gan_label_value)
 
void set_data_store (data_store_conduit *g)
 support of data store functionality More...
 
virtual bool data_store_active () const
 
virtual bool priming_data_store () const
 
virtual void post_update ()
 
void set_transform_pipeline (transform::transform_pipeline &&tp)
 
void print_get_methods (const std::string filename)
 Print the return values from various get_X methods to file. More...
 
size_t get_num_indices_to_use () const
 
void set_use_data_store (bool s)
 

Protected Attributes

std::vector< generic_data_reader * > m_data_readers
 List of readers providing data. More...
 
- Protected Attributes inherited from lbann::generic_data_reader
bool m_verbose = false
 
std::unordered_set< int > m_using_random_node
 
data_store_conduit * m_data_store
 
lbann_comm * m_comm
 
bool m_use_data_store = false
 
std::map< data_field_type, bool > m_supported_input_types
 Holds a true value for each input data type that is supported. Use an ordered map so that checkpoints are stable. More...
 
bool m_gan_labelling
 
int m_gan_label_value
 
observer_ptr< thread_pool > m_io_thread_pool
 
bool m_keep_sample_order
 
transform::transform_pipeline m_transform_pipeline
 
bool m_issue_warning
 

Additional Inherited Members

- Public Types inherited from lbann::generic_data_reader
using unused_index_map_t = std::map< execution_mode, std::vector< int > >
 
- Public Attributes inherited from lbann::generic_data_reader
int m_mini_batch_size
 
int m_current_pos
 
int m_stride_to_next_mini_batch
 
int m_base_offset
 
int m_sample_stride
 
int m_iteration_stride
 Stride used by parallel data readers within the model. More...
 
std::vector< int > m_shuffled_indices
 
unused_index_map_t m_unused_indices
 Record of the indices that are not being used for training. More...
 
int m_last_mini_batch_size
 
int m_stride_to_last_mini_batch
 
int m_reset_mini_batch_index
 The index at which this data reader starts its epoch. More...
 
int m_loaded_mini_batch_idx
 The index of the current mini-batch that has been loaded. More...
 
int m_current_mini_batch_idx
 
int m_num_iterations_per_epoch
 How many iterations all readers will execute. More...
 
int m_num_parallel_readers
 How many parallel readers are being used. More...
 
size_t m_max_files_to_load
 
std::string m_file_dir
 
std::string m_local_file_dir
 
std::string m_data_sample_list
 
std::string m_data_fn
 
std::string m_label_fn
 
bool m_shuffle
 
size_t m_absolute_sample_count
 
std::map< execution_mode, double > m_execution_mode_split_fraction
 
double m_use_fraction
 
int m_first_n
 
std::string m_role
 
- Protected Member Functions inherited from lbann::generic_data_reader
size_t get_absolute_sample_count () const
 
double get_use_fraction () const
 
double get_execution_mode_split_fraction (execution_mode m) const
 
virtual bool fetch_data_block (std::map< data_field_type, CPUMat *> &input_buffers, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched)
 
bool fetch_data_block_conduit (std::vector< conduit::Node > &samples, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched)
 
virtual bool fetch_data_field (data_field_type data_field, CPUMat &Y, int data_id, int mb_idx)
 Called by fetch_data, fetch_label, fetch_response. More...
 
virtual bool fetch_conduit_node (conduit::Node &sample, int data_id)
 
virtual bool fetch_datum (CPUMat &X, int data_id, int mb_idx)
 
virtual bool fetch_label (CPUMat &Y, int data_id, int mb_idx)
 
virtual bool fetch_response (CPUMat &Y, int data_id, int mb_idx)
 
CPUMat create_datum_view (CPUMat &X, const int mb_idx)
 
virtual void preprocess_data_source (int tid)
 
virtual void postprocess_data_source (int tid)
 
virtual void shuffle_indices ()
 Shuffle indices (uses the data_seq_generator) More...
 
virtual void shuffle_indices (rng_gen &gen)
 Shuffle indices and provide a random number generator. More...
 
void error_check_counts () const
 

Detailed Description

Data reader for merging the samples from multiple data readers into a single dataset.

Definition at line 40 of file compound_data_reader.hpp.

Constructor & Destructor Documentation

◆ generic_compound_data_reader() [1/2]

lbann::generic_compound_data_reader::generic_compound_data_reader ( std::vector< generic_data_reader *>  data_readers,
bool  shuffle = true 
)
inline

Definition at line 43 of file compound_data_reader.hpp.

◆ generic_compound_data_reader() [2/2]

lbann::generic_compound_data_reader::generic_compound_data_reader ( const generic_compound_data_reader & other)
inline

Definition at line 53 of file compound_data_reader.hpp.

◆ ~generic_compound_data_reader()

lbann::generic_compound_data_reader::~generic_compound_data_reader ( )
inline override

Definition at line 73 of file compound_data_reader.hpp.

Here is the call graph for this function:

Member Function Documentation

◆ copy()

generic_compound_data_reader* lbann::generic_compound_data_reader::copy ( ) const
override pure virtual

Implements lbann::generic_data_reader.

Implemented in lbann::data_reader_merge_features, and lbann::data_reader_merge_samples.

Here is the caller graph for this function:

◆ get_data_readers()

std::vector<generic_data_reader*>& lbann::generic_compound_data_reader::get_data_readers ( )
inline

needed to support data_store_merge_samples

Definition at line 103 of file compound_data_reader.hpp.

◆ has_labels()

bool lbann::generic_compound_data_reader::has_labels ( ) const
inline override virtual

Reimplemented from lbann::generic_data_reader.

Definition at line 108 of file compound_data_reader.hpp.

◆ has_responses()

bool lbann::generic_compound_data_reader::has_responses ( ) const
inline override virtual

Reimplemented from lbann::generic_data_reader.

Definition at line 116 of file compound_data_reader.hpp.

◆ operator=()

generic_compound_data_reader& lbann::generic_compound_data_reader::operator= ( const generic_compound_data_reader & other)
inline

Definition at line 61 of file compound_data_reader.hpp.

Here is the call graph for this function:

◆ set_execution_mode_split_fraction()

void lbann::generic_compound_data_reader::set_execution_mode_split_fraction ( execution_mode  m,
double  s 
)
inline override virtual

Apply operations to subsidiary data readers.

Don't propagate the validation fraction to subsidiary readers; the fraction is applied at the top level.

Reimplemented from lbann::generic_data_reader.

Definition at line 84 of file compound_data_reader.hpp.

Here is the call graph for this function:

◆ set_has_labels()

void lbann::generic_compound_data_reader::set_has_labels ( const bool  b)
inline override virtual

Whether or not a data reader has labels.

Reimplemented from lbann::generic_data_reader.

Definition at line 124 of file compound_data_reader.hpp.

◆ set_has_responses()

void lbann::generic_compound_data_reader::set_has_responses ( const bool  b)
inline override virtual

Whether or not a data reader has a response field.

Reimplemented from lbann::generic_data_reader.

Definition at line 131 of file compound_data_reader.hpp.

◆ set_role()

void lbann::generic_compound_data_reader::set_role ( std::string  role)
inline override virtual

Set an identifier for the dataset. The role should be one of "train", "test", or "validate".

Reimplemented from lbann::generic_data_reader.

Definition at line 94 of file compound_data_reader.hpp.

Here is the call graph for this function:

Member Data Documentation

◆ m_data_readers

std::vector<generic_data_reader*> lbann::generic_compound_data_reader::m_data_readers
protected

List of readers providing data.

Definition at line 142 of file compound_data_reader.hpp.


The documentation for this class was generated from the following file: