|
| | hdf5_reader (const bool shuffle, const std::string key_data, const std::string key_label, const std::string key_responses, const bool hyperslab_labels) |
| |
| | hdf5_reader (const hdf5_reader &) |
| |
| hdf5_reader & | operator= (const hdf5_reader &) |
| |
| | ~hdf5_reader () override |
| |
| hdf5_reader * | copy () const override |
| |
| void | copy_members (const hdf5_reader &rhs) |
| |
| std::string | get_type () const override |
| |
| void | load () override |
| |
| void | set_hdf5_paths (const std::vector< std::string > hdf5_paths) |
| |
| void | set_num_responses (const size_t num_responses) |
| |
| int | get_num_labels () const override |
| | Return the number of labels (classes) in this dataset. More...
|
| |
| int | get_num_responses () const override |
| | Return the number of responses in this dataset. More...
|
| |
| int | get_linearized_data_size () const override |
| | Get the linearized size (i.e. number of elements) in a sample. More...
|
| |
| int | get_linearized_label_size () const override |
| | Get the linearized size (i.e. number of elements) in a label. More...
|
| |
| int | get_linearized_response_size () const override |
| | Get the linearized size (i.e. number of elements) in a response. More...
|
| |
| const std::vector< El::Int > | get_data_dims () const override |
| | Get the dimensions of the data. More...
|
| |
| | generic_data_reader (bool shuffle=true) |
| |
| | generic_data_reader (const generic_data_reader &)=default |
| |
| generic_data_reader & | operator= (const generic_data_reader &)=default |
| |
| virtual | ~generic_data_reader () |
| |
| template<class Archive > |
| void | serialize (Archive &ar) |
| |
| void | set_comm (lbann_comm *comm) |
| | set the comm object More...
|
| |
| lbann_comm * | get_comm () const |
| | returns a (possibly nullptr) pointer to comm More...
|
| |
| virtual bool | has_conduit_output () |
| |
| void | set_file_dir (std::string s) |
| |
| void | set_local_file_dir (std::string s) |
| |
| void | set_max_files_to_load (size_t n) |
| |
| std::string | get_file_dir () const |
| |
| std::string | get_local_file_dir () const |
| |
| void | set_data_sample_list (std::string s) |
| |
| std::string | get_data_sample_list () const |
| |
| void | keep_sample_order (bool same_order=false) |
| |
| void | set_data_filename (std::string s) |
| |
| std::string | get_data_filename () const |
| |
| void | set_label_filename (std::string s) |
| |
| std::string | get_label_filename () const |
| |
| void | set_shuffle (bool b) |
| |
| bool | is_shuffled () const |
| |
| void | set_shuffled_indices (const std::vector< int > &indices) |
| |
| const std::vector< int > & | get_shuffled_indices () const |
| |
| void | set_first_n (int n) |
| |
| void | set_absolute_sample_count (size_t s) |
| |
| void | set_use_fraction (double s) |
| |
| virtual void | set_execution_mode_split_fraction (execution_mode m, double s) |
| |
| virtual void | set_role (std::string role) |
| |
| std::string | get_role () const |
| |
| virtual void | setup (int num_io_threads, observer_ptr< thread_pool > io_thread_pool) |
| |
| int | fetch (std::map< data_field_type, CPUMat *> &input_buffers, El::Matrix< El::Int > &indices_fetched, size_t mb_size) |
| | Fetch a mini-batch worth of data, including samples, labels, responses (as appropriate) More...
|
| |
| int | fetch (std::vector< conduit::Node > &samples, El::Matrix< El::Int > &indices_fetched, size_t mb_size) |
| |
| virtual bool | has_data_field (data_field_type data_field) const |
| | Check to see if the data reader supports this specific data field. More...
|
| |
| virtual bool | has_labels () const |
| |
| virtual bool | has_responses () const |
| |
| void | set_has_data_field (data_field_type const data_field, const bool b) |
| | Whether or not a data reader has a data field. More...
|
| |
| virtual void | set_has_labels (const bool b) |
| | Whether or not a data reader has labels. More...
|
| |
| virtual void | set_has_responses (const bool b) |
| | Whether or not a data reader has a response field. More...
|
| |
| void | start_data_store_mini_batch_exchange () |
| |
| void | finish_data_store_mini_batch_exchange () |
| |
| virtual bool | update (bool is_active_reader) |
| |
| virtual int | get_linearized_size (data_field_type const &data_field) const |
| | Get the linearized size of what is identified by data_field. More...
|
| |
| virtual std::vector< El::Int > | get_slice_points (const slice_points_mode var_category, bool &is_supported) |
| |
| virtual bool | position_valid () const |
| | True if the data reader's current position is valid. More...
|
| |
| virtual bool | position_is_overrun () const |
| |
| bool | at_new_epoch () const |
| | True if the data reader is at the start of an epoch. More...
|
| |
| void | set_mini_batch_size (const int s) |
| | Set the mini batch size. More...
|
| |
| int | get_mini_batch_size () const |
| | Get the mini batch size. More...
|
| |
| int | get_loaded_mini_batch_size () const |
| | Get the loaded mini-batch size. More...
|
| |
| int | get_current_mini_batch_size () const |
| | Get the current mini-batch size. More...
|
| |
| int | get_mini_batch_max () const |
| | Return the full mini_batch_size. More...
|
| |
| void | set_stride_to_next_mini_batch (const int s) |
| | Set the mini batch stride. More...
|
| |
| int | get_stride_to_next_mini_batch () const |
| | Return the mini batch stride. More...
|
| |
| void | set_sample_stride (const int s) |
| | Set the sample stride. More...
|
| |
| int | get_sample_stride () const |
| | Return the sample stride. More...
|
| |
| void | set_iteration_stride (const int s) |
| | Set the iteration stride. More...
|
| |
| int | get_iteration_stride () const |
| | Return the iteration stride. More...
|
| |
| virtual void | set_base_offset (const int s) |
| | Set the base offset. More...
|
| |
| int | get_base_offset () const |
| | Return the base offset. More...
|
| |
| void | set_last_mini_batch_size (const int s) |
| | Set the last mini batch size. More...
|
| |
| int | get_last_mini_batch_size () const |
| | Return the last mini batch size. More...
|
| |
| void | set_stride_to_last_mini_batch (const int s) |
| | Set the last mini batch stride. More...
|
| |
| int | get_stride_to_last_mini_batch () const |
| | Return the last mini batch stride. More...
|
| |
| void | set_num_parallel_readers (const int s) |
| | Set the number of parallel readers per model. More...
|
| |
| int | get_num_parallel_readers () const |
| | Return the number of parallel readers per model. More...
|
| |
| virtual void | set_reset_mini_batch_index (const int s) |
| | Set the starting mini-batch index for the epoch. More...
|
| |
| int | get_reset_mini_batch_index () const |
| | Return the starting mini-batch index for the epoch. More...
|
| |
| int | get_loaded_mini_batch_index () const |
| | Return the loaded mini-batch index for the epoch. More...
|
| |
| int | get_current_mini_batch_index () const |
| | Return the current mini-batch index for the epoch. More...
|
| |
| void | set_initial_position () |
| | Set the current position based on the base and model offsets. More...
|
| |
| int | get_position () const |
| | Get the current position in the data reader. More...
|
| |
| int | get_next_position () const |
| | Get the next position in the data reader. More...
|
| |
| int * | get_indices () |
| | Get a pointer to the start of the shuffled indices. More...
|
| |
| virtual int | get_num_data () const |
| | Get the number of samples in this dataset. More...
|
| |
| int | get_num_unused_data (execution_mode m) const |
| | Get the number of unused samples in this dataset. More...
|
| |
| int * | get_unused_data (execution_mode m) |
| | Get a pointer to the start of the unused sample indices. More...
|
| |
| const std::vector< int > & | get_unused_indices (execution_mode m) |
| |
| void | set_num_iterations_per_epoch (int num_iterations_per_epoch) |
| | Set the number of iterations in each epoch. More...
|
| |
| int | get_num_iterations_per_epoch () const |
| | Get the number of iterations in each epoch. More...
|
| |
| int | get_current_step_in_epoch () const |
| |
| void | resize_shuffled_indices () |
| |
| void | select_subset_of_data () |
| |
| virtual void | use_unused_index_set (execution_mode m) |
| |
| virtual bool | has_list_per_model () const |
| | Does the data reader have a unique sample list per model. More...
|
| |
| virtual bool | has_list_per_trainer () const |
| | Does the data reader have a unique sample list per trainer. More...
|
| |
| bool | save_to_checkpoint_shared (persist &p, execution_mode mode) |
| | Given directory to store checkpoint files, write state to file and add to number of bytes written. More...
|
| |
| bool | load_from_checkpoint_shared (persist &p, execution_mode mode) |
| | Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
|
| |
| bool | save_to_checkpoint_distributed (persist &p, execution_mode mode) |
| |
| bool | load_from_checkpoint_distributed (persist &p, execution_mode mode) |
| | Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
|
| |
| const data_store_conduit & | get_data_store () const |
| | returns a const ref to the data store More...
|
| |
| data_store_conduit & | get_data_store () |
| | returns a non-const ref to the data store More...
|
| |
| data_store_conduit * | get_data_store_ptr () const |
| |
| void | setup_data_store (int mini_batch_size) |
| |
| void | instantiate_data_store () |
| |
| virtual void | preload_data_store () |
| |
| void | set_gan_labelling (bool has_gan_labelling) |
| |
| void | set_gan_label_value (int gan_label_value) |
| |
| void | set_data_store (data_store_conduit *g) |
| | support of data store functionality More...
|
| |
| virtual bool | data_store_active () const |
| |
| virtual bool | priming_data_store () const |
| |
| virtual void | post_update () |
| |
| void | set_transform_pipeline (transform::transform_pipeline &&tp) |
| |
| void | print_get_methods (const std::string filename) |
| | Print the return values from various get_X methods to file. More...
|
| |
| size_t | get_num_indices_to_use () const |
| |
| void | set_use_data_store (bool s) |
| |
|
| void | read_hdf5_hyperslab (hsize_t h_data, hsize_t filespace, int rank, TensorDataType *sample) |
| |
| void | read_hdf5_sample (int data_id, TensorDataType *sample, TensorDataType *labels) |
| |
| void | load_sample (conduit::Node &node, int data_id) |
| |
| bool | fetch_datum (CPUMat &X, int data_id, int mb_idx) override |
| |
| void | fetch_datum_conduit (Mat &X, int data_id) |
| |
| bool | fetch_data_field (data_field_type data_field, CPUMat &Y, int data_id, int mb_idx) override |
| | Called by fetch_data, fetch_label, fetch_response. More...
|
| |
| bool | fetch_label (CPUMat &Y, int data_id, int mb_idx) override |
| |
| bool | fetch_response (CPUMat &Y, int data_id, int mb_idx) override |
| |
| hid_t | get_hdf5_data_type () const |
| |
| conduit::DataType | get_conduit_data_type (conduit::index_t num_elements) const |
| |
| size_t | get_absolute_sample_count () const |
| |
| double | get_use_fraction () const |
| |
| double | get_execution_mode_split_fraction (execution_mode m) const |
| |
| virtual bool | fetch_data_block (std::map< data_field_type, CPUMat *> &input_buffers, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched) |
| |
| bool | fetch_data_block_conduit (std::vector< conduit::Node > &samples, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched) |
| |
| virtual bool | fetch_conduit_node (conduit::Node &sample, int data_id) |
| |
| CPUMat | create_datum_view (CPUMat &X, const int mb_idx) |
| |
| virtual void | preprocess_data_source (int tid) |
| |
| virtual void | postprocess_data_source (int tid) |
| |
| virtual void | shuffle_indices () |
| | Shuffle indices (uses the data_seq_generator) More...
|
| |
| virtual void | shuffle_indices (rng_gen &gen) |
| | Shuffle indices and provide a random number generator. More...
|
| |
| void | error_check_counts () const |
| |
template<typename TensorDataType>
class lbann::hdf5_reader< TensorDataType >
Data reader for data stored in HDF5 files. This data reader was designed to work with Distconv. This currently has two different modes:
- Datasets with 3D data and a small number of responses: This mode assumes a 3D cube dataset such as the CosmoFlow dataset. This requires set_has_responses to be called on setup.
- Datasets with 3D data and 3D labels: This mode assumes 3D cubes with corresponding 3D label tensors such as the LiTS dataset. This requires set_has_labels to be called on setup, and label_reconstruction should be used for the input layer.
Each HDF5 file should contain hdf5_key_data, hdf5_key_labels, and hdf5_key_responses keys to read data, labels and responses respectively.
Definition at line 64 of file data_reader_hdf5_legacy.hpp.