LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
lbann::hdf5_data_reader Class Reference

#include <data_reader_HDF5.hpp>

Inheritance diagram for lbann::hdf5_data_reader:
[legend]
Collaboration diagram for lbann::hdf5_data_reader:
[legend]

Classes

struct  PackingGroup
 

Public Member Functions

 hdf5_data_reader (bool shuffle=true)
 
 hdf5_data_reader (const hdf5_data_reader &)
 
hdf5_data_reader & operator= (const hdf5_data_reader &)
 
hdf5_data_reader * copy () const override
 
void copy_members (const hdf5_data_reader &rhs)
 
 ~hdf5_data_reader () override
 
bool has_conduit_output () override
 
std::string get_type () const override
 
void print_metadata (std::ostream &os=std::cout)
 Prints metadata and data-types for all field-names. More...
 
void load () override
 
bool fetch_conduit_node (conduit::Node &sample, int data_id) override
 
void set_experiment_schema_filename (std::string fn)
 Sets the name of the yaml experiment file. More...
 
const std::string & get_experiment_schema_filename ()
 Returns the name of the yaml experiment file. More...
 
void set_data_schema_filename (std::string fn)
 Sets the name of the yaml data file. More...
 
const std::string & get_data_schema_filename ()
 Returns the name of the yaml data file. More...
 
const std::vector< El::Int > get_data_dims () const override
 Get the dimensions of the data. More...
 
int get_linearized_data_size () const override
 Get the linearized size (i.e. number of elements) in a sample. More...
 
int get_linearized_response_size () const override
 Get the linearized size (i.e. number of elements) in a response. More...
 
int get_linearized_label_size () const override
 Get the linearized size (i.e. number of elements) in a label. More...
 
int get_num_labels () const override
 Return the number of labels (classes) in this dataset. More...
 
int get_num_responses () const override
 Return the number of responses in this dataset. More...
 
conduit::Node get_experiment_schema () const
 this method is made public for testing More...
 
conduit::Node get_data_schema () const
 this method is made public for testing More...
 
void set_experiment_schema (const conduit::Node &s)
 this method is made public for testing More...
 
void set_data_schema (const conduit::Node &s)
 this method is made public for testing More...
 
std::unordered_map< std::string, conduit::Node > get_node_map () const
 this method is made public for testing More...
 
void adjust_metadata (conduit::Node *root)
 this method is made public for testing More...
 
- Public Member Functions inherited from lbann::data_reader_sample_list< sample_list_hdf5< std::string > >
 data_reader_sample_list (bool shuffle=true)
 
 data_reader_sample_list (const data_reader_sample_list &)
 
data_reader_sample_list & operator= (const data_reader_sample_list &)
 
 ~data_reader_sample_list () override
 
data_reader_sample_list * copy () const override
 
void copy_members (const data_reader_sample_list &rhs)
 
std::string get_type () const override
 
std::pair< file_handle_type, sample_name_type > open_file (size_t index)
 Open the file and get the sample name for the given index. More...
 
void close_file (size_t index_in)
 
void shuffle_indices (rng_gen &gen) override
 
void load () override
 
sample_list_hdf5< std::string > & get_sample_list ()
 
sample_type get_sample (size_t index)
 
- Public Member Functions inherited from lbann::generic_data_reader
 generic_data_reader (bool shuffle=true)
 
 generic_data_reader (const generic_data_reader &)=default
 
generic_data_reader & operator= (const generic_data_reader &)=default
 
virtual ~generic_data_reader ()
 
template<class Archive >
void serialize (Archive &ar)
 
void set_comm (lbann_comm *comm)
 set the comm object More...
 
lbann_comm * get_comm () const
 returns a (possibly nullptr) pointer to comm More...
 
void set_file_dir (std::string s)
 
void set_local_file_dir (std::string s)
 
void set_max_files_to_load (size_t n)
 
std::string get_file_dir () const
 
std::string get_local_file_dir () const
 
void set_data_sample_list (std::string s)
 
std::string get_data_sample_list () const
 
void keep_sample_order (bool same_order=false)
 
void set_data_filename (std::string s)
 
std::string get_data_filename () const
 
void set_label_filename (std::string s)
 
std::string get_label_filename () const
 
void set_shuffle (bool b)
 
bool is_shuffled () const
 
void set_shuffled_indices (const std::vector< int > &indices)
 
const std::vector< int > & get_shuffled_indices () const
 
void set_first_n (int n)
 
void set_absolute_sample_count (size_t s)
 
void set_use_fraction (double s)
 
virtual void set_execution_mode_split_fraction (execution_mode m, double s)
 
virtual void set_role (std::string role)
 
std::string get_role () const
 
virtual void setup (int num_io_threads, observer_ptr< thread_pool > io_thread_pool)
 
int fetch (std::map< data_field_type, CPUMat *> &input_buffers, El::Matrix< El::Int > &indices_fetched, size_t mb_size)
 Fetch a mini-batch worth of data, including samples, labels, responses (as appropriate) More...
 
int fetch (std::vector< conduit::Node > &samples, El::Matrix< El::Int > &indices_fetched, size_t mb_size)
 
virtual bool has_data_field (data_field_type data_field) const
 Check to see if the data reader supports this specific data field. More...
 
virtual bool has_labels () const
 
virtual bool has_responses () const
 
void set_has_data_field (data_field_type const data_field, const bool b)
 Whether or not a data reader has a data field. More...
 
virtual void set_has_labels (const bool b)
 Whether or not a data reader has labels. More...
 
virtual void set_has_responses (const bool b)
 Whether or not a data reader has a response field. More...
 
void start_data_store_mini_batch_exchange ()
 
void finish_data_store_mini_batch_exchange ()
 
virtual bool update (bool is_active_reader)
 
virtual std::vector< El::Int > get_slice_points (const slice_points_mode var_category, bool &is_supported)
 
virtual bool position_valid () const
 True if the data reader's current position is valid. More...
 
virtual bool position_is_overrun () const
 
bool at_new_epoch () const
 True if the data reader is at the start of an epoch. More...
 
void set_mini_batch_size (const int s)
 Set the mini batch size. More...
 
int get_mini_batch_size () const
 Get the mini batch size. More...
 
int get_loaded_mini_batch_size () const
 Get the loaded mini-batch size. More...
 
int get_current_mini_batch_size () const
 Get the current mini-batch size. More...
 
int get_mini_batch_max () const
 Return the full mini_batch_size. More...
 
void set_stride_to_next_mini_batch (const int s)
 Set the mini batch stride. More...
 
int get_stride_to_next_mini_batch () const
 Return the mini batch stride. More...
 
void set_sample_stride (const int s)
 Set the sample stride. More...
 
int get_sample_stride () const
 Return the sample stride. More...
 
void set_iteration_stride (const int s)
 Set the iteration stride. More...
 
int get_iteration_stride () const
 Return the iteration stride. More...
 
virtual void set_base_offset (const int s)
 Set the base offset. More...
 
int get_base_offset () const
 Return the base offset. More...
 
void set_last_mini_batch_size (const int s)
 Set the last mini batch size. More...
 
int get_last_mini_batch_size () const
 Return the last mini batch size. More...
 
void set_stride_to_last_mini_batch (const int s)
 Set the last mini batch stride. More...
 
int get_stride_to_last_mini_batch () const
 Return the last mini batch stride. More...
 
void set_num_parallel_readers (const int s)
 Set the number of parallel readers per model. More...
 
int get_num_parallel_readers () const
 Return the number of parallel readers per model. More...
 
virtual void set_reset_mini_batch_index (const int s)
 Set the starting mini-batch index for the epoch. More...
 
int get_reset_mini_batch_index () const
 Return the starting mini-batch index for the epoch. More...
 
int get_loaded_mini_batch_index () const
 Return the loaded mini-batch index for the epoch. More...
 
int get_current_mini_batch_index () const
 Return the current mini-batch index for the epoch. More...
 
void set_initial_position ()
 Set the current position based on the base and model offsets. More...
 
int get_position () const
 Get the current position in the data reader. More...
 
int get_next_position () const
 Get the next position in the data reader. More...
 
int * get_indices ()
 Get a pointer to the start of the shuffled indices. More...
 
virtual int get_num_data () const
 Get the number of samples in this dataset. More...
 
int get_num_unused_data (execution_mode m) const
 Get the number of unused samples in this dataset. More...
 
int * get_unused_data (execution_mode m)
 Get a pointer to the start of the unused sample indices. More...
 
const std::vector< int > & get_unused_indices (execution_mode m)
 
void set_num_iterations_per_epoch (int num_iterations_per_epoch)
 Set the number of iterations in each epoch. More...
 
int get_num_iterations_per_epoch () const
 Get the number of iterations in each epoch. More...
 
int get_current_step_in_epoch () const
 
void resize_shuffled_indices ()
 
void select_subset_of_data ()
 
virtual void use_unused_index_set (execution_mode m)
 
virtual bool has_list_per_model () const
 Does the data reader have a unique sample list per model. More...
 
virtual bool has_list_per_trainer () const
 Does the data reader have a unique sample list per trainer. More...
 
bool save_to_checkpoint_shared (persist &p, execution_mode mode)
 Given directory to store checkpoint files, write state to file and add to number of bytes written. More...
 
bool load_from_checkpoint_shared (persist &p, execution_mode mode)
 Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
 
bool save_to_checkpoint_distributed (persist &p, execution_mode mode)
 
bool load_from_checkpoint_distributed (persist &p, execution_mode mode)
 Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
 
const data_store_conduit & get_data_store () const
 returns a const ref to the data store More...
 
data_store_conduit & get_data_store ()
 returns a non-const ref to the data store More...
 
data_store_conduit * get_data_store_ptr () const
 
void setup_data_store (int mini_batch_size)
 
void instantiate_data_store ()
 
virtual void preload_data_store ()
 
void set_gan_labelling (bool has_gan_labelling)
 
void set_gan_label_value (int gan_label_value)
 
void set_data_store (data_store_conduit *g)
 support of data store functionality More...
 
virtual bool data_store_active () const
 
virtual bool priming_data_store () const
 
virtual void post_update ()
 
void set_transform_pipeline (transform::transform_pipeline &&tp)
 
void print_get_methods (const std::string filename)
 Print the return values from various get_X methods to file. More...
 
size_t get_num_indices_to_use () const
 
void set_use_data_store (bool s)
 

Private Member Functions

const std::vector< El::Int > get_data_dims (std::string name="") const
 
int get_linearized_size (data_field_type const &data_field) const override
 
void load_sample_schema (conduit::Schema &s)
 
void parse_schemas ()
 
void get_schema_ptrs (conduit::Node *starting_node, std::unordered_map< std::string, conduit::Node *> &schema_name_map)
 
void get_leaves (conduit::Node *node_in, std::unordered_map< std::string, conduit::Node *> &leaves_out)
 
void get_leaves_multi (conduit::Node *node_in, std::unordered_map< std::string, conduit::Node *> &leaves_out)
 
void do_preload_data_store () override
 
void load_sample (conduit::Node &node, hid_t file_handle, const std::string &sample_name, bool ignore_failure=false)
 
void load_sample_from_sample_list (conduit::Node &node, size_t index, bool ignore_failure=false)
 
void pack_data (conduit::Node &node_in_out)
 
void load_schema (std::string filename, conduit::Node &schema)
 
void pack (conduit::Node &node, size_t index)
 
conduit::Node merge_metadata_nodes (const conduit::Node *node_A, const conduit::Node *node_B)
 
void build_packing_map (conduit::Node &node)
 
void repack_image (conduit::Node &node, const std::string &path, const conduit::Node &metadata)
 
void coerce (const conduit::Node &metadata, hid_t file_handle, const std::string &original_path, const std::string &new_pathname, conduit::Node &node)
 
void normalize (conduit::Node &node, const std::string &path, const conduit::Node &metadata)
 
void construct_linearized_size_lookup_tables ()
 
void construct_linearized_size_lookup_tables (conduit::Node &node)
 
void test_that_all_nodes_contain_metadata (conduit::Node &node)
 
bool get_delete_packed_fields ()
 
void set_delete_packed_fields (bool flag)
 
template<typename T >
void pack (std::string const &group_name, conduit::Node &node, size_t index)
 
bool is_composite_node (const conduit::Node &node) const
 

Private Attributes

std::unordered_map< std::string, std::vector< El::Int > > m_data_dims_lookup_table
 
std::unordered_map< std::string, int > m_linearized_size_lookup_table
 
std::string m_experiment_schema_filename
 
std::string m_data_schema_filename
 
bool m_delete_packed_fields = true
 
std::unordered_map< std::string, PackingGroup > m_packing_groups
 
const std::string s_metadata_node_name = "metadata"
 
const std::string s_composite_node = "composite_node"
 
std::unordered_map< std::string, conduit::Node * > m_useme_node_map_ptrs
 
std::unordered_map< std::string, conduit::Node > m_useme_node_map
 
conduit::Node m_experiment_schema
 
conduit::Node m_data_schema
 
std::unordered_map< std::string, conduit::Node * > m_data_map
 
std::unordered_set< std::string > m_add_to_map
 

Friends

class ::DataReaderHDF5WhiteboxTester
 

Additional Inherited Members

- Public Types inherited from lbann::data_reader_sample_list< sample_list_hdf5< std::string > >
using sample_name_type = typename sample_list_hdf5< std::string > ::name_t
 
using sample_file_id_type = typename sample_list_hdf5< std::string > ::sample_file_id_t
 
using sample_type = std::pair< sample_file_id_type, sample_name_type >
 
using file_handle_type = typename sample_list_hdf5< std::string > ::file_handle_t
 
- Public Types inherited from lbann::generic_data_reader
using unused_index_map_t = std::map< execution_mode, std::vector< int > >
 
- Public Attributes inherited from lbann::generic_data_reader
int m_mini_batch_size
 
int m_current_pos
 
int m_stride_to_next_mini_batch
 
int m_base_offset
 
int m_sample_stride
 
int m_iteration_stride
 Stride used by parallel data readers within the model. More...
 
std::vector< int > m_shuffled_indices
 
unused_index_map_t m_unused_indices
 Record of the indices that are not being used for training. More...
 
int m_last_mini_batch_size
 
int m_stride_to_last_mini_batch
 
int m_reset_mini_batch_index
 The index at which this data reader starts its epoch. More...
 
int m_loaded_mini_batch_idx
 The index of the current mini-batch that has been loaded. More...
 
int m_current_mini_batch_idx
 
int m_num_iterations_per_epoch
 
int m_num_parallel_readers
 How many iterations all readers will execute. More...
 
size_t m_max_files_to_load
 How many parallel readers are being used. More...
 
std::string m_file_dir
 
std::string m_local_file_dir
 
std::string m_data_sample_list
 
std::string m_data_fn
 
std::string m_label_fn
 
bool m_shuffle
 
size_t m_absolute_sample_count
 
std::map< execution_mode, double > m_execution_mode_split_fraction
 
double m_use_fraction
 
int m_first_n
 
std::string m_role
 
- Protected Member Functions inherited from lbann::data_reader_sample_list< sample_list_hdf5< std::string > >
void load_list_of_samples (const std::string sample_list_file)
 
void load_list_of_samples_from_archive (const std::string &sample_list_archive)
 
- Protected Member Functions inherited from lbann::generic_data_reader
size_t get_absolute_sample_count () const
 
double get_use_fraction () const
 
double get_execution_mode_split_fraction (execution_mode m) const
 
virtual bool fetch_data_block (std::map< data_field_type, CPUMat *> &input_buffers, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched)
 
bool fetch_data_block_conduit (std::vector< conduit::Node > &samples, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched)
 
virtual bool fetch_data_field (data_field_type data_field, CPUMat &Y, int data_id, int mb_idx)
 Called by fetch_data, fetch_label, fetch_response. More...
 
virtual bool fetch_datum (CPUMat &X, int data_id, int mb_idx)
 
virtual bool fetch_label (CPUMat &Y, int data_id, int mb_idx)
 
virtual bool fetch_response (CPUMat &Y, int data_id, int mb_idx)
 
CPUMat create_datum_view (CPUMat &X, const int mb_idx)
 
virtual void preprocess_data_source (int tid)
 
virtual void postprocess_data_source (int tid)
 
virtual void shuffle_indices ()
 Shuffle indices (uses the data_seq_generator) More...
 
void error_check_counts () const
 
- Protected Attributes inherited from lbann::data_reader_sample_list< sample_list_hdf5< std::string > >
sample_list_hdf5< std::string > m_sample_list
 
- Protected Attributes inherited from lbann::generic_data_reader
bool m_verbose = false
 
std::unordered_set< int > m_using_random_node
 
data_store_conduit * m_data_store
 
lbann_comm * m_comm
 
bool m_use_data_store = false
 
std::map< data_field_type, bool > m_supported_input_types
 Holds a true value for each input data type that is supported. Use an ordered map so that checkpoints are stable. More...
 
bool m_gan_labelling
 
int m_gan_label_value
 
observer_ptr< thread_pool > m_io_thread_pool
 
bool m_keep_sample_order
 
transform::transform_pipeline m_transform_pipeline
 
bool m_issue_warning
 

Detailed Description

A generalized data reader for data stored in HDF5 files.

Definition at line 86 of file data_reader_HDF5.hpp.

Constructor & Destructor Documentation

◆ hdf5_data_reader() [1/2]

lbann::hdf5_data_reader::hdf5_data_reader ( bool  shuffle = true)
Here is the caller graph for this function:

◆ hdf5_data_reader() [2/2]

lbann::hdf5_data_reader::hdf5_data_reader ( const hdf5_data_reader & )

◆ ~hdf5_data_reader()

lbann::hdf5_data_reader::~hdf5_data_reader ( )
override
Here is the caller graph for this function:

Member Function Documentation

◆ adjust_metadata()

void lbann::hdf5_data_reader::adjust_metadata ( conduit::Node *  root)

this method is made public for testing

On return, every Node will have a (possibly empty) child node named <s_metadata_node_name>. The rules: 1) a node inherits the metadata node of its parent; 2) if the node already has a metadata child, the contents are preserved; if both parent and child have the same named field, the child's takes precedence. Recursive.

Here is the caller graph for this function:

◆ build_packing_map()

void lbann::hdf5_data_reader::build_packing_map ( conduit::Node &  node)
private

Fills in m_packing_groups data structure

◆ coerce()

void lbann::hdf5_data_reader::coerce ( const conduit::Node &  metadata,
hid_t  file_handle,
const std::string &  original_path,
const std::string &  new_pathname,
conduit::Node &  node 
)
private

called from load_sample

◆ construct_linearized_size_lookup_tables() [1/2]

void lbann::hdf5_data_reader::construct_linearized_size_lookup_tables ( )
private

Constructs m_data_dims_lookup_table and m_linearized_size_lookup_table

Here is the caller graph for this function:

◆ construct_linearized_size_lookup_tables() [2/2]

void lbann::hdf5_data_reader::construct_linearized_size_lookup_tables ( conduit::Node &  node)
private

◆ copy()

hdf5_data_reader* lbann::hdf5_data_reader::copy ( ) const
inline override virtual

Implements lbann::generic_data_reader.

Definition at line 93 of file data_reader_HDF5.hpp.

Here is the call graph for this function:

◆ copy_members()

void lbann::hdf5_data_reader::copy_members ( const hdf5_data_reader &  rhs)
Here is the caller graph for this function:

◆ do_preload_data_store()

void lbann::hdf5_data_reader::do_preload_data_store ( )
override private virtual

Reimplemented from lbann::generic_data_reader.

◆ fetch_conduit_node()

bool lbann::hdf5_data_reader::fetch_conduit_node ( conduit::Node &  sample,
int  data_id 
)
override virtual

Reimplemented from lbann::generic_data_reader.

Here is the caller graph for this function:

◆ get_data_dims() [1/2]

const std::vector<El::Int> lbann::hdf5_data_reader::get_data_dims ( ) const
inline override virtual

Get the dimensions of the data.

Reimplemented from lbann::generic_data_reader.

Definition at line 139 of file data_reader_HDF5.hpp.

◆ get_data_dims() [2/2]

const std::vector<El::Int> lbann::hdf5_data_reader::get_data_dims ( std::string  name = "") const
private

◆ get_data_schema()

conduit::Node lbann::hdf5_data_reader::get_data_schema ( ) const
inline

this method is made public for testing

Definition at line 169 of file data_reader_HDF5.hpp.

Here is the call graph for this function:

◆ get_data_schema_filename()

const std::string& lbann::hdf5_data_reader::get_data_schema_filename ( )
inline

Returns the name of the yaml data file.

Definition at line 134 of file data_reader_HDF5.hpp.

◆ get_delete_packed_fields()

bool lbann::hdf5_data_reader::get_delete_packed_fields ( )
inline private

Definition at line 369 of file data_reader_HDF5.hpp.

◆ get_experiment_schema()

conduit::Node lbann::hdf5_data_reader::get_experiment_schema ( ) const
inline

this method is made public for testing

Definition at line 167 of file data_reader_HDF5.hpp.

◆ get_experiment_schema_filename()

const std::string& lbann::hdf5_data_reader::get_experiment_schema_filename ( )
inline

Returns the name of the yaml experiment file.

Definition at line 125 of file data_reader_HDF5.hpp.

◆ get_leaves()

void lbann::hdf5_data_reader::get_leaves ( conduit::Node *  node_in,
std::unordered_map< std::string, conduit::Node *> &  leaves_out 
)
private

Returns, in 'leaves_out', the schemas for leaf nodes in the tree rooted at 'node_in'. Optionally ignores nodes named "metadata" (or whatever 's_metadata_node_name' is set to). Keys in the filled-in map are the pathnames to the leaf nodes.

◆ get_leaves_multi()

void lbann::hdf5_data_reader::get_leaves_multi ( conduit::Node *  node_in,
std::unordered_map< std::string, conduit::Node *> &  leaves_out 
)
private

Functionality is similar to get_leaves(). This method differs in that two conduit::Node trees are searched for leaves. The leaves from the first are found, and are then treated as starting points for continuing the search in the second tree. In practice, the first tree is defined by the experiment_schema, and the second by the data_schema.

◆ get_linearized_data_size()

int lbann::hdf5_data_reader::get_linearized_data_size ( ) const
inline override virtual

Get the linearized size (i.e. number of elements) in a sample.

Reimplemented from lbann::generic_data_reader.

Definition at line 144 of file data_reader_HDF5.hpp.

Here is the call graph for this function:

◆ get_linearized_label_size()

int lbann::hdf5_data_reader::get_linearized_label_size ( ) const
inline override virtual

Get the linearized size (i.e. number of elements) in a label.

Reimplemented from lbann::generic_data_reader.

Definition at line 154 of file data_reader_HDF5.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_linearized_response_size()

int lbann::hdf5_data_reader::get_linearized_response_size ( ) const
inline override virtual

Get the linearized size (i.e. number of elements) in a response.

Reimplemented from lbann::generic_data_reader.

Definition at line 149 of file data_reader_HDF5.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_linearized_size()

int lbann::hdf5_data_reader::get_linearized_size ( data_field_type const &  data_field) const
override private virtual

Returns the size of the requested field (datum, label, response, etc)

Reimplemented from lbann::generic_data_reader.

Here is the caller graph for this function:

◆ get_node_map()

std::unordered_map<std::string, conduit::Node> lbann::hdf5_data_reader::get_node_map ( ) const
inline

this method is made public for testing

Definition at line 175 of file data_reader_HDF5.hpp.

Here is the call graph for this function:

◆ get_num_labels()

int lbann::hdf5_data_reader::get_num_labels ( ) const
inline override virtual

Return the number of labels (classes) in this dataset.

This is called at the end of update; it permits data readers to perform actions that are specific to their data sets, for example, data_reader_jag_conduit_hdf5 has the 'primary' data reader bcast its shuffled indices to the other data readers. In general most data readers will probably not override this method. It may also be called outside of update.

Reimplemented from lbann::generic_data_reader.

Definition at line 159 of file data_reader_HDF5.hpp.

Here is the call graph for this function:

◆ get_num_responses()

int lbann::hdf5_data_reader::get_num_responses ( ) const
inline override virtual

Return the number of responses in this dataset.

Reimplemented from lbann::generic_data_reader.

Definition at line 161 of file data_reader_HDF5.hpp.

Here is the call graph for this function:

◆ get_schema_ptrs()

void lbann::hdf5_data_reader::get_schema_ptrs ( conduit::Node *  starting_node,
std::unordered_map< std::string, conduit::Node *> &  schema_name_map 
)
private

Gets pointers to all nodes in the subtree rooted at 'starting_node'; keys are the pathnames; recursive. However, ignores any nodes named "metadata" (or whatever 's_metadata_node_name' is set to).

◆ get_type()

std::string lbann::hdf5_data_reader::get_type ( ) const
inline override virtual

Return this data_reader's type

Implements lbann::generic_data_reader.

Definition at line 102 of file data_reader_HDF5.hpp.

Here is the call graph for this function:

◆ has_conduit_output()

bool lbann::hdf5_data_reader::has_conduit_output ( )
inline override virtual

Reimplemented from lbann::generic_data_reader.

Definition at line 100 of file data_reader_HDF5.hpp.

◆ is_composite_node()

bool lbann::hdf5_data_reader::is_composite_node ( const conduit::Node &  node) const
private

Returns true if this is a node that was constructed from one or more original data fields

Here is the caller graph for this function:

◆ load()

void lbann::hdf5_data_reader::load ( )
override virtual

Load the dataset. Each data reader implementation should implement this to initialize its internal data structures, determine the number of samples and their dimensionality (if needed), and set up and shuffle samples.

Implements lbann::generic_data_reader.

Here is the caller graph for this function:

◆ load_sample()

void lbann::hdf5_data_reader::load_sample ( conduit::Node &  node,
hid_t  file_handle,
const std::string &  sample_name,
bool  ignore_failure = false 
)
private

Loads a sample from file to a conduit::Node; calls normalize, coerce, pack, etc. "ignore_failure" is only used by the call to print_metadata().

Here is the caller graph for this function:

◆ load_sample_from_sample_list()

void lbann::hdf5_data_reader::load_sample_from_sample_list ( conduit::Node &  node,
size_t  index,
bool  ignore_failure = false 
)
private

Finds a sample in the sample list by index and then loads it.

◆ load_sample_schema()

void lbann::hdf5_data_reader::load_sample_schema ( conduit::Schema &  s)
private

P_0 reads and bcasts the schema

◆ load_schema()

void lbann::hdf5_data_reader::load_schema ( std::string  filename,
conduit::Node &  schema 
)
private

loads a schema from file

◆ merge_metadata_nodes()

conduit::Node lbann::hdf5_data_reader::merge_metadata_nodes ( const conduit::Node *  node_A,
const conduit::Node *  node_B 
)
private

Merges the contents of the two input nodes, either of which may be a nullptr. If the input nodes contain a common field-name, then the value from node_B is used, and the value from node_A is discarded.

◆ normalize()

void lbann::hdf5_data_reader::normalize ( conduit::Node &  node,
const std::string &  path,
const conduit::Node &  metadata 
)
private
Here is the caller graph for this function:

◆ operator=()

hdf5_data_reader& lbann::hdf5_data_reader::operator= ( const hdf5_data_reader & )

◆ pack() [1/2]

void lbann::hdf5_data_reader::pack ( conduit::Node &  node,
size_t  index 
)
private

pack the data; this is for all 'groups' in the node

Here is the caller graph for this function:

◆ pack() [2/2]

template<typename T >
void lbann::hdf5_data_reader::pack ( std::string const &  group_name,
conduit::Node &  node,
size_t  index 
)
private

Packs all fields assigned to 'group_name' (datum, label, response) into a 1D vector; the packed field is then inserted in a conduit node, that is passed to the data_store

◆ pack_data()

void lbann::hdf5_data_reader::pack_data ( conduit::Node &  node_in_out)
private

Performs packing, normalization, etc. Called by load_sample.

◆ parse_schemas()

void lbann::hdf5_data_reader::parse_schemas ( )
private

Fills in various data structures by parsing the schemas (i.e., m_data_schema and m_experiment_schema).

Here is the caller graph for this function:

◆ print_metadata()

void lbann::hdf5_data_reader::print_metadata ( std::ostream &  os = std::cout)

Prints metadata and data-types for all field-names.

Note: if you change the "os" parameter to something other than cout, some information will be lost; this is because conduit print() methods do not take parameters; they only print to cout. Note: this method is called internally (I forget from exactly where), and can be disabled by the cmd line switch: --quiet

Here is the caller graph for this function:

◆ repack_image()

void lbann::hdf5_data_reader::repack_image ( conduit::Node &  node,
const std::string &  path,
const conduit::Node &  metadata 
)
private

repacks from HWC to CHW

Here is the caller graph for this function:

◆ set_data_schema()

void lbann::hdf5_data_reader::set_data_schema ( const conduit::Node &  s)

this method is made public for testing

Here is the caller graph for this function:

◆ set_data_schema_filename()

void lbann::hdf5_data_reader::set_data_schema_filename ( std::string  fn)
inline

Sets the name of the yaml data file.

Definition at line 131 of file data_reader_HDF5.hpp.

◆ set_delete_packed_fields()

void lbann::hdf5_data_reader::set_delete_packed_fields ( bool  flag)
inline private

Definition at line 370 of file data_reader_HDF5.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ set_experiment_schema()

void lbann::hdf5_data_reader::set_experiment_schema ( const conduit::Node &  s)

this method is made public for testing

Here is the caller graph for this function:

◆ set_experiment_schema_filename()

void lbann::hdf5_data_reader::set_experiment_schema_filename ( std::string  fn)
inline

Sets the name of the yaml experiment file.

Definition at line 119 of file data_reader_HDF5.hpp.

◆ test_that_all_nodes_contain_metadata()

void lbann::hdf5_data_reader::test_that_all_nodes_contain_metadata ( conduit::Node &  node)
private

sanity check; call after adjust_metadata

Friends And Related Function Documentation

◆ ::DataReaderHDF5WhiteboxTester

friend class ::DataReaderHDF5WhiteboxTester
friend

Definition at line 389 of file data_reader_HDF5.hpp.

Member Data Documentation

◆ m_add_to_map

std::unordered_set<std::string> lbann::hdf5_data_reader::m_add_to_map
private

only used in pack()

Definition at line 265 of file data_reader_HDF5.hpp.

◆ m_data_dims_lookup_table

std::unordered_map<std::string, std::vector<El::Int> > lbann::hdf5_data_reader::m_data_dims_lookup_table
private

filled in by construct_linearized_size_lookup_tables; used by get_data_dims()

Definition at line 195 of file data_reader_HDF5.hpp.

◆ m_data_map

std::unordered_map<std::string, conduit::Node*> lbann::hdf5_data_reader::m_data_map
private

Used internally in the construction of the other node maps; refers to nodes that don't contain data. Maps a node's pathname to the node for m_data_schema.

Definition at line 262 of file data_reader_HDF5.hpp.

◆ m_data_schema

conduit::Node lbann::hdf5_data_reader::m_data_schema
private

Schema supplied by the user; this contains a listing of all fields of a sample (i.e., as it appears on disk); may contain additional "metadata" nodes that contain processing directives, normalization values, etc.

Definition at line 257 of file data_reader_HDF5.hpp.

◆ m_data_schema_filename

std::string lbann::hdf5_data_reader::m_data_schema_filename
private

Definition at line 204 of file data_reader_HDF5.hpp.

◆ m_delete_packed_fields

bool lbann::hdf5_data_reader::m_delete_packed_fields = true
private

Definition at line 211 of file data_reader_HDF5.hpp.

◆ m_experiment_schema

conduit::Node lbann::hdf5_data_reader::m_experiment_schema
private

Schema supplied by the user; this contains a listing of the fields that will be used in an experiment; additionally may contain processing directives related to type coercion, packing, etc. Takes precedence over m_data_schema and inherits from m_data_schema.

Definition at line 250 of file data_reader_HDF5.hpp.

◆ m_experiment_schema_filename

std::string lbann::hdf5_data_reader::m_experiment_schema_filename
private

Definition at line 202 of file data_reader_HDF5.hpp.

◆ m_linearized_size_lookup_table

std::unordered_map<std::string, int> lbann::hdf5_data_reader::m_linearized_size_lookup_table
private

filled in by construct_linearized_size_lookup_tables; used by get_linearized_size()

Definition at line 200 of file data_reader_HDF5.hpp.

◆ m_packing_groups

std::unordered_map<std::string, PackingGroup> lbann::hdf5_data_reader::m_packing_groups
private

Definition at line 223 of file data_reader_HDF5.hpp.

◆ m_useme_node_map

std::unordered_map<std::string, conduit::Node> lbann::hdf5_data_reader::m_useme_node_map
private

maps: Node's path -> the Node

Definition at line 244 of file data_reader_HDF5.hpp.

◆ m_useme_node_map_ptrs

std::unordered_map<std::string, conduit::Node*> lbann::hdf5_data_reader::m_useme_node_map_ptrs
private

Refers to data that will be used for the experiment. Combination of the data and experiment schemas.

DAH (7/20/21) m_useme_node_map_ptrs should be completely replaced by m_useme_node_map (background: m_useme_node_map_ptrs was what I 1st coded, then realized that you shouldn't copy pointers in copy_members)

BVE

Todo:
  • cleanup node map pointers

maps: Node's path -> the Node

Definition at line 242 of file data_reader_HDF5.hpp.

◆ s_composite_node

const std::string lbann::hdf5_data_reader::s_composite_node = "composite_node"
private

Definition at line 230 of file data_reader_HDF5.hpp.

◆ s_metadata_node_name

const std::string lbann::hdf5_data_reader::s_metadata_node_name = "metadata"
private

Name of nodes in schemas that contain instructions on normalizing, packing, and casting data, etc.

Definition at line 228 of file data_reader_HDF5.hpp.


The documentation for this class was generated from the following file: