LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
lbann::pilot2_molecular_reader Class Reference

#include <data_reader_pilot2_molecular.hpp>

Inheritance diagram for lbann::pilot2_molecular_reader:
[legend]
Collaboration diagram for lbann::pilot2_molecular_reader:
[legend]

Public Member Functions

 pilot2_molecular_reader (int num_neighbors, int max_neighborhood, bool shuffle=true)
 
 pilot2_molecular_reader (const pilot2_molecular_reader &)=default
 
pilot2_molecular_reader & operator= (const pilot2_molecular_reader &)=default
 
 ~pilot2_molecular_reader () override
 
pilot2_molecular_reader * copy () const override
 
std::string get_type () const override
 
void load () override
 
int get_linearized_data_size () const override
 Get the linearized size (i.e. number of elements) in a sample. More...
 
const std::vector< El::Int > get_data_dims () const override
 Get the dimensions of the data. More...
 
template<class T >
T scale_data (int idx, T datum)
 
float * get_features_4 ()
 support for data_store_pilot2_molecular More...
 
double * get_features_8 ()
 
float * get_neighbors_4 ()
 
double * get_neighbors_8 ()
 
int get_word_size () const
 support for data_store_pilot2_molecular More...
 
int get_num_neighbors () const
 support for data_store_pilot2_molecular More...
 
int get_frame (int data_id) const
 
int get_num_samples_per_frame () const
 support for data_store_pilot2_molecular More...
 
int get_max_neighborhood () const
 support for data_store_pilot2_molecular More...
 
int get_num_features () const
 support for data_store_pilot2_molecular More...
 
int get_neighbors_data_size ()
 support for data_store_pilot2_molecular More...
 
- Public Member Functions inherited from lbann::generic_data_reader
 generic_data_reader (bool shuffle=true)
 
 generic_data_reader (const generic_data_reader &)=default
 
generic_data_reader & operator= (const generic_data_reader &)=default
 
virtual ~generic_data_reader ()
 
template<class Archive >
void serialize (Archive &ar)
 
void set_comm (lbann_comm *comm)
 set the comm object More...
 
lbann_comm * get_comm () const
 returns a (possibly nullptr) pointer to comm More...
 
virtual bool has_conduit_output ()
 
void set_file_dir (std::string s)
 
void set_local_file_dir (std::string s)
 
void set_max_files_to_load (size_t n)
 
std::string get_file_dir () const
 
std::string get_local_file_dir () const
 
void set_data_sample_list (std::string s)
 
std::string get_data_sample_list () const
 
void keep_sample_order (bool same_order=false)
 
void set_data_filename (std::string s)
 
std::string get_data_filename () const
 
void set_label_filename (std::string s)
 
std::string get_label_filename () const
 
void set_shuffle (bool b)
 
bool is_shuffled () const
 
void set_shuffled_indices (const std::vector< int > &indices)
 
const std::vector< int > & get_shuffled_indices () const
 
void set_first_n (int n)
 
void set_absolute_sample_count (size_t s)
 
void set_use_fraction (double s)
 
virtual void set_execution_mode_split_fraction (execution_mode m, double s)
 
virtual void set_role (std::string role)
 
std::string get_role () const
 
virtual void setup (int num_io_threads, observer_ptr< thread_pool > io_thread_pool)
 
int fetch (std::map< data_field_type, CPUMat *> &input_buffers, El::Matrix< El::Int > &indices_fetched, size_t mb_size)
 Fetch a mini-batch worth of data, including samples, labels, responses (as appropriate) More...
 
int fetch (std::vector< conduit::Node > &samples, El::Matrix< El::Int > &indices_fetched, size_t mb_size)
 
virtual bool has_data_field (data_field_type data_field) const
 Check to see if the data reader supports this specific data field. More...
 
virtual bool has_labels () const
 
virtual bool has_responses () const
 
void set_has_data_field (data_field_type const data_field, const bool b)
 Whether or not a data reader has a data field. More...
 
virtual void set_has_labels (const bool b)
 Whether or not a data reader has labels. More...
 
virtual void set_has_responses (const bool b)
 Whether or not a data reader has a response field. More...
 
void start_data_store_mini_batch_exchange ()
 
void finish_data_store_mini_batch_exchange ()
 
virtual bool update (bool is_active_reader)
 
virtual int get_num_labels () const
 Return the number of labels (classes) in this dataset. More...
 
virtual int get_num_responses () const
 Return the number of responses in this dataset. More...
 
virtual int get_linearized_label_size () const
 Get the linearized size (i.e. number of elements) in a label. More...
 
virtual int get_linearized_response_size () const
 Get the linearized size (i.e. number of elements) in a response. More...
 
virtual int get_linearized_size (data_field_type const &data_field) const
 Get the linearized size of what is identified by data_field. More...
 
virtual std::vector< El::Int > get_slice_points (const slice_points_mode var_category, bool &is_supported)
 
virtual bool position_valid () const
 True if the data reader's current position is valid. More...
 
virtual bool position_is_overrun () const
 
bool at_new_epoch () const
 True if the data reader is at the start of an epoch. More...
 
void set_mini_batch_size (const int s)
 Set the mini batch size. More...
 
int get_mini_batch_size () const
 Get the mini batch size. More...
 
int get_loaded_mini_batch_size () const
 Get the loaded mini-batch size. More...
 
int get_current_mini_batch_size () const
 Get the current mini-batch size. More...
 
int get_mini_batch_max () const
 Return the full mini_batch_size. More...
 
void set_stride_to_next_mini_batch (const int s)
 Set the mini batch stride. More...
 
int get_stride_to_next_mini_batch () const
 Return the mini batch stride. More...
 
void set_sample_stride (const int s)
 Set the sample stride. More...
 
int get_sample_stride () const
 Return the sample stride. More...
 
void set_iteration_stride (const int s)
 Set the iteration stride. More...
 
int get_iteration_stride () const
 Return the iteration stride. More...
 
virtual void set_base_offset (const int s)
 Set the base offset. More...
 
int get_base_offset () const
 Return the base offset. More...
 
void set_last_mini_batch_size (const int s)
 Set the last mini batch size. More...
 
int get_last_mini_batch_size () const
 Return the last mini batch size. More...
 
void set_stride_to_last_mini_batch (const int s)
 Set the last mini batch stride. More...
 
int get_stride_to_last_mini_batch () const
 Return the last mini batch stride. More...
 
void set_num_parallel_readers (const int s)
 Set the number of parallel readers per model. More...
 
int get_num_parallel_readers () const
 Return the number of parallel readers per model. More...
 
virtual void set_reset_mini_batch_index (const int s)
 Set the starting mini-batch index for the epoch. More...
 
int get_reset_mini_batch_index () const
 Return the starting mini-batch index for the epoch. More...
 
int get_loaded_mini_batch_index () const
 Return the current mini-batch index for the epoch. More...
 
int get_current_mini_batch_index () const
 Return the current mini-batch index for the epoch. More...
 
void set_initial_position ()
 Set the current position based on the base and model offsets. More...
 
int get_position () const
 Get the current position in the data reader. More...
 
int get_next_position () const
 Get the next position in the data reader. More...
 
int * get_indices ()
 Get a pointer to the start of the shuffled indices. More...
 
virtual int get_num_data () const
 Get the number of samples in this dataset. More...
 
int get_num_unused_data (execution_mode m) const
 Get the number of unused samples in this dataset. More...
 
int * get_unused_data (execution_mode m)
 Get a pointer to the start of the unused sample indices. More...
 
const std::vector< int > & get_unused_indices (execution_mode m)
 
void set_num_iterations_per_epoch (int num_iterations_per_epoch)
 Set the number of iterations in each epoch. More...
 
int get_num_iterations_per_epoch () const
 Get the number of iterations in each epoch. More...
 
int get_current_step_in_epoch () const
 
void resize_shuffled_indices ()
 
void select_subset_of_data ()
 
virtual void use_unused_index_set (execution_mode m)
 
virtual bool has_list_per_model () const
 Does the data reader have a unique sample list per model. More...
 
virtual bool has_list_per_trainer () const
 Does the data reader have a unique sample list per trainer. More...
 
bool save_to_checkpoint_shared (persist &p, execution_mode mode)
 Given directory to store checkpoint files, write state to file and add to number of bytes written. More...
 
bool load_from_checkpoint_shared (persist &p, execution_mode mode)
 Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
 
bool save_to_checkpoint_distributed (persist &p, execution_mode mode)
 
bool load_from_checkpoint_distributed (persist &p, execution_mode mode)
 Given directory to store checkpoint files, read state from file and add to number of bytes read. More...
 
const data_store_conduit & get_data_store () const
 returns a const ref to the data store More...
 
data_store_conduit & get_data_store ()
 returns a non-const ref to the data store More...
 
data_store_conduit * get_data_store_ptr () const
 
void setup_data_store (int mini_batch_size)
 
void instantiate_data_store ()
 
virtual void preload_data_store ()
 
void set_gan_labelling (bool has_gan_labelling)
 
void set_gan_label_value (int gan_label_value)
 
void set_data_store (data_store_conduit *g)
 support of data store functionality More...
 
virtual bool data_store_active () const
 
virtual bool priming_data_store () const
 
virtual void post_update ()
 
void set_transform_pipeline (transform::transform_pipeline &&tp)
 
void print_get_methods (const std::string filename)
 Print the return values from various get_X methods to file. More...
 
size_t get_num_indices_to_use () const
 
void set_use_data_store (bool s)
 

Protected Member Functions

bool fetch_datum (CPUMat &X, int data_id, int mb_idx) override
 Fetch a molecule and its neighbors. More...
 
void fetch_molecule (CPUMat &X, int data_id, int idx, int mb_idx)
 Fetch molecule data_id into X at molecule offset idx. More...
 
- Protected Member Functions inherited from lbann::generic_data_reader
size_t get_absolute_sample_count () const
 
double get_use_fraction () const
 
double get_execution_mode_split_fraction (execution_mode m) const
 
virtual bool fetch_data_block (std::map< data_field_type, CPUMat *> &input_buffers, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched)
 
bool fetch_data_block_conduit (std::vector< conduit::Node > &samples, El::Int block_offset, El::Int block_stride, El::Int mb_size, El::Matrix< El::Int > &indices_fetched)
 
virtual bool fetch_data_field (data_field_type data_field, CPUMat &Y, int data_id, int mb_idx)
 Called by fetch_data, fetch_label, fetch_response. More...
 
virtual bool fetch_conduit_node (conduit::Node &sample, int data_id)
 
virtual bool fetch_label (CPUMat &Y, int data_id, int mb_idx)
 
virtual bool fetch_response (CPUMat &Y, int data_id, int mb_idx)
 
CPUMat create_datum_view (CPUMat &X, const int mb_idx)
 
virtual void preprocess_data_source (int tid)
 
virtual void postprocess_data_source (int tid)
 
virtual void shuffle_indices ()
 Shuffle indices (uses the data_seq_generator) More...
 
virtual void shuffle_indices (rng_gen &gen)
 Shuffle indices and provide a random number generator. More...
 
void error_check_counts () const
 

Protected Attributes

int m_num_samples = 0
 Number of samples. More...
 
int m_num_features = 0
 Number of features in each sample. More...
 
int m_num_samples_per_frame = 0
 Number of samples in each frame (assume constant across all frames). More...
 
int m_num_neighbors
 
int m_max_neighborhood
 
cnpy::NpyArray m_features
 Molecular features. More...
 
cnpy::NpyArray m_neighbors
 Neighbor information (adjacency matrix). More...
 
DataType position_scale_factor = 320.0
 
DataType bond_len_scale_factor = 10.0
 
std::vector< El::Int > m_shape
 support for data_store_pilot2_molecular More...
 
int m_word_size
 support for data_store_pilot2_molecular More...
 
int m_owner
 support for data_store_pilot2_molecular More...
 
int m_neighbors_data_size
 support for data_store_pilot2_molecular More...
 
- Protected Attributes inherited from lbann::generic_data_reader
bool m_verbose = false
 
std::unordered_set< int > m_using_random_node
 
data_store_conduit * m_data_store
 
lbann_comm * m_comm
 
bool m_use_data_store = false
 
std::map< data_field_type, bool > m_supported_input_types
 Holds a true value for each input data type that is supported. Use an ordered map so that checkpoints are stable. More...
 
bool m_gan_labelling
 
int m_gan_label_value
 
observer_ptr< thread_pool > m_io_thread_pool
 
bool m_keep_sample_order
 
transform::transform_pipeline m_transform_pipeline
 
bool m_issue_warning
 

Additional Inherited Members

- Public Types inherited from lbann::generic_data_reader
using unused_index_map_t = std::map< execution_mode, std::vector< int > >
 
- Public Attributes inherited from lbann::generic_data_reader
int m_mini_batch_size
 
int m_current_pos
 
int m_stride_to_next_mini_batch
 
int m_base_offset
 
int m_sample_stride
 
int m_iteration_stride
 Stride used by parallel data readers within the model. More...
 
std::vector< int > m_shuffled_indices
 
unused_index_map_t m_unused_indices
 Record of the indices that are not being used for training. More...
 
int m_last_mini_batch_size
 
int m_stride_to_last_mini_batch
 
int m_reset_mini_batch_index
 The index at which this data reader starts its epoch. More...
 
int m_loaded_mini_batch_idx
 The index of the current mini-batch that has been loaded. More...
 
int m_current_mini_batch_idx
 
int m_num_iterations_per_epoch
 How many iterations all readers will execute. More...
 
int m_num_parallel_readers
 How many parallel readers are being used. More...
 
size_t m_max_files_to_load
 
std::string m_file_dir
 
std::string m_local_file_dir
 
std::string m_data_sample_list
 
std::string m_data_fn
 
std::string m_label_fn
 
bool m_shuffle
 
size_t m_absolute_sample_count
 
std::map< execution_mode, double > m_execution_mode_split_fraction
 
double m_use_fraction
 
int m_first_n
 
std::string m_role
 

Detailed Description

Data reader for loading Pilot 2 molecular data.

Definition at line 41 of file data_reader_pilot2_molecular.hpp.

Constructor & Destructor Documentation

◆ pilot2_molecular_reader() [1/2]

lbann::pilot2_molecular_reader::pilot2_molecular_reader ( int  num_neighbors,
int  max_neighborhood,
bool  shuffle = true 
)
Here is the caller graph for this function:

◆ pilot2_molecular_reader() [2/2]

lbann::pilot2_molecular_reader::pilot2_molecular_reader ( const pilot2_molecular_reader & )
default

◆ ~pilot2_molecular_reader()

lbann::pilot2_molecular_reader::~pilot2_molecular_reader ( )
inline override

Definition at line 49 of file data_reader_pilot2_molecular.hpp.

Member Function Documentation

◆ copy()

pilot2_molecular_reader* lbann::pilot2_molecular_reader::copy ( ) const
inline override virtual

Implements lbann::generic_data_reader.

Definition at line 50 of file data_reader_pilot2_molecular.hpp.

Here is the call graph for this function:

◆ fetch_datum()

bool lbann::pilot2_molecular_reader::fetch_datum ( CPUMat &  X,
int  data_id,
int  mb_idx 
)
override protected virtual

Fetch a molecule and its neighbors.

Reimplemented from lbann::generic_data_reader.

Here is the caller graph for this function:

◆ fetch_molecule()

void lbann::pilot2_molecular_reader::fetch_molecule ( CPUMat &  X,
int  data_id,
int  idx,
int  mb_idx 
)
protected

Fetch molecule data_id into X at molecule offset idx.

Here is the caller graph for this function:

◆ get_data_dims()

const std::vector<El::Int> lbann::pilot2_molecular_reader::get_data_dims ( ) const
inline override virtual

Get the dimensions of the data.

Reimplemented from lbann::generic_data_reader.

Definition at line 62 of file data_reader_pilot2_molecular.hpp.

◆ get_features_4()

float* lbann::pilot2_molecular_reader::get_features_4 ( )
inline

support for data_store_pilot2_molecular

Definition at line 89 of file data_reader_pilot2_molecular.hpp.

◆ get_features_8()

double* lbann::pilot2_molecular_reader::get_features_8 ( )
inline

Definition at line 90 of file data_reader_pilot2_molecular.hpp.

◆ get_frame()

int lbann::pilot2_molecular_reader::get_frame ( int  data_id) const
inline

Return the frame data_id is in. (made public to support data_store_pilot2_molecular)

Definition at line 102 of file data_reader_pilot2_molecular.hpp.

◆ get_linearized_data_size()

int lbann::pilot2_molecular_reader::get_linearized_data_size ( ) const
inline override virtual

Get the linearized size (i.e. number of elements) in a sample.

Reimplemented from lbann::generic_data_reader.

Definition at line 58 of file data_reader_pilot2_molecular.hpp.

◆ get_max_neighborhood()

int lbann::pilot2_molecular_reader::get_max_neighborhood ( ) const
inline

support for data_store_pilot2_molecular

Definition at line 108 of file data_reader_pilot2_molecular.hpp.

◆ get_neighbors_4()

float* lbann::pilot2_molecular_reader::get_neighbors_4 ( )
inline

Definition at line 91 of file data_reader_pilot2_molecular.hpp.

◆ get_neighbors_8()

double* lbann::pilot2_molecular_reader::get_neighbors_8 ( )
inline

Definition at line 92 of file data_reader_pilot2_molecular.hpp.

◆ get_neighbors_data_size()

int lbann::pilot2_molecular_reader::get_neighbors_data_size ( )
inline

support for data_store_pilot2_molecular

Definition at line 114 of file data_reader_pilot2_molecular.hpp.

Here is the call graph for this function:

◆ get_num_features()

int lbann::pilot2_molecular_reader::get_num_features ( ) const
inline

support for data_store_pilot2_molecular

Definition at line 111 of file data_reader_pilot2_molecular.hpp.

◆ get_num_neighbors()

int lbann::pilot2_molecular_reader::get_num_neighbors ( ) const
inline

support for data_store_pilot2_molecular

Definition at line 98 of file data_reader_pilot2_molecular.hpp.

◆ get_num_samples_per_frame()

int lbann::pilot2_molecular_reader::get_num_samples_per_frame ( ) const
inline

support for data_store_pilot2_molecular

Definition at line 105 of file data_reader_pilot2_molecular.hpp.

◆ get_type()

std::string lbann::pilot2_molecular_reader::get_type ( ) const
inline override virtual

Return this data_reader's type

Implements lbann::generic_data_reader.

Definition at line 54 of file data_reader_pilot2_molecular.hpp.

Here is the call graph for this function:

◆ get_word_size()

int lbann::pilot2_molecular_reader::get_word_size ( ) const
inline

support for data_store_pilot2_molecular

Definition at line 95 of file data_reader_pilot2_molecular.hpp.

◆ load()

void lbann::pilot2_molecular_reader::load ( )
override virtual

Load the dataset. Each data reader implementation should implement this to initialize its internal data structures, determine the number of samples and their dimensionality (if needed), and set up and shuffle samples.

Implements lbann::generic_data_reader.

Here is the caller graph for this function:

◆ operator=()

pilot2_molecular_reader& lbann::pilot2_molecular_reader::operator= ( const pilot2_molecular_reader & )
default

◆ scale_data()

template<class T >
T lbann::pilot2_molecular_reader::scale_data ( int  idx,
T  datum 
)
inline

Data format is: [Frames (2900), Molecules (3040), Beads (12), ['x', 'y', 'z', 'CHOL', 'DPPC', 'DIPC', 'Head', 'Tail', 'BL1', 'BL2', 'BL3', 'BL4', 'BL5', 'BL6', 'BL7', 'BL8', 'BL9', 'BL10', 'BL11', 'BL12'] (20)]

x,y,z

Definition at line 75 of file data_reader_pilot2_molecular.hpp.

Member Data Documentation

◆ bond_len_scale_factor

DataType lbann::pilot2_molecular_reader::bond_len_scale_factor = 10.0
protected

Definition at line 138 of file data_reader_pilot2_molecular.hpp.

◆ m_features

cnpy::NpyArray lbann::pilot2_molecular_reader::m_features
protected

Molecular features.

Definition at line 133 of file data_reader_pilot2_molecular.hpp.

◆ m_max_neighborhood

int lbann::pilot2_molecular_reader::m_max_neighborhood
protected

Definition at line 131 of file data_reader_pilot2_molecular.hpp.

◆ m_neighbors

cnpy::NpyArray lbann::pilot2_molecular_reader::m_neighbors
protected

Neighbor information (adjacency matrix).

Definition at line 135 of file data_reader_pilot2_molecular.hpp.

◆ m_neighbors_data_size

int lbann::pilot2_molecular_reader::m_neighbors_data_size
protected

support for data_store_pilot2_molecular

Definition at line 148 of file data_reader_pilot2_molecular.hpp.

◆ m_num_features

int lbann::pilot2_molecular_reader::m_num_features = 0
protected

Number of features in each sample.

Definition at line 125 of file data_reader_pilot2_molecular.hpp.

◆ m_num_neighbors

int lbann::pilot2_molecular_reader::m_num_neighbors
protected

Definition at line 129 of file data_reader_pilot2_molecular.hpp.

◆ m_num_samples

int lbann::pilot2_molecular_reader::m_num_samples = 0
protected

Number of samples.

Definition at line 123 of file data_reader_pilot2_molecular.hpp.

◆ m_num_samples_per_frame

int lbann::pilot2_molecular_reader::m_num_samples_per_frame = 0
protected

Number of samples in each frame (assume constant across all frames).

Definition at line 127 of file data_reader_pilot2_molecular.hpp.

◆ m_owner

int lbann::pilot2_molecular_reader::m_owner
protected

support for data_store_pilot2_molecular

Definition at line 146 of file data_reader_pilot2_molecular.hpp.

◆ m_shape

std::vector<El::Int> lbann::pilot2_molecular_reader::m_shape
protected

support for data_store_pilot2_molecular

Definition at line 141 of file data_reader_pilot2_molecular.hpp.

◆ m_word_size

int lbann::pilot2_molecular_reader::m_word_size
protected

support for data_store_pilot2_molecular

Definition at line 144 of file data_reader_pilot2_molecular.hpp.

◆ position_scale_factor

DataType lbann::pilot2_molecular_reader::position_scale_factor = 320.0
protected

Definition at line 137 of file data_reader_pilot2_molecular.hpp.


The documentation for this class was generated from the following file: