27 #ifndef LBANN_DATA_READERS_SAMPLE_LIST_HPP 28 #define LBANN_DATA_READERS_SAMPLE_LIST_HPP 46 "MULTI-SAMPLE_INCLUSION_V2";
88 template <
class Archive>
92 template <
typename sample_name_t>
/// A sample entry: a pair of a file identifier (`sample_file_id_t`) and the
/// sample's name (`sample_name_t`).
101 using sample_t = std::template pair<sample_file_id_t, sample_name_t>;
/// Load a sample list from the input stream `istrm`.
/// `stride` defaults to 1 and `offset` defaults to 0.
/// NOTE(review): presumably only every `stride`-th entry starting at `offset`
/// is kept, so the defaults read the whole list -- confirm against the
/// definition.
121 void load(std::istream& istrm,
size_t stride = 1,
size_t offset = 0);
127 void load(
const std::string& samplelist_file,
/// Overload of load() that reads from `istrm` and additionally takes the
/// communicator `comm` and an `interleave` flag.
/// NOTE(review): presumably the list is divided among the ranks of `comm`,
/// in interleaved fashion when `interleave` is true -- confirm against the
/// definition.
130 void load(std::istream& istrm,
const lbann_comm& comm,
bool interleave);
139 void load_from_string(
const std::string& samplelist,
/// Size of the sample list (virtual, const).
/// NOTE(review): presumably the number of entries in m_sample_list -- confirm
/// against the definition.
144 virtual size_t size()
const;
153 template <
class Archive>
/// Produce a string form of the sample list in the output parameter `sstr`.
/// NOTE(review): the bool result presumably reports success -- confirm
/// against the definition.
157 virtual bool to_string(std::string& sstr)
const;
/// Write the sample list to `filename`.
/// NOTE(review): presumably `filename` is the path of the output file --
/// confirm against the definition.
/// NOTE(review): `filename` is taken by const value, so every call copies the
/// string. `const std::string&` would avoid the copy, but any out-of-line
/// definition has to be changed together with this declaration.
160 void write(
const std::string filename)
const;
/// Read-only access to the sample entry at position `idx`.
/// NOTE(review): whether `idx` is range-checked is not visible from the
/// declaration -- confirm against the definition.
169 const sample_t& operator[](
size_t idx)
const;
/// Return a const reference to the directory name of the samples.
/// NOTE(review): presumably the directory recorded in the list header
/// (m_header) -- confirm against the definition.
173 const std::string& get_samples_dirname()
const;
176 void all_gather_archive(
const std::string& archive,
177 std::vector<std::string>& gathered_archive,
179 void all_gather_archive_new(
const std::string& archive,
180 std::vector<std::string>& gathered_archive,
183 template <
typename T>
185 all_gather_field(T
data, std::vector<T>& gathered_data,
lbann_comm& comm);
/// Virtual operation over the communicator `comm`.
/// NOTE(review): judging by the name, gathers the packed sample lists of all
/// ranks onto every rank -- confirm against the definition.
186 virtual void all_gather_packed_lists(
lbann_comm& comm);
/// Choose whether the original sample order is kept.
/// NOTE(review): presumably stores `keep` in m_keep_order (documented as
/// maintaining the order listed in the file) -- confirm against the definition.
189 void keep_sample_order(
bool keep);
/// Turn the data-file check on.
/// NOTE(review): presumably sets m_check_data_file (whether the existence of
/// a data file is checked) to true -- confirm against the definition.
196 void set_data_file_check();
/// Turn the data-file check off.
/// NOTE(review): presumably sets m_check_data_file (whether the existence of
/// a data file is checked) to false -- confirm against the definition.
198 void unset_data_file_check();
/// Build the lookup from sample name to sample-list index.
/// NOTE(review): presumably populates m_map_name_to_idx from m_sample_list --
/// confirm against the definition.
201 void build_sample_map_from_name_to_index();
/// Discard the lookup from sample name to sample-list index.
/// NOTE(review): presumably empties m_map_name_to_idx -- confirm against the
/// definition.
204 void clear_sample_map_from_name_to_index();
/// Read a header line from `ifs` and return it as a string.
/// NOTE(review): `listname` and `info` are presumably used only to build an
/// error message when the line cannot be read -- confirm against the
/// definition.
212 std::string read_header_line(std::istream& ifs,
213 const std::string& listname,
214 const std::string& info);
/// Read the sample list header from `istrm`.
/// NOTE(review): presumably fills m_header -- confirm against the definition.
217 void read_header(std::istream& istrm);
222 read_sample_list(std::istream& istrm,
size_t stride = 1,
size_t offset = 0);
/// Virtual hook for assigning names to the samples.
/// NOTE(review): what names are assigned, and when this is called, is not
/// visible from the declaration -- confirm against the definition.
226 virtual void assign_samples_name();
229 size_t get_samples_per_file(std::istream& istrm,
/// Emit the sample list header into the output parameter `sstr`.
/// NOTE(review): `num_files` is presumably the file count recorded in the
/// header; whether `sstr` is appended to or overwritten is not visible here --
/// confirm against the definition.
234 void write_header(std::string& sstr,
size_t num_files)
const;
238 get_num_samples(
size_t& total,
size_t& included,
size_t& excluded)
const;
241 const std::string& filename);
/// Virtual hook that reorders the sample list.
/// NOTE(review): presumably restores the order listed in the file when
/// m_keep_order is set -- confirm against the definition.
244 virtual void reorder();
272 template <
typename T>
277 #endif // LBANN_DATA_READERS_SAMPLE_LIST_HPP
size_t m_stride
The stride used in loading sample list file.
std::size_t sample_file_id_t
The type for the index assigned to each sample file.
sample_list_header m_header
Header information of the sample list.
std::template vector< sample_t > samples_t
Type for the list of samples.
static const std::string multi_sample_inclusion
void write(std::ostream &os, google::protobuf::Message const &msg)
Write the protobuf message in prototext in a stream.
T uninitialized_sample_name()
std::unordered_map< long long, sample_idx_t > sample_map_t
Type for the map from sample name to the sample list index.
T & data(const cnpy::NpyArray &na, const std::vector< size_t > indices)
sample_map_t m_map_name_to_idx
Map from sample name to the corresponding index into the sample list.
void load(std::string const &pbuf_filename, google::protobuf::Message &msg)
Fill the protobuf message from a binary file.
static const std::string conduit_hdf5_inclusion
std::string to_string(El::Device const &d)
static const std::string conduit_hdf5_exclusion
static const std::string multi_sample_exclusion
bool m_keep_order
Whether to maintain the original sample order as listed in the file.
std::template pair< sample_file_id_t, long long > sample_t
samples_t m_sample_list
List of all samples with a file identifier and sample name for each sample.
static const std::string multi_sample_inclusion_v2
bool m_check_data_file
Whether to check the existence of the data file.
void handle_mpi_error(int ierr)
file_id_stats_v_t m_file_id_stats_map
Maps each sample's file id to its file name; file_id_stats_v_t is a std::vector< std::string >, so no file descriptors or use counts are stored here.
std::vector< std::string > file_id_stats_v_t
Mapping of the file index to the filename.
static const std::string single_sample
typename samples_t::size_type sample_idx_t
Type for the index into the sample list.