// Include guard (LBANN_PERSIST_H) for this header, followed by the
// declaration of to_string(persist_type), which is declared inline and
// returns a std::string by value. The definition is not part of this excerpt.
// NOTE(review): presumably yields a printable name for the given
// persist_type value -- confirm against the definition.
29 #ifndef LBANN_PERSIST_H 30 #define LBANN_PERSIST_H 60 inline std::string
to_string(persist_type pt);
87 template <
class Archive>
/**
 * Declaration only; the definition is not visible in this excerpt.
 *
 * @param dir        Directory path, taken by const reference.
 * @param create_dir Boolean flag. NOTE(review): presumably requests that
 *                   `dir` be created when it does not exist -- confirm
 *                   against the definition.
 *
 * NOTE(review): how this differs from open_checkpoint(), which has the same
 * parameter list, cannot be determined from this excerpt.
 */
94 void open_checkpoint_dir(
const std::string& dir,
bool create_dir);
/**
 * Declaration only; the definition is not visible in this excerpt.
 *
 * @param dir        Directory path, taken by const reference.
 * @param create_dir Boolean flag. NOTE(review): presumably requests that
 *                   `dir` be created when it does not exist -- confirm
 *                   against the definition.
 *
 * NOTE(review): presumably paired with close_checkpoint(); verify.
 */
95 void open_checkpoint(
const std::string& dir,
bool create_dir);
/**
 * Takes no arguments and returns nothing; declaration only.
 * NOTE(review): presumably releases whatever open_checkpoint() acquired --
 * confirm against the definition, which is not visible in this excerpt.
 */
96 void close_checkpoint();
/**
 * Declaration only; the definition is not visible in this excerpt.
 *
 * @param dir Directory path, taken by const reference.
 *            NOTE(review): presumably the directory holding a previously
 *            written checkpoint to restart from -- confirm.
 */
98 void open_restart(
const std::string& dir);
105 for (
auto& pt : m_bytes) {
113 for (
auto& pt : m_bytes) {
/**
 * Function template over the matrix element type; declaration only.
 *
 * NOTE(review): the embedded listing numbers jump from 119 to 121, so a
 * parameter line between `type` and `M` appears to be missing from this
 * excerpt -- consult the full header for the real signature.
 *
 * @tparam TensorDataType Element type of the distributed matrix.
 * @param type Selects a persist_type value.
 * @param M    Matrix taken by const reference (not modified through this
 *             parameter).
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
118 template <
typename TensorDataType>
119 bool write_rank_distmat(persist_type type,
121 const El::AbstractDistMatrix<TensorDataType>& M);
/**
 * Function template over the matrix element type; declaration only.
 *
 * NOTE(review): the embedded listing numbers jump from 123 to 125, so a
 * parameter line between `type` and `M` appears to be missing from this
 * excerpt -- consult the full header for the real signature.
 *
 * @tparam TensorDataType Element type of the distributed matrix.
 * @param type Selects a persist_type value.
 * @param M    Matrix taken by non-const reference, so the callee may
 *             modify it.
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
122 template <
typename TensorDataType>
123 bool read_rank_distmat(persist_type type,
125 El::AbstractDistMatrix<TensorDataType>& M);
/**
 * Function template over the matrix element type; declaration only.
 *
 * NOTE(review): the embedded listing numbers jump from 128 to 130, so a
 * parameter line between `type` and `M` appears to be missing from this
 * excerpt -- consult the full header for the real signature.
 *
 * @tparam TensorDataType Element type of the distributed matrix.
 * @param type Selects a persist_type value.
 * @param M    Pointer to the matrix. Note that it is a pointer to non-const
 *             even though this is the "write" variant, unlike
 *             write_rank_distmat(), which takes a const reference.
 *             NOTE(review): whether a null pointer is tolerated cannot be
 *             determined from this excerpt.
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
127 template <
typename TensorDataType>
128 bool write_distmat(persist_type type,
130 El::AbstractDistMatrix<TensorDataType>* M);
/**
 * Function template over the matrix element type; declaration only.
 *
 * NOTE(review): the embedded listing numbers jump from 132 to 134, so a
 * parameter line between `type` and `M` appears to be missing from this
 * excerpt -- consult the full header for the real signature.
 *
 * @tparam TensorDataType Element type of the distributed matrix.
 * @param type Selects a persist_type value.
 * @param M    Pointer to the matrix (non-const pointee, so the callee may
 *             modify it). NOTE(review): whether a null pointer is tolerated
 *             cannot be determined from this excerpt.
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
131 template <
typename TensorDataType>
132 bool read_distmat(persist_type type,
134 El::AbstractDistMatrix<TensorDataType>* M);
/**
 * Const-qualified member function returning a std::string by value;
 * declaration only.
 *
 * @param type Selects a persist_type value.
 * @return A file name as std::string. NOTE(review): presumably the name
 *         associated with `type` in this object's state -- confirm against
 *         the definition, which is not visible in this excerpt.
 */
138 std::string get_filename(persist_type type)
const;
/**
 * Declaration only; the definition is not visible in this excerpt.
 *
 * @param fd   Integer handle. NOTE(review): presumably an open file
 *             descriptor -- confirm.
 * @param name C string. NOTE(review): presumably a label or file name used
 *             for diagnostics -- confirm.
 * @param buf  Pointer to read-only data (const void*).
 * @param size Size value. NOTE(review): presumably the byte count to write
 *             from `buf` -- confirm.
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
141 bool write_bytes(
int fd,
const char* name,
const void* buf,
size_t size);
/**
 * Declaration only; the definition is not visible in this excerpt.
 *
 * @param fd   Integer handle. NOTE(review): presumably an open file
 *             descriptor -- confirm.
 * @param name C string. NOTE(review): presumably a label or file name used
 *             for diagnostics -- confirm.
 * @param buf  Pointer to writable storage (void*).
 * @param size Size value. NOTE(review): presumably the byte count to read
 *             into `buf` -- confirm.
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
142 bool read_bytes(
int fd,
const char* name,
void* buf,
size_t size);
/**
 * Declaration only; the definition is not visible in this excerpt.
 * Same parameter layout as write_bytes(), but `buf` is typed as
 * const char* rather than const void*.
 *
 * @param fd   Integer handle. NOTE(review): presumably an open file
 *             descriptor -- confirm.
 * @param name C string. NOTE(review): presumably a label or file name used
 *             for diagnostics -- confirm.
 * @param buf  Pointer to read-only character data.
 * @param size Size value. NOTE(review): whether this counts a terminating
 *             NUL cannot be determined from this excerpt.
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
144 bool write_string(
int fd,
const char* name,
const char* buf,
size_t size);
/**
 * Declaration only; the definition is not visible in this excerpt.
 * Same parameter layout as read_bytes(), but `buf` is typed as char*
 * rather than void*.
 *
 * @param fd   Integer handle. NOTE(review): presumably an open file
 *             descriptor -- confirm.
 * @param name C string. NOTE(review): presumably a label or file name used
 *             for diagnostics -- confirm.
 * @param buf  Pointer to writable character storage.
 * @param size Size value. NOTE(review): whether the result is
 *             NUL-terminated within `size` cannot be determined from this
 *             excerpt.
 * @return bool. NOTE(review): presumably true on success -- confirm.
 */
145 bool read_string(
int fd,
const char* name,
char* buf,
size_t size);
151 : std::runtime_error(std::string(
"Archive file not found: ") + filename)
155 template <
typename C>
158 template <
typename C>
161 template <
typename C>
165 const std::string& suffix);
167 template <
typename C>
171 const std::string& suffix);
173 template <
typename C>
176 template <
typename C>
179 template <
typename C>
183 const std::string& suffix);
185 template <
typename C>
189 const std::string& suffix);
191 template <
typename C>
194 template <
typename C>
197 template <
typename C>
200 const std::string& filename);
202 template <
typename C>
206 const std::string& filename);
208 template <
typename C>
213 const std::string& suffix);
215 template <
typename C>
220 const std::string& suffix);
// Region active only when LBANN_PERSIST_INSTANTIATE is NOT defined. It holds
// `extern template` declarations for persist::write_rank_distmat<T>,
// persist::read_rank_distmat<T>, persist::write_distmat<T> and
// persist::read_distmat<T>, joined by backslash line continuations (i.e. they
// form a macro body; the `#define` line that names the macro is not visible
// in this excerpt). LBANN_INSTANTIATE_CPU_HALF and LBANN_INSTANTIATE_GPU_HALF
// are defined and later undefined inside the region. The trailing #endif
// closes the LBANN_PERSIST_H include guard.
// NOTE(review): presumably the translation unit that defines
// LBANN_PERSIST_INSTANTIATE provides the matching explicit instantiations --
// confirm.
222 #ifndef LBANN_PERSIST_INSTANTIATE 224 extern template bool persist::write_rank_distmat<T>( \ 227 const El::AbstractDistMatrix<T>& M); \ 228 extern template bool persist::read_rank_distmat<T>( \ 231 El::AbstractDistMatrix<T>& M); \ 232 extern template bool persist::write_distmat<T>( \ 235 El::AbstractDistMatrix<T>* M); \ 236 extern template bool persist::read_distmat<T>(persist_type type, \ 238 El::AbstractDistMatrix<T>* M) 240 #define LBANN_INSTANTIATE_CPU_HALF 241 #define LBANN_INSTANTIATE_GPU_HALF 244 #undef LBANN_INSTANTIATE_CPU_HALF 245 #undef LBANN_INSTANTIATE_GPU_HALF 246 #endif // LBANN_PERSIST_INSTANTIATE 250 #endif // LBANN_PERSIST_H
Create an iterator that goes over a contiguous (unit-step) enum class.
void serialize(std::ostream &os, google::protobuf::Message const &msg)
Serialize the protobuf message to a stream.
persist_type execution_mode_to_persist_type(execution_mode m)
const std::string & get_checkpoint_dir() const
uint64_t get_bytes() const
std::map< persist_type, uint64_t > m_bytes
void read_cereal_archive(C &obj, const std::string &filename)
std::string to_string(El::Device const &d)
void load_from_shared_cereal_archive(C &obj, lbann_comm &comm, const std::string &filename)
NonexistentArchiveFile(std::string const &filename)
std::string create_cereal_archive_binary_string(C &obj)
callback_type get_cb_type() const
bool read_bytes(int fd, const char *name, void *buf, size_t size)
void set_restart_dir(const std::string &dir)
execution_mode
Neural network execution mode.
bool create_dir(const std::string output_dir)
bool read_string(int fd, const char *name, char *buf, size_t size)
bool write_string(int fd, const char *name, const char *buf, size_t size)
bool write_bytes(int fd, const char *name, const void *buf, size_t size)
void write_cereal_archive(C &obj, const std::string &filename)
std::map< persist_type, std::string > m_filenames
void set_cb_type(callback_type type)
std::string m_checkpoint_dir
void unpack_cereal_archive_binary_string(C &obj, const std::string &buf)