LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
base.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_BASE_HPP_INCLUDED
28 #define LBANN_BASE_HPP_INCLUDED
29 
30 #include <El.hpp>
31 
32 // Defines, among other things, DataType.
33 #include "lbann_config.hpp"
34 
38 
39 // Support for OpenMP macros
41 
42 #include <functional>
43 #include <iostream>
44 #include <memory>
45 #include <string>
46 
47 namespace lbann {
48 
49 // Forward-declaration.
50 class lbann_comm;
51 
52 /// Creating an observer_ptr to complement the unique_ptr and shared_ptr.
53 template <typename T>
54 using observer_ptr = typename std::add_pointer<T>::type;
55 
56 // Note that this should only be used to wrap the thing coming out of
57 // initialize()! This will be removed when we have proper RAII around
58 // these things.
59 using world_comm_ptr =
60  std::unique_ptr<lbann_comm, std::function<void(lbann_comm*)>>;
61 
73 world_comm_ptr initialize(int& argc, char**& argv);
74 
79 /// Initialize LBANN for use with external applications.
80 std::unique_ptr<lbann_comm> initialize_lbann(int argc, char** argv);
81 
86 std::unique_ptr<lbann_comm> initialize_lbann(MPI_Comm c);
87 
92 std::unique_ptr<lbann_comm> initialize_lbann(El::mpi::Comm&& c);
93 
96 /// Destroy LBANN communicator for external application.
97 void finalize_lbann(lbann_comm* comm = nullptr);
98 
104 void finalize(lbann_comm* comm = nullptr);
105 
106 #ifdef LBANN_HAS_HALF
107 using cpu_fp16 = El::cpu_half_type;
108 #endif
109 
110 #ifdef LBANN_HAS_GPU_FP16
111 using fp16 = El::gpu_half_type;
112 #endif
113 
114 // Typedefs for Elemental matrices
115 using AbsMat = El::AbstractMatrix<DataType>;
116 using CPUMat = El::Matrix<DataType, El::Device::CPU>;
117 #ifdef LBANN_HAS_GPU
118 using GPUMat = El::Matrix<DataType, El::Device::GPU>;
119 #endif // LBANN_HAS_GPU
120 using AbsDistMat = El::AbstractDistMatrix<DataType>;
121 using BaseDistMat = El::BaseDistMatrix;
122 
123 // Deprecated typedefs
125 using EGrid = El::Grid;
126 using Grid = El::Grid;
127 template <El::Device D>
128 using DMat = El::Matrix<DataType, D>;
129 template <El::Device D>
130 using AbsDistMatReadProxy = El::AbstractDistMatrixReadDeviceProxy<DataType, D>;
131 using ElMat = El::ElementalMatrix<DataType>;
132 using BlockMat = El::BlockMatrix<DataType>;
133 
134 template <typename TensorDataType>
135 using CPUMatDT = El::Matrix<TensorDataType, El::Device::CPU>;
136 
137 template <typename TensorDataType, El::Device D>
138 using MCMRMatDT =
139  El::DistMatrix<TensorDataType, El::MC, El::MR, El::ELEMENT, D>;
140 template <typename TensorDataType, El::Device D>
141 using CircMatDT =
142  El::DistMatrix<TensorDataType, El::CIRC, El::CIRC, El::ELEMENT, D>;
143 template <typename TensorDataType, El::Device D>
144 using StarMatDT =
145  El::DistMatrix<TensorDataType, El::STAR, El::STAR, El::ELEMENT, D>;
146 template <typename TensorDataType, El::Device D>
147 using StarVCMatDT =
148  El::DistMatrix<TensorDataType, El::STAR, El::VC, El::ELEMENT, D>;
149 template <typename TensorDataType, El::Device D>
150 using VCStarMatDT = El::DistMatrix<TensorDataType,
151  El::VC,
152  El::STAR,
153  El::ELEMENT,
154  D>;
155 template <typename TensorDataType, El::Device D>
156 using MCStarMatDT = El::
157  DistMatrix<TensorDataType, El::MC, El::STAR, El::ELEMENT, D>;
158 template <typename TensorDataType, El::Device D>
159 using MRStarMatDT = El::
160  DistMatrix<TensorDataType, El::MR, El::STAR, El::ELEMENT, D>;
161 template <typename TensorDataType, El::Device D>
162 using StarMRMatDT =
163  El::DistMatrix<TensorDataType, El::STAR, El::MR, El::ELEMENT, D>;
164 template <typename TensorDataType>
165 using DistMatDT = MCMRMatDT<TensorDataType, El::Device::CPU>; // MC x MR on the CPU
166 
167 template <El::Device D>
168 using MCMRMat = MCMRMatDT<DataType, D>;
169 template <El::Device D>
170 using CircMat = CircMatDT<DataType, D>;
171 template <El::Device D>
172 using StarMat = StarMatDT<DataType, D>;
173 template <El::Device D>
174 using StarVCMat = StarVCMatDT<DataType, D>;
175 template <El::Device D>
176 using VCStarMat = VCStarMatDT<DataType, D>;
177 template <El::Device D>
178 using MCStarMat = MCStarMatDT<DataType, D>; // ColSumStarVCMat
179 template <El::Device D>
180 using MRStarMat = MRStarMatDT<DataType, D>; // RowSumMat
181 template <El::Device D>
182 using StarMRMat = StarMRMatDT<DataType, D>; // ColSumMat
183 using DistMat = MCMRMat<El::Device::CPU>; // Not a template: device fixed to CPU
184 using Mat =
185  El::Matrix<DataType, El::Device::CPU>; // Temporarily define as CPUMat
186 
187 // Datatype for model evaluation
188 // Examples: timing, metrics, objective functions
189 using EvalType = double;
190 
191 /// Distributed matrix format.
192 enum class matrix_format
193 {
194  MC_MR,
195  CIRC_CIRC,
196  STAR_STAR,
197  STAR_VC,
198  MC_STAR,
199  invalid
200 };
201 
204 {
205  PROPAGATE_NOTHING = 0, // Stop gradient computation to parents (including
206  // error signals)
207  ERROR_SIGNALS = 1, // Error signals from child layers
208  PREV_ACTIVATIONS = 2, // Input activations from forward pass
209  ACTIVATIONS = 4, // Output activations from forward pass
210  WEIGHTS = 8, // Weights
211 };
212 
214 std::string to_string(El::Device const& d);
215 El::Device device_from_string(std::string const& str);
216 
218 enum class data_layout
219 {
222  invalid
223 };
225 std::string to_string(data_layout const& dl);
226 data_layout data_layout_from_string(std::string const& str);
227 
228 /// Neural network execution mode.
229 enum class execution_mode
230 {
231  training,
232  validation,
233  testing,
234  prediction,
235  tournament,
236  inference,
237  invalid
238 };
239 std::string to_string(execution_mode m);
243 
244 /// Convert a string to an execution_mode.
245 execution_mode exec_mode_from_string(std::string const& str);
246 
247 /*
248  * endsWith:
249  * http://thispointer.com/c-how-to-check-if-a-string-ends-with-an-another-given-string/
250  * Case Sensitive Implementation of endsWith()
251  * It checks if the string 'mainStr' ends with given string
252  * 'toMatch'
253  */
254 bool endsWith(const std::string mainStr, const std::string& toMatch);
255 
256 /// Print the dimensions and name of an Elemental matrix.
257 void print_matrix_dims(AbsDistMat* m, const char* name);
258 #define LBANN_PRINT_MATRIX_DIMS(x) print_matrix_dims(x, #x);
259 
260 /// Print the dimensions and name of an Elemental matrix.
261 void print_local_matrix_dims(AbsMat* m, const char* name);
262 #define LBANN_PRINT_LOCAL_MATRIX_DIMS(x) print_local_matrix_dims(x, #x);
263 
264 #define LBANN_MAKE_STR_(x) #x
265 #define LBANN_MAKE_STR(x) LBANN_MAKE_STR_(x)
266 
267 void lbann_mpi_err_handler(MPI_Comm* comm, int* err_code, ...);
268 
269 } // namespace lbann
270 
271 /// Stream-extraction operator: reads an lbann::execution_mode from `is` into `e`.
272 std::istream& operator>>(std::istream& is, lbann::execution_mode& e);
273 
274 #endif // LBANN_BASE_HPP_INCLUDED
MCMRMatDT< TensorDataType, El::Device::CPU > DistMatDT
Definition: base.hpp:165
MCMRMatDT< DataType, D > MCMRMat
Definition: base.hpp:168
El::DistMatrix< TensorDataType, El::STAR, El::STAR, El::ELEMENT, D > StarMatDT
Definition: base.hpp:145
VCStarMatDT< DataType, D > VCStarMat
Definition: base.hpp:176
El::Matrix< DataType, D > DMat
Definition: base.hpp:128
El::AbstractDistMatrixReadDeviceProxy< DataType, D > AbsDistMatReadProxy
Definition: base.hpp:130
StarMRMatDT< DataType, D > StarMRMat
ColSumMat.
Definition: base.hpp:182
El::AbstractDistMatrix< DataType > AbsDistMat
Definition: base.hpp:120
El::Grid Grid
Definition: base.hpp:126
El::DistMatrix< TensorDataType, El::STAR, El::VC, El::ELEMENT, D > StarVCMatDT
Definition: base.hpp:148
MCMRMat< El::Device::CPU > DistMat
Definition: base.hpp:183
Create an iterator that goes over a contiguous (unit-step) enum class.
El::DistMatrix< TensorDataType, El::MR, El::STAR, El::ELEMENT, D > MRStarMatDT
RowSumMat.
Definition: base.hpp:160
BackpropRequirements
Backpropagation requirements from a layer or operator.
Definition: base.hpp:203
El::DistMatrix< TensorDataType, El::STAR, El::MR, El::ELEMENT, D > StarMRMatDT
ColSumMat.
Definition: base.hpp:163
data_layout data_layout_from_string(std::string const &str)
matrix_format data_layout_to_matrix_format(data_layout layout)
execution_mode exec_mode_from_string(std::string const &str)
Convert a string to an execution_mode.
std::unique_ptr< lbann_comm > initialize_lbann(int argc, char **argv)
Initialize LBANN for use with external applications.
std::unique_ptr< lbann_comm, std::function< void(lbann_comm *)> > world_comm_ptr
Definition: base.hpp:60
El::DistMatrix< TensorDataType, El::MC, El::STAR, El::ELEMENT, D > MCStarMatDT
ColSumStarVCMat.
Definition: base.hpp:157
El::BlockMatrix< DataType > BlockMat
Definition: base.hpp:132
CircMatDT< DataType, D > CircMat
Definition: base.hpp:170
El::DistMatrix< TensorDataType, El::VC, El::STAR, El::ELEMENT, D > VCStarMatDT
Definition: base.hpp:154
constexpr El::Device Device
matrix_format
Distributed matrix format.
Definition: base.hpp:192
std::string to_string(El::Device const &d)
El::ElementalMatrix< DataType > ElMat
Definition: base.hpp:131
El::Device device_from_string(std::string const &str)
std::basic_istream< CharT > & operator>>(std::basic_istream< CharT > &is, beta_distribution< RealType > &d)
Definition: beta.hpp:236
El::Matrix< TensorDataType, El::Device::CPU > CPUMatDT
Definition: base.hpp:135
StarVCMatDT< DataType, D > StarVCMat
Definition: base.hpp:174
El::Matrix< DataType, El::Device::CPU > CPUMat
Definition: base.hpp:116
StarMatDT< DataType, D > StarMat
Definition: base.hpp:172
typename std::add_pointer< T >::type observer_ptr
Creating an observer_ptr to complement the unique_ptr and shared_ptr.
Definition: base.hpp:54
execution_mode
Neural network execution mode.
Definition: base.hpp:229
El::Grid EGrid
Definition: base.hpp:125
El::AbstractMatrix< DataType > AbsMat
Definition: base.hpp:115
bool endsWith(const std::string mainStr, const std::string &toMatch)
El::DistMatrix< TensorDataType, El::MC, El::MR, El::ELEMENT, D > MCMRMatDT
Definition: base.hpp:139
void print_matrix_dims(AbsDistMat *m, const char *name)
Print the dimensions and name of an Elemental matrix.
world_comm_ptr initialize(int &argc, char **&argv)
MRStarMatDT< DataType, D > MRStarMat
RowSumMat.
Definition: base.hpp:180
El::DistMatrix< TensorDataType, El::CIRC, El::CIRC, El::ELEMENT, D > CircMatDT
Definition: base.hpp:142
El::Matrix< DataType, El::Device::CPU > Mat
Definition: base.hpp:185
El::BaseDistMatrix BaseDistMat
Definition: base.hpp:121
data_layout
Data layout that is optimized for different modes of parallelism.
Definition: base.hpp:218
void lbann_mpi_err_handler(MPI_Comm *comm, int *err_code,...)
MCStarMatDT< DataType, D > MCStarMat
ColSumStarVCMat.
Definition: base.hpp:178
void finalize_lbann(lbann_comm *comm=nullptr)
Destroy LBANN communicator for external application.
void finalize(lbann_comm *comm=nullptr)
void print_local_matrix_dims(AbsMat *m, const char *name)
Print the dimensions and name of an Elemental matrix.
double EvalType
Definition: base.hpp:189