LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
options.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 #ifndef LBANN_UTILS_OPTIONS_HPP_INCLUDED
27 #define LBANN_UTILS_OPTIONS_HPP_INCLUDED
28 
30 
31 #include <iostream>
32 #include <map>
33 #include <string>
34 #include <vector>
35 
36 namespace lbann {
37 
38 /****** std options ******/
// General-purpose option names. Every LBANN_OPTION_* macro in this group
// expands to a string literal holding the option's name. Because these are
// preprocessor macros, they are NOT scoped by the enclosing `namespace lbann`;
// they are visible to any code that includes this header.
39 // Bool flags
// NOTE(review): "Bool flags" presumably means options that are switched on by
// their presence rather than carrying a value -- confirm against the parser.
// NOTE(review): most names in this group are snake_case identifiers, but
// LTFB_ALLOW_GLOBAL_STATISTICS, ALLOW_MULTITRAINER_GLOBAL_STATISTICS,
// USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP, INIT_SHMEM and INIT_NVSHMEM expand
// to free-form phrases containing spaces. The strings are left exactly as
// they are, since other code may look options up by these values.
40 #define LBANN_OPTION_DISABLE_BACKGROUND_IO_ACTIVITY \
41  "disable_background_io_activity"
42 #define LBANN_OPTION_DISABLE_CUDA "disable_cuda"
43 #define LBANN_OPTION_DISABLE_SIGNAL_HANDLER "disable_signal_handler"
44 #define LBANN_OPTION_EXIT_AFTER_SETUP "exit_after_setup"
45 #define LBANN_OPTION_GENERATE_MULTI_PROTO "generate_multi_proto"
46 #define LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR_IS_COMPLETE \
47  "load_model_weights_dir_is_complete"
48 // Deprecated -- "LTFB Callback"
49 #define LBANN_OPTION_LTFB_ALLOW_GLOBAL_STATISTICS "LTFB Allow global statistics"
50 // Deprecated -- "LTFB Callback"
51 #define LBANN_OPTION_LTFB_VERBOSE "ltfb_verbose"
52 #define LBANN_OPTION_MULTITRAINER_VERBOSE "multitrainer_verbose"
53 #define LBANN_OPTION_ALLOW_MULTITRAINER_GLOBAL_STATISTICS \
54  "Allow multitrainer global statistics"
55 #define LBANN_OPTION_PRELOAD_DATA_STORE "preload_data_store"
56 #define LBANN_OPTION_PRINT_AFFINITY "print_affinity"
57 #define LBANN_OPTION_SERIALIZE_IO "serialize_io"
58 #define LBANN_OPTION_STACK_TRACE_TO_FILE "stack_trace_to_file"
59 #define LBANN_OPTION_USE_CUBLAS_TENSOR_OPS "use_cublas_tensor_ops"
60 #define LBANN_OPTION_USE_CUDNN_TENSOR_OPS "use_cudnn_tensor_ops"
61 #define LBANN_OPTION_USE_DATA_STORE "use_data_store"
62 #define LBANN_OPTION_VERBOSE "verbose"
63 #define LBANN_OPTION_USE_GPU_DEFAULT_MEMORY_IN_FORWARD_PROP \
64  "Use Hydrogen's default memory mode for GPU buffers in forward prop"
65 #define LBANN_OPTION_INIT_SHMEM "Initialize SHMEM when initializing LBANN"
66 #define LBANN_OPTION_INIT_NVSHMEM "Initialize NVSHMEM when initializing LBANN"
67 #define LBANN_OPTION_NO_INPLACE "no_inplace"
68 #define LBANN_OPTION_NO_BACKPROP_DISABLE "no_backprop_disable"
69 
70 // Input options
// Option-name string literals for the general-purpose group.
// NOTE(review): "Input options" presumably means options that carry a value
// (as opposed to the on/off flags) -- confirm against the parser.
// NOTE(review): as with the flags, several names here are free-form phrases
// with spaces and capitals (e.g. MAX_RNG_SEEDS_DISPLAY, NUM_IO_THREADS,
// PROCS_PER_TRAINER and the TRAINER_* / NUM_SUBGRIDS_BLOCK_ORDER entries)
// while the rest are snake_case. The strings are kept exactly as they are.
71 #define LBANN_OPTION_CKPT_DIR "ckpt_dir"
72 #define LBANN_OPTION_HYDROGEN_BLOCK_SIZE "hydrogen_block_size"
73 #define LBANN_OPTION_LOAD_MODEL_WEIGHTS_DIR "load_model_weights_dir"
74 #define LBANN_OPTION_MAX_RNG_SEEDS_DISPLAY "RNG seeds per trainer to display"
75 #define LBANN_OPTION_METADATA "metadata"
76 #define LBANN_OPTION_MINI_BATCH_SIZE "mini_batch_size"
77 #define LBANN_OPTION_MODEL "model"
78 #define LBANN_OPTION_NUM_EPOCHS "num_epochs"
79 #define LBANN_OPTION_NUM_IO_THREADS "Num. IO threads"
80 #define LBANN_OPTION_NUM_PARALLEL_READERS "num_parallel_readers"
81 #define LBANN_OPTION_OPTIMIZER "optimizer"
82 #define LBANN_OPTION_PROCS_PER_TRAINER "Processes per trainer"
83 #define LBANN_OPTION_PROTOTEXT "prototext"
84 #define LBANN_OPTION_RANDOM_SEED "random_seed"
85 #define LBANN_OPTION_READER "reader"
86 #define LBANN_OPTION_RESTART_DIR "restart_dir"
87 #define LBANN_OPTION_TRAINER_CREATE_TWO_MODELS \
88  "Create two models in Sub-grid parallelism"
89 #define LBANN_OPTION_TRAINER_GRID_HEIGHT \
90  "Height of 2D process grid for each trainer"
91 #define LBANN_OPTION_TRAINER_PRIMARY_GRID_SIZE "Primary Grid Size per trainer"
92 #define LBANN_OPTION_TRAINER_ENABLE_SUBGRID_ASYNC_COMM \
93  "Enable async communication in Sub-grid parallelism"
94 #define LBANN_OPTION_TRAINER_ENABLE_TOPO_AWARE_SUBGRID \
95  "Enable topology aware process placement in Sub-grid parallelism"
96 #define LBANN_OPTION_NUM_SUBGRIDS_BLOCK_ORDER \
97  "Divide each trainer into equally-sized sub-grids with blocked ordering"
// The two Caliper option names exist only when LBANN_HAS_CALIPER is defined
// before this point; code that uses them must be guarded the same way.
// NOTE(review): LBANN_HAS_CALIPER is not defined in the visible part of this
// header -- presumably it comes from a build-configuration header; confirm
// that header is included ahead of this block.
98 #ifdef LBANN_HAS_CALIPER
99 #define LBANN_OPTION_USE_CALIPER "use caliper"
100 #define LBANN_OPTION_CALIPER_CONFIG "caliper_config"
101 #endif
102 
103 /****** datastore options ******/
// Option-name string literals for the data store group. As preprocessor
// macros, these names are not scoped by the enclosing namespace.
104 // Bool flags
// All flag names in this sub-group start with "data_store_" except
// LBANN_OPTION_NODE_SIZES_VARY, whose string is "node_sizes_vary".
105 #define LBANN_OPTION_DATA_STORE_CACHE "data_store_cache"
106 #define LBANN_OPTION_DATA_STORE_DEBUG "data_store_debug"
107 #define LBANN_OPTION_DATA_STORE_FAIL "data_store_fail"
108 #define LBANN_OPTION_DATA_STORE_MIN_MAX_TIMING "data_store_min_max_timing"
109 #define LBANN_OPTION_DATA_STORE_NO_THREAD "data_store_no_thread"
110 #define LBANN_OPTION_DATA_STORE_PROFILE "data_store_profile"
111 #define LBANN_OPTION_DATA_STORE_TEST_CACHE "data_store_test_cache"
112 #define LBANN_OPTION_NODE_SIZES_VARY "node_sizes_vary"
113 
114 // Input options
// NOTE(review): presumably these two options carry a value rather than acting
// as on/off flags -- confirm against the parser.
115 #define LBANN_OPTION_DATA_STORE_SPILL "data_store_spill"
116 #define LBANN_OPTION_DATA_STORE_TEST_CHECKPOINT "data_store_test_checkpoint"
117 
118 /****** datareader options ******/
// Option-name string literals for the data reader group. As preprocessor
// macros, these names are not scoped by the enclosing namespace.
119 // Bool flags
120 #define LBANN_OPTION_CHECK_DATA "check_data"
121 #define LBANN_OPTION_KEEP_SAMPLE_ORDER "keep_sample_order"
122 #define LBANN_OPTION_KEEP_PACKED_FIELDS "keep_packed_fields"
123 #define LBANN_OPTION_LOAD_FULL_SAMPLE_LIST_ONCE "load_full_sample_list_once"
124 #define LBANN_OPTION_QUIET "quiet"
125 #define LBANN_OPTION_WRITE_SAMPLE_LABEL_LIST "write_sample_label_list"
126 #define LBANN_OPTION_WRITE_SAMPLE_LIST "write_sample_list"
127 #define LBANN_OPTION_Z_SCORE "z_score"
128 
129 // Input options
// Several names come in test/train/validate families. DATA_FILEDIR also has
// an unsuffixed form; DATA_FILENAME, LABEL_FILENAME and SAMPLE_LIST are
// defined only with the _TEST/_TRAIN/_VALIDATE suffixes.
130 #define LBANN_OPTION_ABSOLUTE_SAMPLE_COUNT "absolute_sample_count"
131 #define LBANN_OPTION_DATA_FILEDIR "data_filedir"
132 #define LBANN_OPTION_DATA_FILEDIR_TEST "data_filedir_test"
133 #define LBANN_OPTION_DATA_FILEDIR_TRAIN "data_filedir_train"
134 #define LBANN_OPTION_DATA_FILEDIR_VALIDATE "data_filedir_validate"
135 #define LBANN_OPTION_DATA_FILENAME_TEST "data_filename_test"
136 #define LBANN_OPTION_DATA_FILENAME_TRAIN "data_filename_train"
137 #define LBANN_OPTION_DATA_FILENAME_VALIDATE "data_filename_validate"
138 #define LBANN_OPTION_DATA_READER_FRACTION "data_reader_fraction"
139 #define LBANN_OPTION_LABEL_FILENAME_TEST "label_filename_test"
140 #define LBANN_OPTION_LABEL_FILENAME_TRAIN "label_filename_train"
141 #define LBANN_OPTION_LABEL_FILENAME_VALIDATE "label_filename_validate"
142 #define LBANN_OPTION_NORMALIZATION "normalization"
143 #define LBANN_OPTION_PILOT2_READ_FILE_SIZES "pilot2_read_file_sizes"
144 #define LBANN_OPTION_PILOT2_SAVE_FILE_SIZES "pilot2_save_file_sizes"
145 #define LBANN_OPTION_SAMPLE_LIST_TEST "sample_list_test"
146 #define LBANN_OPTION_SAMPLE_LIST_TRAIN "sample_list_train"
147 #define LBANN_OPTION_SAMPLE_LIST_VALIDATE "sample_list_validate"
148 #define LBANN_OPTION_SEQUENCE_LENGTH "sequence_length"
149 #define LBANN_OPTION_SMILES_BUFFER_SIZE "smiles_buffer_size"
150 #define LBANN_OPTION_VOCAB "vocab"
151 
152 /****** jag options ******/
// Option-name string literals for the jag group. As preprocessor macros,
// these names are not scoped by the enclosing namespace.
153 // Bool flags
154 #define LBANN_OPTION_JAG "jag"
155 
156 // Input options
// NOTE(review): LBANN_OPTION_NUM_SAMPLES_PER_FILE ("num_samples_per_file") and
// LBANN_OPTION_SAMPLES_PER_FILE ("samples_per_file") are distinct names that
// differ only by the "num_" prefix -- take care not to confuse them at call
// sites; whether both are still needed cannot be determined from this header.
157 #define LBANN_OPTION_BASE_DIR "base_dir"
158 #define LBANN_OPTION_FILELIST "filelist"
159 #define LBANN_OPTION_FILENAME "filename"
160 #define LBANN_OPTION_FORMAT "format"
161 #define LBANN_OPTION_INDEX_FN "index_fn"
162 #define LBANN_OPTION_MAPPING_FN "mapping_fn"
163 #define LBANN_OPTION_NUM_LISTS "num_lists"
164 #define LBANN_OPTION_NUM_SAMPLES "num_samples"
165 #define LBANN_OPTION_NUM_SAMPLES_PER_FILE "num_samples_per_file"
166 #define LBANN_OPTION_NUM_SAMPLES_PER_LIST "num_samples_per_list"
167 #define LBANN_OPTION_NUM_SUBDIRS "num_subdirs"
168 #define LBANN_OPTION_OUTPUT_BASE_DIR "output_base_dir"
169 #define LBANN_OPTION_OUTPUT_BASE_FN "output_base_fn"
170 #define LBANN_OPTION_OUTPUT_DIR "output_dir"
171 #define LBANN_OPTION_OUTPUT_FN "output_fn"
172 #define LBANN_OPTION_SAMPLES_PER_FILE "samples_per_file"
173 
// Declarations only: each function takes no arguments and returns nothing,
// and none of them is defined in this header.
// NOTE(review): presumably each construct_*_options() registers the option
// names of the matching LBANN_OPTION_* macro group, and
// construct_all_options() covers every group -- confirm against the
// implementation file.
174 void construct_std_options();
177 void construct_jag_options();
178 void construct_all_options();
179 
180 } // namespace lbann
181 
182 #endif // LBANN_UTILS_OPTIONS_HPP_INCLUDED
void construct_datareader_options()
void construct_jag_options()
void construct_datastore_options()
void construct_all_options()
void construct_std_options()