LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
summary_impl.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_SUMMARY_IMPL_HPP_INCLUDED
28 #define LBANN_SUMMARY_IMPL_HPP_INCLUDED
29 
30 #include "lbann/utils/summary.hpp"
32 
33 namespace lbann {
34 
35 #ifdef LBANN_HAS_TBINF
36 
37 template <typename TensorDataType>
38 inline void
39 lbann_summary::reduce_mean(const std::string tag,
40  const El::AbstractDistMatrix<TensorDataType>& mat,
41  int step)
42 {
43  using AccumT = BiggerOf<TensorDataType, float>;
44  // Local sum
45  AccumT sum = 0.0;
46 
47  // Check distributed matrix format
48  El::DistData mat_format(mat);
49  if (mat_format.colDist == El::STAR && mat_format.rowDist == El::STAR) {
50  // Compute local sum on master process if matrix is Star,Star
51  if (m_comm->am_trainer_master()) {
52  sum = local_sum(mat.LockedMatrix());
53  }
54  }
55  else {
56  // Compute local sum on all processes if matrix is in MC,MR;
57  // Star,VC; or similar format
58  // TODO: implement for matrices in Circ,Circ; MC,Star; or similar
59  // formats
60  sum = local_sum(mat.LockedMatrix());
61  }
62 
63  // Add local sum to list of pending means
64  m_pending_means.emplace_back(tag,
65  step,
66  sum,
67  0.0f,
68  mat.Height() * mat.Width());
69 }
70 
71 template <typename TensorDataType>
72 inline void
73 lbann_summary::reduce_min(const std::string tag,
74  const El::AbstractDistMatrix<TensorDataType>& mat,
75  int step)
76 {
77  using AccumT = BiggerOf<TensorDataType, float>;
78  AccumT mat_local_min = local_min(mat.LockedMatrix());
79  m_pending_mins.emplace_back(tag, step, mat_local_min);
80 }
81 
82 template <typename TensorDataType>
83 inline void
84 lbann_summary::reduce_max(const std::string tag,
85  const El::AbstractDistMatrix<TensorDataType>& mat,
86  int step)
87 {
88  using AccumT = BiggerOf<TensorDataType, float>;
89  AccumT mat_local_max = local_max(mat.LockedMatrix());
90  m_pending_maxes.emplace_back(tag, step, mat_local_max);
91 }
92 
93 template <typename TensorDataType>
94 inline void
95 lbann_summary::reduce_stdev(const std::string tag,
96  const El::AbstractDistMatrix<TensorDataType>& mat,
97  int step)
98 {
99  using AccumT = BiggerOf<TensorDataType, float>;
100  // Local sum and squared sum
101  AccumT sum = 0.0;
102  AccumT sqsum = 0.0;
103 
104  // Check distributed matrix format
105  El::DistData mat_format(mat);
106  if (mat_format.colDist == El::STAR && mat_format.rowDist == El::STAR) {
107  // Compute local sums on master process if matrix is Star,Star
108  if (m_comm->am_trainer_master()) {
109  local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
110  }
111  }
112  else {
113  // Compute local sums on all processes if matrix is in MC,MR;
114  // Star,VC; or similar format
115  // TODO: implement for matrices in Circ,Circ; MC,Star; or similar
116  // formats
117  local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
118  }
119 
120  // Add local sums to list of pending stdevs.
121  m_pending_stdevs.emplace_back(tag,
122  step,
123  sum,
124  sqsum,
125  mat.Height() * mat.Width());
126 }
127 
128 template <typename TensorDataType>
129 inline void
130 lbann_summary::reduce_scalar(const std::string tag, TensorDataType s, int step)
131 {
132  if (m_comm->am_trainer_master()) {
133  m_pending_scalars.emplace_back(tag, step, s);
134  }
135 }
136 
137 template <typename TensorDataType>
138 inline void lbann_summary::sum_reduce_scalar(const std::string tag,
139  TensorDataType s,
140  int step)
141 {
142  m_pending_sum_scalars.emplace_back(tag, step, s);
143 }
144 
145 template <typename TensorDataType>
146 inline void lbann_summary::reduce_scalar_all(const std::string tag,
147  TensorDataType s,
148  int step)
149 {
150  m_pending_scalar_alls.emplace_back(tag, step, s);
151 }
152 
153 template <typename TensorDataType>
155  const std::string tag,
156  const El::AbstractDistMatrix<TensorDataType>& mat,
157  int step)
158 {
159  using AccumT = BiggerOf<TensorDataType, float>;
160  AccumT mat_local_min = local_min(mat.LockedMatrix());
161  AccumT mat_local_max = local_max(mat.LockedMatrix());
162  // Local sum and squared sum
163  AccumT sum = 0.0;
164  AccumT sqsum = 0.0;
165  // Check distributed matrix format
166  El::DistData mat_format(mat);
167  if (mat_format.colDist == El::STAR && mat_format.rowDist == El::STAR) {
168  // Compute local sums on master process if matrix is Star,Star
169  if (m_comm->am_trainer_master()) {
170  local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
171  }
172  }
173  else {
174  // Compute local sums on all processes if matrix is in MC,MR;
175  // Star,VC; or similar format
176  // TODO: implement for matrices in Circ,Circ; MC,Star; or similar
177  // formats
178  local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
179  }
180  // Compute local buckets.
181  std::vector<double> buckets(m_histogram_buckets.size() + 1, 0.0);
182  const auto height = mat.LocalHeight();
183  const auto width = mat.LocalWidth();
184  const auto ldim = mat.LDim();
185  const auto* __restrict__ mat_buf = mat.LockedMatrix().LockedBuffer();
186  for (auto row = 0; row < height; ++row) {
187  for (auto col = 0; col < width; ++col) {
188  // Note: This could be optimized; upper_bound takes O(logn) time.
189  auto bucket = std::distance(m_histogram_buckets.begin(),
190  std::upper_bound(m_histogram_buckets.begin(),
191  m_histogram_buckets.end(),
192  mat_buf[row + col * ldim]));
193 #ifdef LBANN_DEBUG
194  buckets.at(bucket) += 1.0;
195 #else
196  buckets[bucket] += 1.0;
197 #endif // LBANN_DEBUG
198  }
199  }
200  // Add to list of pending histograms.
201  m_pending_histograms.emplace_back(tag,
202  step,
203  std::move(buckets),
204  mat_local_min,
205  mat_local_max,
206  mat.Height() * mat.Width(),
207  sum,
208  sqsum);
209  // TODO: Support histograms on multiple models.
210 }
211 
212 template <typename TensorDataType>
213 inline void
214 lbann_summary::reduce_2norm(const std::string tag,
215  const El::AbstractDistMatrix<TensorDataType>& mat,
216  int step)
217 {
218  // Using a squared 2-norm so that we can just sum this.
219  using AccumT = BiggerOf<TensorDataType, float>;
220  AccumT local_norm = local_2norm(mat.LockedMatrix());
221  sum_reduce_scalar(tag, local_norm * local_norm, step);
222 }
223 
224 template <typename TensorDataType>
225 inline auto
226 lbann_summary::local_sum(const El::AbstractMatrix<TensorDataType>& mat) const
227  -> BiggerOf<TensorDataType, float>
228 {
230  // Note there are more numerically stable ways to compute a sum.
231  const El::Int height = mat.Height();
232  const El::Int width = mat.Width();
233  const El::Int ldim = mat.LDim();
234  const auto* __restrict__ mat_buf = mat.LockedBuffer();
235  using AccumT = BiggerOf<TensorDataType, float>;
236  AccumT sum = AccumT(0);
237  if (ldim == height) {
238  const El::Int size = height * width;
239  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(+ : sum))
240  for (El::Int i = 0; i < size; ++i) {
241  sum += mat_buf[i];
242  }
243  }
244  else {
245  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(+ : sum) collapse(2))
246  for (El::Int row = 0; row < height; ++row) {
247  for (El::Int col = 0; col < width; ++col) {
248  sum += mat_buf[row + col * ldim];
249  }
250  }
251  }
252  return sum;
253 }
254 
255 template <typename TensorDataType, typename AccumT>
256 inline void
257 lbann_summary::local_sum_sqsum(const El::AbstractMatrix<TensorDataType>& mat,
258  AccumT& sum,
259  AccumT& sqsum) const
260 {
262  // Note there are more numerically stable ways to compute a sum.
263  const El::Int height = mat.Height();
264  const El::Int width = mat.Width();
265  const El::Int ldim = mat.LDim();
266  const auto* __restrict__ mat_buf = mat.LockedBuffer();
267  sum = AccumT(0);
268  sqsum = AccumT(0);
269  if (ldim == height) {
270  const El::Int size = height * width;
271  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(+ : sum, sqsum))
272  for (El::Int i = 0; i < size; ++i) {
273  const DataType val = mat_buf[i];
274  sum += val;
275  sqsum += val * val;
276  }
277  }
278  else {
279  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(+ : sum, sqsum) collapse(2))
280  for (El::Int row = 0; row < height; ++row) {
281  for (El::Int col = 0; col < width; ++col) {
282  const DataType val = mat_buf[row + col * ldim];
283  sum += val;
284  sqsum += val * val;
285  }
286  }
287  }
288 }
289 
290 template <typename TensorDataType>
291 inline auto
292 lbann_summary::local_min(const El::AbstractMatrix<TensorDataType>& mat) const
293  -> BiggerOf<TensorDataType, float>
294 {
296  const El::Int height = mat.Height();
297  const El::Int width = mat.Width();
298  const El::Int ldim = mat.LDim();
299  const auto* __restrict__ mat_buf = mat.LockedBuffer();
300  using AccumT = BiggerOf<TensorDataType, float>;
301  AccumT min = std::numeric_limits<AccumT>::max();
302  if (ldim == height) {
303  const El::Int size = height * width;
304  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(min : min))
305  for (El::Int i = 0; i < size; ++i) {
306  min = El::Min(min, mat_buf[i]);
307  }
308  }
309  else {
310  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(min : min) collapse(2))
311  for (El::Int row = 0; row < height; ++row) {
312  for (El::Int col = 0; col < width; ++col) {
313  min = El::Min(min, mat_buf[row + col * ldim]);
314  }
315  }
316  }
317  return min;
318 }
319 
320 template <typename TensorDataType>
321 inline auto
322 lbann_summary::local_max(const El::AbstractMatrix<TensorDataType>& mat) const
323  -> BiggerOf<TensorDataType, float>
324 {
326  const El::Int height = mat.Height();
327  const El::Int width = mat.Width();
328  const El::Int ldim = mat.LDim();
329  const auto* __restrict__ mat_buf = mat.LockedBuffer();
330  using AccumT = BiggerOf<TensorDataType, float>;
331  AccumT max = std::numeric_limits<AccumT>::min();
332  if (ldim == height) {
333  const El::Int size = height * width;
334  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(max : max))
335  for (El::Int i = 0; i < size; ++i) {
336  max = El::Max(max, mat_buf[i]);
337  }
338  }
339  else {
340  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(max : max) collapse(2))
341  for (El::Int row = 0; row < height; ++row) {
342  for (El::Int col = 0; col < width; ++col) {
343  max = El::Max(max, mat_buf[row + col * ldim]);
344  }
345  }
346  }
347  return max;
348 }
349 
350 template <typename TensorDataType>
351 inline auto
352 lbann_summary::local_2norm(const El::AbstractMatrix<TensorDataType>& mat) const
353  -> BiggerOf<TensorDataType, float>
354 {
356  // Note there are more numerically stable ways to compute this.
357  const El::Int height = mat.Height();
358  const El::Int width = mat.Width();
359  const El::Int ldim = mat.LDim();
360  const auto* __restrict__ mat_buf = mat.LockedBuffer();
361  using AccumT = BiggerOf<TensorDataType, float>;
362  AccumT norm = AccumT(0);
363  if (ldim == height) {
364  const El::Int size = height * width;
365  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(+ : norm))
366  for (El::Int i = 0; i < size; ++i) {
367  norm += mat_buf[i] * mat_buf[i];
368  }
369  }
370  else {
371  LBANN_OMP_PARALLEL_FOR_ARGS(reduction(+ : norm) collapse(2))
372  for (El::Int row = 0; row < height; ++row) {
373  for (El::Int col = 0; col < width; ++col) {
374  norm += mat_buf[row + col * ldim] * mat_buf[row + col * ldim];
375  }
376  }
377  }
378  return El::Sqrt(norm);
379 }
380 
381 #endif // LBANN_HAS_TBINF
382 
383 } // namespace lbann
384 
385 #endif // LBANN_SUMMARY_IMPL_HPP_INCLUDED
#define LBANN_CALIPER_MARK_FUNCTION
Definition: profiling.hpp:55
void reduce_mean(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_stdev(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_scalar(const std::string tag, TensorDataType s, int step)
void sum_reduce_scalar(const std::string tag, TensorDataType s, int step)
void reduce_2norm(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_histogram(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_scalar_all(const std::string tag, TensorDataType s, int step)
#define LBANN_OMP_PARALLEL_FOR_ARGS(arg)
Definition: omp_pragma.hpp:64
void reduce_min(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_max(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)