// NOTE(review): this listing keeps only some of the original source lines (the
// embedded line numbers jump), so signatures, braces and several statements
// are not visible. Comments below describe only what is shown.
//
// First line: include guard and the LBANN_HAS_TBINF feature guard, followed by
// the start of a mean reduction (presumably lbann_summary::reduce_mean, since
// it queues into m_pending_means -- confirm against the full file).
27 #ifndef LBANN_SUMMARY_IMPL_HPP_INCLUDED 28 #define LBANN_SUMMARY_IMPL_HPP_INCLUDED 35 #ifdef LBANN_HAS_TBINF 37 template <
typename TensorDataType>
40 const El::AbstractDistMatrix<TensorDataType>& mat,
// Accumulator type used for the sum: BiggerOf<TensorDataType, float>.
43 using AccumT = BiggerOf<TensorDataType, float>;
48 El::DistData mat_format(mat);
// Matrices whose column and row distributions are both STAR take a separate
// path in which only the trainer master computes the local sum.
49 if (mat_format.colDist == El::STAR && mat_format.rowDist == El::STAR) {
51 if (m_comm->am_trainer_master()) {
52 sum = local_sum(mat.LockedMatrix());
// Second local_sum call; presumably the branch for all other distributions
// (the enclosing else is not visible here -- confirm).
60 sum = local_sum(mat.LockedMatrix());
// Queue the pending mean; the global element count (Height * Width) is the
// last visible argument.
64 m_pending_means.emplace_back(tag,
68 mat.Height() * mat.Width());
// Fragment of a minimum reduction (presumably lbann_summary::reduce_min; the
// signature line is not visible in this listing -- confirm).
71 template <
typename TensorDataType>
74 const El::AbstractDistMatrix<TensorDataType>& mat,
77 using AccumT = BiggerOf<TensorDataType, float>;
// Take the minimum over mat.LockedMatrix() only and queue it with tag and
// step; any combination across processes is not visible in this fragment.
78 AccumT mat_local_min = local_min(mat.LockedMatrix());
79 m_pending_mins.emplace_back(tag, step, mat_local_min);
// Fragment of a maximum reduction (presumably lbann_summary::reduce_max; the
// signature line is not visible in this listing -- confirm).
82 template <
typename TensorDataType>
85 const El::AbstractDistMatrix<TensorDataType>& mat,
88 using AccumT = BiggerOf<TensorDataType, float>;
// Take the maximum over mat.LockedMatrix() only and queue it with tag and
// step; any combination across processes is not visible in this fragment.
89 AccumT mat_local_max = local_max(mat.LockedMatrix());
90 m_pending_maxes.emplace_back(tag, step, mat_local_max);
// Fragment of a standard-deviation reduction (presumably
// lbann_summary::reduce_stdev, since it queues into m_pending_stdevs --
// confirm; the signature line is not visible in this listing).
93 template <
typename TensorDataType>
96 const El::AbstractDistMatrix<TensorDataType>& mat,
99 using AccumT = BiggerOf<TensorDataType, float>;
105 El::DistData mat_format(mat);
// Matrices whose column and row distributions are both STAR take a separate
// path in which only the trainer master accumulates sum and sqsum.
106 if (mat_format.colDist == El::STAR && mat_format.rowDist == El::STAR) {
108 if (m_comm->am_trainer_master()) {
109 local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
// Second accumulation call; presumably the branch for all other distributions
// (the enclosing else is not visible here -- confirm).
117 local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
// Queue the pending entry; the global element count (Height * Width) is the
// last visible argument.
121 m_pending_stdevs.emplace_back(tag,
125 mat.Height() * mat.Width());
// Fragment of a scalar reduction (presumably lbann_summary::reduce_scalar --
// confirm; the signature line is not visible in this listing).
128 template <
typename TensorDataType>
// Only the trainer master queues the scalar (tag, step, s).
132 if (m_comm->am_trainer_master()) {
133 m_pending_scalars.emplace_back(tag, step, s);
// Fragment of a summed-scalar reduction (presumably
// lbann_summary::sum_reduce_scalar -- confirm; the signature and any guard
// around the statement below are not visible in this listing).
137 template <
typename TensorDataType>
// Queue (tag, step, s) in m_pending_sum_scalars.
142 m_pending_sum_scalars.emplace_back(tag, step, s);
// Fragment of an all-values scalar reduction (presumably
// lbann_summary::reduce_scalar_all -- confirm; the signature and any guard
// around the statement below are not visible in this listing).
145 template <
typename TensorDataType>
// Queue (tag, step, s) in m_pending_scalar_alls.
150 m_pending_scalar_alls.emplace_back(tag, step, s);
// Fragment of a histogram reduction (presumably
// lbann_summary::reduce_histogram, since it queues into m_pending_histograms
// -- confirm). It gathers the local min and max, sum and sum of squares, and
// per-bucket counts for the local matrix, then queues them.
// NOTE(review): several source lines are not visible in this listing.
153 template <
typename TensorDataType>
155 const std::string tag,
156 const El::AbstractDistMatrix<TensorDataType>& mat,
159 using AccumT = BiggerOf<TensorDataType, float>;
160 AccumT mat_local_min = local_min(mat.LockedMatrix());
161 AccumT mat_local_max = local_max(mat.LockedMatrix());
166 El::DistData mat_format(mat);
// Same split as the stdev reduction: for STAR/STAR matrices only the trainer
// master accumulates sum and sqsum.
167 if (mat_format.colDist == El::STAR && mat_format.rowDist == El::STAR) {
169 if (m_comm->am_trainer_master()) {
170 local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
// Second accumulation call; presumably the branch for other distributions
// (the enclosing else is not visible here -- confirm).
178 local_sum_sqsum(mat.LockedMatrix(), sum, sqsum);
// One counter per entry of m_histogram_buckets plus one extra, so that a value
// for which upper_bound returns end() still has a slot.
181 std::vector<double> buckets(m_histogram_buckets.size() + 1, 0.0);
182 const auto height = mat.LocalHeight();
183 const auto width = mat.LocalWidth();
184 const auto ldim = mat.LDim();
185 const auto* __restrict__ mat_buf = mat.LockedMatrix().LockedBuffer();
// Walk the local entries; element (row, col) is read at row + col * ldim.
// NOTE(review): `auto row = 0` / `auto col = 0` deduce int, while height and
// width take whatever type LocalHeight()/LocalWidth() return -- confirm the
// mixed-width comparison is intended.
186 for (
auto row = 0; row < height; ++row) {
187 for (
auto col = 0; col < width; ++col) {
// Bucket index = position of the first bucket boundary greater than the value.
// Assumes m_histogram_buckets is sorted ascending (std::upper_bound
// precondition) -- TODO confirm.
189 auto bucket = std::distance(m_histogram_buckets.begin(),
190 std::upper_bound(m_histogram_buckets.begin(),
191 m_histogram_buckets.end(),
192 mat_buf[row + col * ldim]));
// Two increment variants: bounds-checked .at() and unchecked operator[].
// Presumably selected by LBANN_DEBUG (only the #endif is visible) -- confirm.
194 buckets.at(bucket) += 1.0;
196 buckets[bucket] += 1.0;
197 #endif // LBANN_DEBUG 201 m_pending_histograms.emplace_back(tag,
206 mat.Height() * mat.Width(),
// Fragment of a 2-norm reduction (presumably lbann_summary::reduce_2norm --
// confirm; the signature and what is done with local_norm afterwards are not
// visible in this listing).
212 template <
typename TensorDataType>
215 const El::AbstractDistMatrix<TensorDataType>& mat,
219 using AccumT = BiggerOf<TensorDataType, float>;
// 2-norm of mat.LockedMatrix() only.
220 AccumT local_norm = local_2norm(mat.LockedMatrix());
// lbann_summary::local_sum: sums the entries of `mat` into an accumulator of
// type BiggerOf<TensorDataType, float>, which is also the declared return type.
// NOTE(review): some source lines (e.g. the flat-loop body and the return) are
// not visible in this listing.
224 template <
typename TensorDataType>
226 lbann_summary::local_sum(
const El::AbstractMatrix<TensorDataType>& mat)
const 227 -> BiggerOf<TensorDataType, float>
231 const El::Int height = mat.Height();
232 const El::Int width = mat.Width();
233 const El::Int ldim = mat.LDim();
234 const auto* __restrict__ mat_buf = mat.LockedBuffer();
235 using AccumT = BiggerOf<TensorDataType, float>;
236 AccumT sum = AccumT(0);
// When the leading dimension equals the height, the buffer is walked as one
// flat range of height * width elements.
237 if (ldim == height) {
238 const El::Int size = height * width;
240 for (El::Int i = 0; i < size; ++i) {
// Otherwise element (row, col) is read at row + col * ldim.
246 for (
El::Int row = 0; row < height; ++row) {
247 for (El::Int col = 0; col < width; ++col) {
248 sum += mat_buf[row + col * ldim];
// lbann_summary::local_sum_sqsum: visits every entry of `mat`; callers in this
// file pass `sum` and `sqsum` accumulators as the second and third arguments.
// NOTE(review): the remaining parameters and the accumulation statements are
// not visible in this listing.
255 template <
typename TensorDataType,
typename AccumT>
257 lbann_summary::local_sum_sqsum(
const El::AbstractMatrix<TensorDataType>& mat,
263 const El::Int height = mat.Height();
264 const El::Int width = mat.Width();
265 const El::Int ldim = mat.LDim();
266 const auto* __restrict__ mat_buf = mat.LockedBuffer();
// When the leading dimension equals the height, the buffer is walked as one
// flat range of height * width elements.
269 if (ldim == height) {
270 const El::Int size = height * width;
272 for (El::Int i = 0; i < size; ++i) {
// NOTE(review): `val` is declared as DataType, not TensorDataType or AccumT.
// If DataType is narrower than TensorDataType this converts each element
// before it is accumulated -- confirm this is intended.
273 const DataType val = mat_buf[i];
// Otherwise element (row, col) is read at row + col * ldim.
280 for (
El::Int row = 0; row < height; ++row) {
281 for (El::Int col = 0; col < width; ++col) {
282 const DataType val = mat_buf[row + col * ldim];
// lbann_summary::local_min: smallest entry of `mat`, tracked in an accumulator
// of type BiggerOf<TensorDataType, float> (also the declared return type).
// NOTE(review): some source lines (e.g. the return) are not visible in this
// listing.
290 template <
typename TensorDataType>
292 lbann_summary::local_min(
const El::AbstractMatrix<TensorDataType>& mat)
const 293 -> BiggerOf<TensorDataType, float>
296 const El::Int height = mat.Height();
297 const El::Int width = mat.Width();
298 const El::Int ldim = mat.LDim();
299 const auto* __restrict__ mat_buf = mat.LockedBuffer();
300 using AccumT = BiggerOf<TensorDataType, float>;
// Seed with the largest finite AccumT so any element replaces it.
301 AccumT min = std::numeric_limits<AccumT>::max();
// When the leading dimension equals the height, the buffer is walked as one
// flat range of height * width elements.
302 if (ldim == height) {
303 const El::Int size = height * width;
305 for (El::Int i = 0; i < size; ++i) {
306 min = El::Min(min, mat_buf[i]);
// Otherwise element (row, col) is read at row + col * ldim.
311 for (
El::Int row = 0; row < height; ++row) {
312 for (El::Int col = 0; col < width; ++col) {
313 min = El::Min(min, mat_buf[row + col * ldim]);
// lbann_summary::local_max: largest entry of `mat`, tracked in an accumulator
// of type BiggerOf<TensorDataType, float> (also the declared return type).
// NOTE(review): some source lines (e.g. the return) are not visible in this
// listing; only the seed value below was changed.
320 template <
typename TensorDataType>
322 lbann_summary::local_max(
const El::AbstractMatrix<TensorDataType>& mat)
const 323 -> BiggerOf<TensorDataType, float>
326 const El::Int height = mat.Height();
327 const El::Int width = mat.Width();
328 const El::Int ldim = mat.LDim();
329 const auto* __restrict__ mat_buf = mat.LockedBuffer();
330 using AccumT = BiggerOf<TensorDataType, float>;
// Seed with lowest(), not min(): for floating-point types
// std::numeric_limits<T>::min() is the smallest positive normal value, so a
// matrix whose entries are all negative would report a tiny positive maximum.
// lowest() is the most negative finite value for every arithmetic type.
331 AccumT max = std::numeric_limits<AccumT>::lowest();
// When the leading dimension equals the height, the buffer is walked as one
// flat range of height * width elements.
332 if (ldim == height) {
333 const El::Int size = height * width;
335 for (El::Int i = 0; i < size; ++i) {
336 max = El::Max(max, mat_buf[i]);
// Otherwise element (row, col) is read at row + col * ldim.
341 for (
El::Int row = 0; row < height; ++row) {
342 for (El::Int col = 0; col < width; ++col) {
343 max = El::Max(max, mat_buf[row + col * ldim]);
// lbann_summary::local_2norm: accumulates the squares of the entries of `mat`
// and returns the square root of that total, as BiggerOf<TensorDataType, float>.
// NOTE(review): some source lines are not visible in this listing.
350 template <
typename TensorDataType>
352 lbann_summary::local_2norm(
const El::AbstractMatrix<TensorDataType>& mat)
const 353 -> BiggerOf<TensorDataType, float>
357 const El::Int height = mat.Height();
358 const El::Int width = mat.Width();
359 const El::Int ldim = mat.LDim();
360 const auto* __restrict__ mat_buf = mat.LockedBuffer();
361 using AccumT = BiggerOf<TensorDataType, float>;
// `norm` holds the running sum of squares until the final square root.
362 AccumT norm = AccumT(0);
// When the leading dimension equals the height, the buffer is walked as one
// flat range of height * width elements.
363 if (ldim == height) {
364 const El::Int size = height * width;
366 for (El::Int i = 0; i < size; ++i) {
// The product is formed from two TensorDataType operands before being added
// to the AccumT total.
367 norm += mat_buf[i] * mat_buf[i];
// Otherwise element (row, col) is read at row + col * ldim.
372 for (
El::Int row = 0; row < height; ++row) {
373 for (El::Int col = 0; col < width; ++col) {
374 norm += mat_buf[row + col * ldim] * mat_buf[row + col * ldim];
378 return El::Sqrt(norm);
381 #endif // LBANN_HAS_TBINF 385 #endif // LBANN_SUMMARY_IMPL_HPP_INCLUDED #define LBANN_CALIPER_MARK_FUNCTION
void reduce_mean(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_stdev(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_scalar(const std::string tag, TensorDataType s, int step)
void sum_reduce_scalar(const std::string tag, TensorDataType s, int step)
void reduce_2norm(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_histogram(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_scalar_all(const std::string tag, TensorDataType s, int step)
#define LBANN_OMP_PARALLEL_FOR_ARGS(arg)
void reduce_min(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)
void reduce_max(const std::string tag, const El::AbstractDistMatrix< TensorDataType > &mat, int step)