d2/da9/operators_2math_2common_8hpp_source.html

 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
 // Produced at the Lawrence Livermore National Laboratory.
 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
 //
 // LLNL-CODE-697807.
 // All rights reserved.
 //
 // This file is part of LBANN: Livermore Big Artificial Neural Network
 // Toolkit. For details, see http://software.llnl.gov/LBANN or
 // https://github.com/LLNL/LBANN.
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you
 // may not use this file except in compliance with the License.  You may
 // obtain a copy of the License at:
 //
 // http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 // implied. See the License for the specific language governing
 // permissions and limitations under the License.
 #ifndef LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
 #define LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED

 #include "lbann/base.hpp"
 #include "lbann/utils/profiling.hpp"

 namespace lbann {
 namespace internal {

 /** @brief Binary entrywise map: C(i,j) <- func(A(i,j), B(i,j)).
  *
  *  The iteration space is taken from A. B and C are checked against
  *  A's height and width through LBANN_ASSERT_DEBUG.
  *
  *  @param A    First input matrix (read-only).
  *  @param B    Second input matrix (read-only).
  *  @param C    Output matrix; each visited entry is overwritten.
  *  @param func Callable invoked as func(a, b); its result is assigned
  *              to the matching entry of C.
  */
 template <typename S, typename T, typename U, typename F>
 void EntrywiseZipInto(El::Matrix<S, El::Device::CPU> const& A,
                       El::Matrix<T, El::Device::CPU> const& B,
                       El::Matrix<U, El::Device::CPU>& C,
                       F func)
 {
   EL_DEBUG_CSE;
   auto const height = A.Height();
   auto const width = A.Width();

   // Every operand must have the same shape as A.
   LBANN_ASSERT_DEBUG(B.Height() == height);
   LBANN_ASSERT_DEBUG(B.Width() == width);

   LBANN_ASSERT_DEBUG(C.Height() == height);
   LBANN_ASSERT_DEBUG(C.Width() == width);

   // Raw storage and leading dimension of each operand.
   auto const* a_data = A.LockedBuffer();
   auto const* b_data = B.LockedBuffer();
   auto* c_data = C.Buffer();

   auto const a_ldim = A.LDim();
   auto const b_ldim = B.LDim();
   auto const c_ldim = C.LDim();

   if (width != 1) {
     // General case: both loops are handed to the collapsed
     // parallel-for, and each operand is addressed with its own
     // leading dimension.
     EL_PARALLEL_FOR_COLLAPSE2
     for (El::Int col = 0; col < width; ++col) {
       for (El::Int row = 0; row < height; ++row) {
         auto const a_idx = row + col * a_ldim;
         auto const b_idx = row + col * b_ldim;
         auto const c_idx = row + col * c_ldim;
         c_data[c_idx] = func(a_data[a_idx], b_data[b_idx]);
       }
     }
   }
   else {
     // Column vector: a single loop over the entries is enough and
     // the leading dimensions never enter the index.
     EL_PARALLEL_FOR
     for (El::Int row = 0; row < height; ++row) {
       c_data[row] = func(a_data[row], b_data[row]);
     }
   }
 }

 /** @brief Apply a five-argument functor to every entry of a binary
  *         operator's backprop operands.
  *
  *  For every entry (i,j) of x1, calls
  *  f(x1(i,j), x2(i,j), dy(i,j), dx1(i,j), dx2(i,j)). The entries of
  *  dx1 and dx2 are passed as mutable lvalues; presumably f stores the
  *  input gradients there (confirm against the functors at the call
  *  sites).
  *
  *  The iteration space is taken from x1. The remaining matrices are
  *  checked against x1's shape through LBANN_ASSERT_DEBUG, mirroring
  *  the checks in EntrywiseZipInto.
  *
  *  @param x1  First forward-prop input (read-only).
  *  @param x2  Second forward-prop input (read-only).
  *  @param dy  Third read-only operand (by its name, the gradient
  *             w.r.t. the output).
  *  @param dx1 First mutable operand.
  *  @param dx2 Second mutable operand.
  *  @param f   Callable with the five-argument signature shown above.
  */
 template <typename DataT, typename F>
 void apply_binary_backprop_operator(
   El::Matrix<DataT, El::Device::CPU> const& x1,
   El::Matrix<DataT, El::Device::CPU> const& x2,
   El::Matrix<DataT, El::Device::CPU> const& dy,
   El::Matrix<DataT, El::Device::CPU>& dx1,
   El::Matrix<DataT, El::Device::CPU>& dx2,
   F f)
 {
   LBANN_CALIPER_MARK_FUNCTION;
   auto const height = x1.Height();
   auto const width = x1.Width();

   // Both loops below index every operand with x1's extents, so a
   // shape mismatch would read or write out of bounds.
   LBANN_ASSERT_DEBUG(x2.Height() == height);
   LBANN_ASSERT_DEBUG(x2.Width() == width);
   LBANN_ASSERT_DEBUG(dy.Height() == height);
   LBANN_ASSERT_DEBUG(dy.Width() == width);
   LBANN_ASSERT_DEBUG(dx1.Height() == height);
   LBANN_ASSERT_DEBUG(dx1.Width() == width);
   LBANN_ASSERT_DEBUG(dx2.Height() == height);
   LBANN_ASSERT_DEBUG(dx2.Width() == width);

   if (x1.Contiguous() && x2.Contiguous() && dy.Contiguous() &&
       dx1.Contiguous() && dx2.Contiguous()) {
     // Every operand reports contiguous storage: walk the raw buffers
     // with a single flat index.
     const auto* x1_buffer = x1.LockedBuffer();
     const auto* x2_buffer = x2.LockedBuffer();
     const auto* dy_buffer = dy.LockedBuffer();
     auto* dx1_buffer = dx1.Buffer();
     auto* dx2_buffer = dx2.Buffer();
     // Widen each factor before multiplying so the element count is
     // computed in size_t rather than in El::Int, which may be a
     // 32-bit type depending on the build.
     const size_t size =
       static_cast<size_t>(height) * static_cast<size_t>(width);
     LBANN_OMP_PARALLEL_FOR
     for (size_t i = 0; i < size; ++i) {
       f(x1_buffer[i], x2_buffer[i], dy_buffer[i], dx1_buffer[i], dx2_buffer[i]);
     }
   }
   else {
     // At least one operand is not contiguous: go through the
     // matrices' (row, column) accessors instead.
     LBANN_OMP_PARALLEL_FOR_COLLAPSE2
     for (El::Int jj = 0; jj < width; ++jj) {
       for (El::Int ii = 0; ii < height; ++ii) {
         f(x1(ii, jj), x2(ii, jj), dy(ii, jj), dx1(ii, jj), dx2(ii, jj));
       }
     }
   }
 }

 /** @brief Ternary entrywise map: D(i,j) <- func(A(i,j), B(i,j), C(i,j)).
  *
  *  The iteration space is taken from A. B, C and D are checked
  *  against A's height and width through LBANN_ASSERT_DEBUG.
  *
  *  @param A    First input matrix (read-only).
  *  @param B    Second input matrix (read-only).
  *  @param C    Third input matrix (read-only).
  *  @param D    Output matrix; each visited entry is overwritten.
  *  @param func Callable invoked as func(a, b, c); its result is
  *              assigned to the matching entry of D.
  */
 template <typename S, typename T, typename U, typename R, typename F>
 void EntrywiseZipInto(El::Matrix<S, El::Device::CPU> const& A,
                       El::Matrix<T, El::Device::CPU> const& B,
                       El::Matrix<U, El::Device::CPU> const& C,
                       El::Matrix<R, El::Device::CPU>& D,
                       F func)
 {
   EL_DEBUG_CSE;
   auto const height = A.Height();
   auto const width = A.Width();

   // Every operand must have the same shape as A.
   LBANN_ASSERT_DEBUG(B.Height() == height);
   LBANN_ASSERT_DEBUG(B.Width() == width);

   LBANN_ASSERT_DEBUG(C.Height() == height);
   LBANN_ASSERT_DEBUG(C.Width() == width);

   LBANN_ASSERT_DEBUG(D.Height() == height);
   LBANN_ASSERT_DEBUG(D.Width() == width);

   // Raw storage and leading dimension of each operand.
   auto const* a_data = A.LockedBuffer();
   auto const* b_data = B.LockedBuffer();
   auto const* c_data = C.LockedBuffer();
   auto* d_data = D.Buffer();

   auto const a_ldim = A.LDim();
   auto const b_ldim = B.LDim();
   auto const c_ldim = C.LDim();
   auto const d_ldim = D.LDim();

   if (width != 1) {
     // General case: both loops are handed to the collapsed
     // parallel-for, and each operand is addressed with its own
     // leading dimension.
     EL_PARALLEL_FOR_COLLAPSE2
     for (El::Int col = 0; col < width; ++col) {
       for (El::Int row = 0; row < height; ++row) {
         auto const a_idx = row + col * a_ldim;
         auto const b_idx = row + col * b_ldim;
         auto const c_idx = row + col * c_ldim;
         auto const d_idx = row + col * d_ldim;
         d_data[d_idx] = func(a_data[a_idx], b_data[b_idx], c_data[c_idx]);
       }
     }
   }
   else {
     // Column vector: a single loop over the entries is enough and
     // the leading dimensions never enter the index.
     EL_PARALLEL_FOR
     for (El::Int row = 0; row < height; ++row) {
       d_data[row] = func(a_data[row], b_data[row], c_data[row]);
     }
   }
 }

 /** @brief Apply a seven-argument functor to every entry of a ternary
  *         operator's backprop operands.
  *
  *  For every entry (i,j) of x1, calls
  *  f(x1(i,j), x2(i,j), x3(i,j), dy(i,j), dx1(i,j), dx2(i,j), dx3(i,j)).
  *  The entries of dx1, dx2 and dx3 are passed as mutable lvalues;
  *  presumably f stores the input gradients there (confirm against the
  *  functors at the call sites).
  *
  *  The iteration space is taken from x1. The remaining matrices are
  *  checked against x1's shape through LBANN_ASSERT_DEBUG, mirroring
  *  the checks in EntrywiseZipInto.
  *
  *  @param x1  First forward-prop input (read-only).
  *  @param x2  Second forward-prop input (read-only).
  *  @param x3  Third forward-prop input (read-only).
  *  @param dy  Fourth read-only operand (by its name, the gradient
  *             w.r.t. the output).
  *  @param dx1 First mutable operand.
  *  @param dx2 Second mutable operand.
  *  @param dx3 Third mutable operand.
  *  @param f   Callable with the seven-argument signature shown above.
  */
 template <typename DataT, typename F>
 void apply_ternary_backprop_operator(
   El::Matrix<DataT, El::Device::CPU> const& x1,
   El::Matrix<DataT, El::Device::CPU> const& x2,
   El::Matrix<DataT, El::Device::CPU> const& x3,
   El::Matrix<DataT, El::Device::CPU> const& dy,
   El::Matrix<DataT, El::Device::CPU>& dx1,
   El::Matrix<DataT, El::Device::CPU>& dx2,
   El::Matrix<DataT, El::Device::CPU>& dx3,
   F f)
 {
   LBANN_CALIPER_MARK_FUNCTION;
   auto const height = x1.Height();
   auto const width = x1.Width();

   // Both loops below index every operand with x1's extents, so a
   // shape mismatch would read or write out of bounds.
   LBANN_ASSERT_DEBUG(x2.Height() == height);
   LBANN_ASSERT_DEBUG(x2.Width() == width);
   LBANN_ASSERT_DEBUG(x3.Height() == height);
   LBANN_ASSERT_DEBUG(x3.Width() == width);
   LBANN_ASSERT_DEBUG(dy.Height() == height);
   LBANN_ASSERT_DEBUG(dy.Width() == width);
   LBANN_ASSERT_DEBUG(dx1.Height() == height);
   LBANN_ASSERT_DEBUG(dx1.Width() == width);
   LBANN_ASSERT_DEBUG(dx2.Height() == height);
   LBANN_ASSERT_DEBUG(dx2.Width() == width);
   LBANN_ASSERT_DEBUG(dx3.Height() == height);
   LBANN_ASSERT_DEBUG(dx3.Width() == width);

   if (x1.Contiguous() && x2.Contiguous() && x3.Contiguous() &&
       dy.Contiguous() && dx1.Contiguous() && dx2.Contiguous() &&
       dx3.Contiguous()) {
     // Every operand reports contiguous storage: walk the raw buffers
     // with a single flat index.
     const auto* x1_buffer = x1.LockedBuffer();
     const auto* x2_buffer = x2.LockedBuffer();
     const auto* x3_buffer = x3.LockedBuffer();
     const auto* dy_buffer = dy.LockedBuffer();
     auto* dx1_buffer = dx1.Buffer();
     auto* dx2_buffer = dx2.Buffer();
     auto* dx3_buffer = dx3.Buffer();
     // Widen each factor before multiplying so the element count is
     // computed in size_t rather than in El::Int, which may be a
     // 32-bit type depending on the build.
     const size_t size =
       static_cast<size_t>(height) * static_cast<size_t>(width);
     LBANN_OMP_PARALLEL_FOR
     for (size_t i = 0; i < size; ++i) {
       f(x1_buffer[i],
         x2_buffer[i],
         x3_buffer[i],
         dy_buffer[i],
         dx1_buffer[i],
         dx2_buffer[i],
         dx3_buffer[i]);
     }
   }
   else {
     // At least one operand is not contiguous: go through the
     // matrices' (row, column) accessors instead.
     LBANN_OMP_PARALLEL_FOR_COLLAPSE2
     for (El::Int jj = 0; jj < width; ++jj) {
       for (El::Int ii = 0; ii < height; ++ii) {
         f(x1(ii, jj),
           x2(ii, jj),
           x3(ii, jj),
           dy(ii, jj),
           dx1(ii, jj),
           dx2(ii, jj),
           dx3(ii, jj));
       }
     }
   }
 }

 } // namespace internal
 } // namespace lbann
 #endif // LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
LBANN_CALIPER_MARK_FUNCTION
#define LBANN_CALIPER_MARK_FUNCTION
Definition: profiling.hpp:55

profiling.hpp

lbann::internal::EntrywiseZipInto
void EntrywiseZipInto(El::Matrix< S, El::Device::CPU > const &A, El::Matrix< T, El::Device::CPU > const &B, El::Matrix< U, El::Device::CPU > &C, F func)
A binary entrywise map c <- f(a,b).
Definition: operators/math/common.hpp:38

LBANN_ASSERT_DEBUG
#define LBANN_ASSERT_DEBUG(cond)
Definition: exception.hpp:104

lbann::internal::apply_ternary_backprop_operator
void apply_ternary_backprop_operator(El::Matrix< DataT, El::Device::CPU > const &x1, El::Matrix< DataT, El::Device::CPU > const &x2, El::Matrix< DataT, El::Device::CPU > const &x3, El::Matrix< DataT, El::Device::CPU > const &dy, El::Matrix< DataT, El::Device::CPU > &dx1, El::Matrix< DataT, El::Device::CPU > &dx2, El::Matrix< DataT, El::Device::CPU > &dx3, F f)
Definition: operators/math/common.hpp:182

LBANN_OMP_PARALLEL_FOR_COLLAPSE2
#define LBANN_OMP_PARALLEL_FOR_COLLAPSE2
Definition: omp_pragma.hpp:68

LBANN_OMP_PARALLEL_FOR
#define LBANN_OMP_PARALLEL_FOR
Definition: omp_pragma.hpp:67

base.hpp

lbann::internal::apply_binary_backprop_operator
void apply_binary_backprop_operator(El::Matrix< DataT, El::Device::CPU > const &x1, El::Matrix< DataT, El::Device::CPU > const &x2, El::Matrix< DataT, El::Device::CPU > const &dy, El::Matrix< DataT, El::Device::CPU > &dx1, El::Matrix< DataT, El::Device::CPU > &dx2, F f)
Definition: operators/math/common.hpp:88

lbann
Definition: callback_helpers.hpp:32