LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
operators/math/common.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 #ifndef LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
27 #define LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
28 
29 #include "lbann/base.hpp"
31 
32 namespace lbann {
33 namespace internal {
34 
37 template <typename S, typename T, typename U, typename F>
38 void EntrywiseZipInto(El::Matrix<S, El::Device::CPU> const& A,
39  El::Matrix<T, El::Device::CPU> const& B,
40  El::Matrix<U, El::Device::CPU>& C,
41  F func)
42 {
43  EL_DEBUG_CSE;
44  auto const m = A.Height();
45  auto const n = A.Width();
46 
47  LBANN_ASSERT_DEBUG(B.Height() == m);
48  LBANN_ASSERT_DEBUG(B.Width() == n);
49 
50  LBANN_ASSERT_DEBUG(C.Height() == m);
51  LBANN_ASSERT_DEBUG(C.Width() == n);
52 
53  S const* ABuf = A.LockedBuffer();
54  T const* BBuf = B.LockedBuffer();
55  U* CBuf = C.Buffer();
56 
57  auto const ALDim = A.LDim();
58  auto const BLDim = B.LDim();
59  auto const CLDim = C.LDim();
60 
61  // Use entry-wise parallelization for column vectors. Otherwise
62  // use column-wise parallelization.
63  if (n == 1) {
64  EL_PARALLEL_FOR
65  for (El::Int i = 0; i < m; ++i) {
66  CBuf[i] = func(ABuf[i], BBuf[i]);
67  }
68  }
69  else {
70  EL_PARALLEL_FOR_COLLAPSE2
71  for (El::Int j = 0; j < n; ++j) {
72  for (El::Int i = 0; i < m; ++i) {
73  CBuf[i + j * CLDim] = func(ABuf[i + j * ALDim], BBuf[i + j * BLDim]);
74  }
75  }
76  }
77 }
78 
87 template <typename DataT, typename F>
89  El::Matrix<DataT, El::Device::CPU> const& x1,
90  El::Matrix<DataT, El::Device::CPU> const& x2,
91  El::Matrix<DataT, El::Device::CPU> const& dy,
92  El::Matrix<DataT, El::Device::CPU>& dx1,
93  El::Matrix<DataT, El::Device::CPU>& dx2,
94  F f)
95 {
97  if (x1.Contiguous() && x2.Contiguous() && dy.Contiguous() &&
98  dx1.Contiguous() && dx2.Contiguous()) {
99  const auto* x1_buffer = x1.LockedBuffer();
100  const auto* x2_buffer = x2.LockedBuffer();
101  const auto* dy_buffer = dy.LockedBuffer();
102  auto* dx1_buffer = dx1.Buffer();
103  auto* dx2_buffer = dx2.Buffer();
104  const size_t size = x1.Height() * x1.Width();
106  for (size_t i = 0; i < size; ++i) {
107  f(x1_buffer[i], x2_buffer[i], dy_buffer[i], dx1_buffer[i], dx2_buffer[i]);
108  }
109  }
110  else {
111  auto const width = x1.Width();
112  auto const height = x1.Height();
114  for (El::Int jj = 0; jj < width; ++jj) {
115  for (El::Int ii = 0; ii < height; ++ii) {
116  f(x1(ii, jj), x2(ii, jj), dy(ii, jj), dx1(ii, jj), dx2(ii, jj));
117  }
118  }
119  }
120 }
121 
124 template <typename S, typename T, typename U, typename R, typename F>
125 void EntrywiseZipInto(El::Matrix<S, El::Device::CPU> const& A,
126  El::Matrix<T, El::Device::CPU> const& B,
127  El::Matrix<U, El::Device::CPU> const& C,
128  El::Matrix<R, El::Device::CPU>& D,
129  F func)
130 {
131  EL_DEBUG_CSE;
132  auto const m = A.Height();
133  auto const n = A.Width();
134 
135  LBANN_ASSERT_DEBUG(B.Height() == m);
136  LBANN_ASSERT_DEBUG(B.Width() == n);
137 
138  LBANN_ASSERT_DEBUG(C.Height() == m);
139  LBANN_ASSERT_DEBUG(C.Width() == n);
140 
141  LBANN_ASSERT_DEBUG(D.Height() == m);
142  LBANN_ASSERT_DEBUG(D.Width() == n);
143 
144  S const* ABuf = A.LockedBuffer();
145  T const* BBuf = B.LockedBuffer();
146  U const* CBuf = C.LockedBuffer();
147  R* DBuf = D.Buffer();
148 
149  auto const ALDim = A.LDim();
150  auto const BLDim = B.LDim();
151  auto const CLDim = C.LDim();
152  auto const DLDim = D.LDim();
153 
154  // Use entry-wise parallelization for column vectors. Otherwise
155  // use column-wise parallelization.
156  if (n == 1) {
157  EL_PARALLEL_FOR
158  for (El::Int i = 0; i < m; ++i) {
159  DBuf[i] = func(ABuf[i], BBuf[i], CBuf[i]);
160  }
161  }
162  else {
163  EL_PARALLEL_FOR_COLLAPSE2
164  for (El::Int j = 0; j < n; ++j) {
165  for (El::Int i = 0; i < m; ++i) {
166  DBuf[i + j * DLDim] =
167  func(ABuf[i + j * ALDim], BBuf[i + j * BLDim], CBuf[i + j * CLDim]);
168  }
169  }
170  }
171 }
172 
181 template <typename DataT, typename F>
183  El::Matrix<DataT, El::Device::CPU> const& x1,
184  El::Matrix<DataT, El::Device::CPU> const& x2,
185  El::Matrix<DataT, El::Device::CPU> const& x3,
186  El::Matrix<DataT, El::Device::CPU> const& dy,
187  El::Matrix<DataT, El::Device::CPU>& dx1,
188  El::Matrix<DataT, El::Device::CPU>& dx2,
189  El::Matrix<DataT, El::Device::CPU>& dx3,
190  F f)
191 {
193  if (x1.Contiguous() && x2.Contiguous() && x3.Contiguous() &&
194  dy.Contiguous() && dx1.Contiguous() && dx2.Contiguous() &&
195  dx3.Contiguous()) {
196  const auto* x1_buffer = x1.LockedBuffer();
197  const auto* x2_buffer = x2.LockedBuffer();
198  const auto* x3_buffer = x3.LockedBuffer();
199  const auto* dy_buffer = dy.LockedBuffer();
200  auto* dx1_buffer = dx1.Buffer();
201  auto* dx2_buffer = dx2.Buffer();
202  auto* dx3_buffer = dx3.Buffer();
203  const size_t size = x1.Height() * x1.Width();
205  for (size_t i = 0; i < size; ++i) {
206  f(x1_buffer[i],
207  x2_buffer[i],
208  x3_buffer[i],
209  dy_buffer[i],
210  dx1_buffer[i],
211  dx2_buffer[i],
212  dx3_buffer[i]);
213  }
214  }
215  else {
216  auto const width = x1.Width();
217  auto const height = x1.Height();
219  for (El::Int jj = 0; jj < width; ++jj) {
220  for (El::Int ii = 0; ii < height; ++ii) {
221  f(x1(ii, jj),
222  x2(ii, jj),
223  x3(ii, jj),
224  dy(ii, jj),
225  dx1(ii, jj),
226  dx2(ii, jj),
227  dx3(ii, jj));
228  }
229  }
230  }
231 }
232 
233 } // namespace internal
234 } // namespace lbann
235 #endif // LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED
#define LBANN_CALIPER_MARK_FUNCTION
Definition: profiling.hpp:55
void EntrywiseZipInto(El::Matrix< S, El::Device::CPU > const &A, El::Matrix< T, El::Device::CPU > const &B, El::Matrix< U, El::Device::CPU > &C, F func)
A binary entrywise map c <- f(a,b).
#define LBANN_ASSERT_DEBUG(cond)
Definition: exception.hpp:104
void apply_ternary_backprop_operator(El::Matrix< DataT, El::Device::CPU > const &x1, El::Matrix< DataT, El::Device::CPU > const &x2, El::Matrix< DataT, El::Device::CPU > const &x3, El::Matrix< DataT, El::Device::CPU > const &dy, El::Matrix< DataT, El::Device::CPU > &dx1, El::Matrix< DataT, El::Device::CPU > &dx2, El::Matrix< DataT, El::Device::CPU > &dx3, F f)
#define LBANN_OMP_PARALLEL_FOR_COLLAPSE2
Definition: omp_pragma.hpp:68
#define LBANN_OMP_PARALLEL_FOR
Definition: omp_pragma.hpp:67
void apply_binary_backprop_operator(El::Matrix< DataT, El::Device::CPU > const &x1, El::Matrix< DataT, El::Device::CPU > const &x2, El::Matrix< DataT, El::Device::CPU > const &dy, El::Matrix< DataT, El::Device::CPU > &dx1, El::Matrix< DataT, El::Device::CPU > &dx2, F f)