// EntrywiseZipInto -- a binary entrywise map c <- f(a,b): every entry of C is
// assigned func(a, b), where a and b are the entries of A and B at the same
// position. Signature, per the declaration summary at the end of this listing:
//   void EntrywiseZipInto(El::Matrix<S, CPU> const& A,
//                         El::Matrix<T, CPU> const& B,
//                         El::Matrix<U, CPU>& C, F func)
//
// NOTE(review): this listing is an extract with gaps. The embedded source line
// numbers skip values, so the line declaring A, the definition of CBuf, the
// condition selecting between the two loops below, and all closing braces are
// not visible here. Confirm against the full header before editing.
26 #ifndef LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED 27 #define LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED 37 template <
typename S,
typename T,
typename U,
typename F>
39 El::Matrix<T, El::Device::CPU>
const& B,
40 El::Matrix<U, El::Device::CPU>& C,
// The iteration space is taken from A alone (m rows, n columns).
// NOTE(review): B and C are presumably expected to have the same dimensions;
// no such check is visible in this extract -- confirm.
44 auto const m = A.Height();
45 auto const n = A.Width();
// Read-only raw pointers to the entries of the two inputs.
53 S
const* ABuf = A.LockedBuffer();
54 T
const* BBuf = B.LockedBuffer();
// Leading dimensions: the offset between consecutive columns in each buffer,
// as used by the i + j * LDim indexing in the two-level loop below.
57 auto const ALDim = A.LDim();
58 auto const BLDim = B.LDim();
59 auto const CLDim = C.LDim();
// Single-index loop: touches only the first m entries of each buffer, i.e.
// column 0, so no leading dimension is needed.
// NOTE(review): presumably guarded by a single-column test on a line that is
// not shown -- confirm.
65 for (El::Int i = 0; i < m; ++i) {
66 CBuf[i] = func(ABuf[i], BBuf[i]);
// General case: visit every (i, j) with j over columns and i over rows,
// addressing entry (i, j) of each matrix as i + j * LDim with that matrix's
// own leading dimension. EL_PARALLEL_FOR_COLLAPSE2 is a macro defined outside
// this listing; presumably it parallelizes the collapsed loop nest -- confirm.
70 EL_PARALLEL_FOR_COLLAPSE2
71 for (El::Int j = 0; j < n; ++j) {
72 for (El::Int i = 0; i < m; ++i) {
73 CBuf[i + j * CLDim] = func(ABuf[i + j * ALDim], BBuf[i + j * BLDim]);
// apply_binary_backprop_operator -- calls f once per matrix entry with the
// corresponding entries of (x1, x2, dy, dx1, dx2), in that order. Signature,
// per the declaration summary at the end of this listing:
//   void apply_binary_backprop_operator(x1, x2, dy  (const),
//                                       dx1, dx2    (non-const), F f)
// x1, x2 and dy are read through const access; dx1 and dx2 entries come from
// non-const buffers/accessors, so f presumably writes its results into them.
// Any value returned by f is discarded.
//
// NOTE(review): this listing is an extract with gaps (embedded line numbers
// skip values); the line naming the function, the `else` joining the two
// branches, and the closing braces are not visible here.
87 template <
typename DataT,
typename F>
89 El::Matrix<DataT, El::Device::CPU>
const& x1,
90 El::Matrix<DataT, El::Device::CPU>
const& x2,
91 El::Matrix<DataT, El::Device::CPU>
const& dy,
92 El::Matrix<DataT, El::Device::CPU>& dx1,
93 El::Matrix<DataT, El::Device::CPU>& dx2,
// Fast path: taken only when all five matrices report Contiguous(), so each
// one can be walked with a single flat index.
97 if (x1.Contiguous() && x2.Contiguous() && dy.Contiguous() &&
98 dx1.Contiguous() && dx2.Contiguous()) {
99 const auto* x1_buffer = x1.LockedBuffer();
100 const auto* x2_buffer = x2.LockedBuffer();
101 const auto* dy_buffer = dy.LockedBuffer();
102 auto* dx1_buffer = dx1.Buffer();
103 auto* dx2_buffer = dx2.Buffer();
// The entry count is computed from x1 only.
// NOTE(review): assumes the other four matrices have the same dimensions as
// x1 -- no check is visible in this extract; confirm.
104 const size_t size = x1.Height() * x1.Width();
106 for (
size_t i = 0; i < size; ++i) {
107 f(x1_buffer[i], x2_buffer[i], dy_buffer[i], dx1_buffer[i], dx2_buffer[i]);
// Second path: element access through operator()(row, column), with columns
// (jj) in the outer loop and rows (ii) in the inner loop; dimensions again
// come from x1.
// NOTE(review): presumably the `else` of the contiguity test above -- the
// joining line is not shown; confirm.
111 auto const width = x1.Width();
112 auto const height = x1.Height();
114 for (El::Int jj = 0; jj < width; ++jj) {
115 for (El::Int ii = 0; ii < height; ++ii) {
116 f(x1(ii, jj), x2(ii, jj), dy(ii, jj), dx1(ii, jj), dx2(ii, jj));
// Three-input entrywise map: every entry of D is assigned func(a, b, c), where
// a, b and c are the entries of A, B and C at the same position.
//
// NOTE(review): the line naming this function is not visible in this listing
// (embedded line numbers skip values). It is presumably an overload of
// EntrywiseZipInto taking (A, B, C, D, func) -- confirm against the full
// header. The declaration of A, the condition selecting between the two loops
// below, and the closing braces are likewise not shown.
124 template <
typename S,
typename T,
typename U,
typename R,
typename F>
126 El::Matrix<T, El::Device::CPU>
const& B,
127 El::Matrix<U, El::Device::CPU>
const& C,
128 El::Matrix<R, El::Device::CPU>& D,
// The iteration space is taken from A alone (m rows, n columns).
// NOTE(review): B, C and D are presumably expected to match these dimensions;
// no such check is visible in this extract -- confirm.
132 auto const m = A.Height();
133 auto const n = A.Width();
// Read-only raw pointers for the three inputs, writable pointer for the output.
144 S
const* ABuf = A.LockedBuffer();
145 T
const* BBuf = B.LockedBuffer();
146 U
const* CBuf = C.LockedBuffer();
147 R* DBuf = D.Buffer();
// Leading dimensions: the offset between consecutive columns in each buffer,
// as used by the i + j * LDim indexing in the two-level loop below.
149 auto const ALDim = A.LDim();
150 auto const BLDim = B.LDim();
151 auto const CLDim = C.LDim();
152 auto const DLDim = D.LDim();
// Single-index loop: touches only the first m entries of each buffer, i.e.
// column 0, so no leading dimension is needed.
// NOTE(review): presumably guarded by a single-column test on a line that is
// not shown -- confirm.
158 for (El::Int i = 0; i < m; ++i) {
159 DBuf[i] = func(ABuf[i], BBuf[i], CBuf[i]);
// General case: visit every (i, j) with j over columns and i over rows,
// addressing entry (i, j) of each matrix as i + j * LDim with that matrix's
// own leading dimension. EL_PARALLEL_FOR_COLLAPSE2 is a macro defined outside
// this listing; presumably it parallelizes the collapsed loop nest -- confirm.
163 EL_PARALLEL_FOR_COLLAPSE2
164 for (El::Int j = 0; j < n; ++j) {
165 for (El::Int i = 0; i < m; ++i) {
166 DBuf[i + j * DLDim] =
167 func(ABuf[i + j * ALDim], BBuf[i + j * BLDim], CBuf[i + j * CLDim]);
// apply_ternary_backprop_operator -- three-input counterpart of the binary
// backprop helper. Signature, per the declaration summary at the end of this
// listing:
//   void apply_ternary_backprop_operator(x1, x2, x3, dy  (const),
//                                        dx1, dx2, dx3   (non-const), F f)
//
// NOTE(review): this listing is an extract with gaps (embedded line numbers
// skip values). The line naming the function, the final line of the `if`
// condition, BOTH loop bodies (the calls to f), the `else`, and the closing
// braces are not visible here. The calls presumably pass the corresponding
// entries of all seven matrices to f -- confirm against the full header.
181 template <
typename DataT,
typename F>
183 El::Matrix<DataT, El::Device::CPU>
const& x1,
184 El::Matrix<DataT, El::Device::CPU>
const& x2,
185 El::Matrix<DataT, El::Device::CPU>
const& x3,
186 El::Matrix<DataT, El::Device::CPU>
const& dy,
187 El::Matrix<DataT, El::Device::CPU>& dx1,
188 El::Matrix<DataT, El::Device::CPU>& dx2,
189 El::Matrix<DataT, El::Device::CPU>& dx3,
// Fast path, gated on the matrices reporting Contiguous(). Six of the seven
// checks are visible; the condition continues on a line that is not shown
// (presumably the dx3 check) -- confirm.
193 if (x1.Contiguous() && x2.Contiguous() && x3.Contiguous() &&
194 dy.Contiguous() && dx1.Contiguous() && dx2.Contiguous() &&
// Inputs are obtained as read-only buffers, gradients as writable buffers.
196 const auto* x1_buffer = x1.LockedBuffer();
197 const auto* x2_buffer = x2.LockedBuffer();
198 const auto* x3_buffer = x3.LockedBuffer();
199 const auto* dy_buffer = dy.LockedBuffer();
200 auto* dx1_buffer = dx1.Buffer();
201 auto* dx2_buffer = dx2.Buffer();
202 auto* dx3_buffer = dx3.Buffer();
// The entry count is computed from x1 only.
// NOTE(review): assumes the other six matrices have the same dimensions as
// x1 -- no check is visible in this extract; confirm.
203 const size_t size = x1.Height() * x1.Width();
205 for (
size_t i = 0; i < size; ++i) {
// Second path: a column-outer (jj) / row-inner (ii) loop nest over x1's
// dimensions; the loop body is not shown in this extract.
216 auto const width = x1.Width();
217 auto const height = x1.Height();
219 for (El::Int jj = 0; jj < width; ++jj) {
220 for (El::Int ii = 0; ii < height; ++ii) {
235 #endif // LBANN_SRC_OPERATORS_MATH_COMMON_HPP_INCLUDED #define LBANN_CALIPER_MARK_FUNCTION
void EntrywiseZipInto(El::Matrix< S, El::Device::CPU > const &A, El::Matrix< T, El::Device::CPU > const &B, El::Matrix< U, El::Device::CPU > &C, F func)
A binary entrywise map c <- f(a,b).
#define LBANN_ASSERT_DEBUG(cond)
void apply_ternary_backprop_operator(El::Matrix< DataT, El::Device::CPU > const &x1, El::Matrix< DataT, El::Device::CPU > const &x2, El::Matrix< DataT, El::Device::CPU > const &x3, El::Matrix< DataT, El::Device::CPU > const &dy, El::Matrix< DataT, El::Device::CPU > &dx1, El::Matrix< DataT, El::Device::CPU > &dx2, El::Matrix< DataT, El::Device::CPU > &dx3, F f)
#define LBANN_OMP_PARALLEL_FOR_COLLAPSE2
#define LBANN_OMP_PARALLEL_FOR
void apply_binary_backprop_operator(El::Matrix< DataT, El::Device::CPU > const &x1, El::Matrix< DataT, El::Device::CPU > const &x2, El::Matrix< DataT, El::Device::CPU > const &dy, El::Matrix< DataT, El::Device::CPU > &dx1, El::Matrix< DataT, El::Device::CPU > &dx2, F f)