LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
gpu/gpu_lib.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "Licensee"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_UTILS_GPULIB_HPP
28 #define LBANN_UTILS_GPULIB_HPP
29 
30 #ifdef LBANN_HAS_GPU
31 
32 namespace lbann {
33 namespace gpu_lib {
34 
35 #if defined LBANN_HAS_CUDA
36 using namespace cuda;
37 #elif defined LBANN_HAS_ROCM
38 using namespace rocm;
39 #endif // LBANN_HAS_CUDA
40 
41 // -------------------------------------------------------------
42 // Device properties
43 // -------------------------------------------------------------
44 
46 dim3 max_grid_dims();
47 
49 void clip_grid_dims(dim3& grid_dims);
50 
51 // -------------------------------------------------------------
52 // Device functions
53 // -------------------------------------------------------------
54 
55 #if defined __CUDACC__ || defined __HIPCC__
56 
57 // Atomic add
58 __device__ __forceinline__ __half atomic_add(__half* address, __half val);
59 __device__ __forceinline__ float atomic_add(float* address, float val);
60 __device__ __forceinline__ double atomic_add(double* address, double val);
61 
74 template <size_t bdimx, size_t bdimy, size_t bdimz, class T>
75 __device__ __forceinline__ T block_reduce(T val);
76 
91 template <size_t bdimx, size_t bdimy, size_t bdimz, class T, class Op>
92 __device__ __forceinline__ T block_reduce(T val);
93 
94 // Unary math functions
95 #define DECLARE_UNARY_MATH_FUNC_WITH_TYPE(func, type) \
96  __device__ __forceinline__ type func(type const& x)
97 #define DECLARE_UNARY_MATH_FUNC(func) \
98  DECLARE_UNARY_MATH_FUNC_WITH_TYPE(func, __half); \
99  DECLARE_UNARY_MATH_FUNC_WITH_TYPE(func, float); \
100  DECLARE_UNARY_MATH_FUNC_WITH_TYPE(func, double)
101 template <typename T>
102 __device__ __forceinline__ T abs(const T& x);
103 __device__ __forceinline__ float abs(float const& x);
104 __device__ __forceinline__ double abs(double const& x);
105 DECLARE_UNARY_MATH_FUNC(round);
106 DECLARE_UNARY_MATH_FUNC(ceil);
107 DECLARE_UNARY_MATH_FUNC(floor);
108 DECLARE_UNARY_MATH_FUNC(sqrt);
109 DECLARE_UNARY_MATH_FUNC(rsqrt);
110 DECLARE_UNARY_MATH_FUNC(exp);
111 DECLARE_UNARY_MATH_FUNC(expm1);
112 DECLARE_UNARY_MATH_FUNC(log);
113 DECLARE_UNARY_MATH_FUNC(log1p);
114 DECLARE_UNARY_MATH_FUNC(cos);
115 DECLARE_UNARY_MATH_FUNC(sin);
116 DECLARE_UNARY_MATH_FUNC(tan);
117 DECLARE_UNARY_MATH_FUNC(acos);
118 DECLARE_UNARY_MATH_FUNC(asin);
119 DECLARE_UNARY_MATH_FUNC(atan);
120 DECLARE_UNARY_MATH_FUNC(cosh);
121 DECLARE_UNARY_MATH_FUNC(sinh);
122 DECLARE_UNARY_MATH_FUNC(tanh);
123 DECLARE_UNARY_MATH_FUNC(acosh);
124 DECLARE_UNARY_MATH_FUNC(asinh);
125 DECLARE_UNARY_MATH_FUNC(atanh);
126 DECLARE_UNARY_MATH_FUNC(erf);
127 DECLARE_UNARY_MATH_FUNC(erfinv);
128 template <typename T>
129 __device__ __forceinline__ bool isfinite(const T& x);
130 template <typename T>
131 __device__ __forceinline__ bool isinf(const T& x);
132 template <typename T>
133 __device__ __forceinline__ bool isnan(const T& x);
134 #undef DECLARE_UNARY_MATH_FUNC
135 #undef DECLARE_UNARY_MATH_FUNC_WITH_TYPE
136 
137 // Binary math functions
138 #define DECLARE_BINARY_UNARY_MATH_FUNC_WITH_TYPE(func, type) \
139  __device__ __forceinline__ type func(type const& x, type const& y)
140 #define DECLARE_BINARY_UNARY_MATH_FUNC(func) \
141  DECLARE_BINARY_UNARY_MATH_FUNC_WITH_TYPE(func, __half); \
142  DECLARE_BINARY_UNARY_MATH_FUNC_WITH_TYPE(func, float); \
143  DECLARE_BINARY_UNARY_MATH_FUNC_WITH_TYPE(func, double)
144 template <typename T>
145 __device__ __forceinline__ T min(const T& x, const T& y);
146 DECLARE_BINARY_UNARY_MATH_FUNC(min);
147 template <typename T>
148 __device__ __forceinline__ T max(const T& x, const T& y);
149 DECLARE_BINARY_UNARY_MATH_FUNC(max);
150 DECLARE_BINARY_UNARY_MATH_FUNC(mod);
151 DECLARE_BINARY_UNARY_MATH_FUNC(pow);
152 #undef DECLARE_BINARY_UNARY_MATH_FUNC
153 #undef DECLARE_BINARY_UNARY_MATH_FUNC_WITH_TYPE
154 
155 // Numeric limits
156 template <typename T>
157 constexpr __device__ __forceinline__ T min();
158 template <typename T>
159 constexpr __device__ __forceinline__ T max();
160 template <typename T>
161 constexpr __device__ __forceinline__ T epsilon();
162 template <typename T>
163 __device__ __forceinline__ T infinity();
164 
166 template <typename T, size_t N>
167 struct array
168 {
169  T vals[N];
170  __host__ __device__ __forceinline__ size_t size() const;
171  __host__ __device__ __forceinline__ T& operator[](size_t i);
172  __host__ __device__ __forceinline__ const T& operator[](size_t i) const;
173 };
174 
175 #endif // __CUDACC__ || __HIPCC__
176 
177 // -------------------------------------------------------------
178 // Helper functions for tensor operations
179 // -------------------------------------------------------------
180 
181 #if defined __CUDACC__ || defined __HIPCC__
182 
187 template <template <typename> class UnaryOperator, typename TensorDataType>
189  const El::AbstractMatrix<TensorDataType>& input,
190  El::AbstractMatrix<TensorDataType>& output);
191 
196 template <template <typename> class BinaryOperator, typename TensorDataType>
198  const El::AbstractMatrix<TensorDataType>& input1,
199  const El::AbstractMatrix<TensorDataType>& input2,
200  El::AbstractMatrix<TensorDataType>& output);
201 
206 template <template <typename> class UnaryOperator, typename TensorDataType>
208  const El::AbstractDistMatrix<TensorDataType>& input,
209  El::AbstractDistMatrix<TensorDataType>& output);
210 
215 template <template <typename> class BinaryOperator, typename TensorDataType>
217  const El::AbstractDistMatrix<TensorDataType>& input1,
218  const El::AbstractDistMatrix<TensorDataType>& input2,
219  El::AbstractDistMatrix<TensorDataType>& output);
220 
221 #endif // __CUDACC__ || __HIPCC__
222 
223 } // namespace gpu_lib
224 } // namespace lbann
225 
226 // Header implementations
227 #include "lbann/utils/impl/cuda.hpp"
229 #include "lbann/utils/impl/rocm.hpp"
230 
231 #endif // LBANN_HAS_GPU
232 #endif // LBANN_UTILS_GPULIB_HPP
void apply_entrywise_binary_operator(const El::AbstractMatrix< TensorDataType > &input1, const El::AbstractMatrix< TensorDataType > &input2, El::AbstractMatrix< TensorDataType > &output)
void clip_grid_dims(dim3 &grid_dims)
void apply_entrywise_unary_operator(const El::AbstractMatrix< TensorDataType > &input, El::AbstractMatrix< TensorDataType > &output)