LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
gpu/rocm.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "Licensee"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the license.
26 
27 #ifndef LBANN_UTILS_ROCM_HPP
28 #define LBANN_UTILS_ROCM_HPP
29 
31 #include "lbann_config.hpp"
32 
33 #ifdef LBANN_HAS_ROCM
34 
35 #include <hip/hip_runtime.h>
36 #include <thrust/detail/allocator/tagged_allocator.h>
37 #include <thrust/device_vector.h>
38 #include <thrust/memory.h>
39 #include <thrust/system/hip/detail/par.h>
40 #include <thrust/version.h>
41 
42 // -------------------------------------------------------------
43 // Error utility macros
44 // -------------------------------------------------------------
45 #define LBANN_ROCM_SYNC(async) \
46  do { \
47  /* Synchronize GPU and check for errors. */ \
48  hipError_t status_ROCM_SYNC = hipDeviceSynchronize(); \
49  if (status_ROCM_SYNC == hipSuccess) \
50  status_ROCM_SYNC = hipGetLastError(); \
51  if (status_ROCM_SYNC != hipSuccess) { \
52  LBANN_ERROR((async ? "Asynchronous " : ""), \
53  "ROCm error (", \
54  hipGetErrorString(status_ROCM_SYNC), \
55  ")"); \
56  } \
57  } while (0)
58 #define LBANN_ROCM_CHECK_LAST_ERROR(async) \
59  do { \
60  hipError_t status = hipGetLastError(); \
61  if (status != hipSuccess) { \
62  LBANN_ERROR((async ? "Asynchronous " : ""), \
63  "ROCm error (", \
64  hipGetErrorString(status), \
65  ")"); \
66  } \
67  } while (0)
68 #define FORCE_CHECK_ROCM(rocm_call) \
69  do { \
70  /* Call ROCM API routine, synchronizing before and */ \
71  /* after to check for errors. */ \
72  LBANN_ROCM_SYNC(true); \
73  hipError_t status_CHECK_ROCM = (rocm_call); \
74  if (status_CHECK_ROCM != hipSuccess) { \
75  LBANN_ERROR("ROCm error (", hipGetErrorString(status_CHECK_ROCM), ")"); \
76  } \
77  LBANN_ROCM_SYNC(false); \
78  } while (0)
79 #define FORCE_CHECK_ROCM_NOSYNC(rocm_call) \
80  do { \
81  hipError_t status_CHECK_ROCM = (rocm_call); \
82  if (status_CHECK_ROCM != hipSuccess) { \
83  LBANN_ERROR("ROCm error (", hipGetErrorString(status_CHECK_ROCM), ")"); \
84  } \
85  } while (0)
86 #ifdef LBANN_DEBUG
87 #define CHECK_ROCM(rocm_call) FORCE_CHECK_ROCM(rocm_call);
88 #else
89 #define CHECK_ROCM(rocm_call) FORCE_CHECK_ROCM_NOSYNC(rocm_call)
90 #endif // #ifdef LBANN_DEBUG
91 
92 namespace lbann {
93 namespace rocm {
94 
95 constexpr hipMemcpyKind GPU_MEMCPY_DEVICE_TO_DEVICE = hipMemcpyDeviceToDevice;
96 
97 // -------------------------------------------------------------
98 // Wrapper classes
99 // -------------------------------------------------------------
100 
102 class event_wrapper
103 {
104 public:
105  event_wrapper();
106  event_wrapper(const event_wrapper& other);
107  event_wrapper& operator=(const event_wrapper& other);
108  ~event_wrapper();
110  void record(hipStream_t stream);
112  bool query() const;
114  void synchronize();
116  hipEvent_t& get_event();
117 
118 private:
122  hipEvent_t m_event;
126  hipStream_t m_stream;
127 };
128 
129 // -------------------------------------------------------------
130 // Helper functions for tensor operations
131 // -------------------------------------------------------------
132 
134 template <typename TensorDataType>
135 void copy_tensor(hipStream_t stream,
136  const std::vector<size_t>& dims,
137  const TensorDataType* input,
138  const std::vector<size_t>& input_strides,
139  TensorDataType* output,
140  const std::vector<size_t>& output_strides);
141 
142 void mem_copy_async(void* output,
143  const void* input,
144  const size_t count,
145  hipMemcpyKind kind,
146  hipStream_t stream);
147 
148 // -------------------------------------------------------------
149 // Utilities for Thrust
150 // -------------------------------------------------------------
151 namespace thrust {
152 
154 using execute_on_stream = ::thrust::hip_rocprim::execute_on_stream;
155 
160 template <typename T = El::byte>
161 class allocator : public ::thrust::detail::tagged_allocator<
162  T,
163  execute_on_stream,
164  ::thrust::pointer<T, execute_on_stream>>
165 {
166 public:
167  // Convenient typedefs
168  typedef ::thrust::detail::tagged_allocator<
169  T,
170  execute_on_stream,
171  ::thrust::pointer<T, execute_on_stream>>
172  parent_class;
173  typedef typename parent_class::value_type value_type;
174  typedef typename parent_class::pointer pointer;
175  typedef typename parent_class::size_type size_type;
176  typedef typename parent_class::system_type system_type;
177 
179  allocator(hipStream_t stream = hydrogen::rocm::GetDefaultStream());
181  pointer allocate(size_type size);
185  void deallocate(pointer buffer, size_type size = 0);
187  system_type& system();
188 
189 private:
191  hipStream_t m_stream;
193  system_type m_system;
194 };
195 
197 template <typename T>
198 using vector = ::thrust::device_vector<T, allocator<T>>;
199 
200 } // namespace thrust
201 } // namespace rocm
202 } // namespace lbann
203 
204 #endif // LBANN_HAS_ROCM
205 #endif // LBANN_UTILS_ROCM_HPP