27 #ifndef LBANN_UTILS_ROCM_HPP 28 #define LBANN_UTILS_ROCM_HPP 31 #include "lbann_config.hpp" 35 #include <hip/hip_runtime.h> 36 #include <thrust/detail/allocator/tagged_allocator.h> 37 #include <thrust/device_vector.h> 38 #include <thrust/memory.h> 39 #include <thrust/system/hip/detail/par.h> 40 #include <thrust/version.h> 45 #define LBANN_ROCM_SYNC(async) \ 48 hipError_t status_ROCM_SYNC = hipDeviceSynchronize(); \ 49 if (status_ROCM_SYNC == hipSuccess) \ 50 status_ROCM_SYNC = hipGetLastError(); \ 51 if (status_ROCM_SYNC != hipSuccess) { \ 52 LBANN_ERROR((async ? "Asynchronous " : ""), \ 54 hipGetErrorString(status_ROCM_SYNC), \ 58 #define LBANN_ROCM_CHECK_LAST_ERROR(async) \ 60 hipError_t status = hipGetLastError(); \ 61 if (status != hipSuccess) { \ 62 LBANN_ERROR((async ? "Asynchronous " : ""), \ 64 hipGetErrorString(status), \ 68 #define FORCE_CHECK_ROCM(rocm_call) \ 72 LBANN_ROCM_SYNC(true); \ 73 hipError_t status_CHECK_ROCM = (rocm_call); \ 74 if (status_CHECK_ROCM != hipSuccess) { \ 75 LBANN_ERROR("ROCm error (", hipGetErrorString(status_CHECK_ROCM), ")"); \ 77 LBANN_ROCM_SYNC(false); \ 79 #define FORCE_CHECK_ROCM_NOSYNC(rocm_call) \ 81 hipError_t status_CHECK_ROCM = (rocm_call); \ 82 if (status_CHECK_ROCM != hipSuccess) { \ 83 LBANN_ERROR("ROCm error (", hipGetErrorString(status_CHECK_ROCM), ")"); \ 87 #define CHECK_ROCM(rocm_call) FORCE_CHECK_ROCM(rocm_call); 89 #define CHECK_ROCM(rocm_call) FORCE_CHECK_ROCM_NOSYNC(rocm_call) 90 #endif // #ifdef LBANN_DEBUG 95 constexpr hipMemcpyKind GPU_MEMCPY_DEVICE_TO_DEVICE = hipMemcpyDeviceToDevice;
106 event_wrapper(
const event_wrapper& other);
107 event_wrapper& operator=(
const event_wrapper& other);
110 void record(hipStream_t stream);
116 hipEvent_t& get_event();
126 hipStream_t m_stream;
134 template <
typename TensorDataType>
135 void copy_tensor(hipStream_t stream,
136 const std::vector<size_t>& dims,
137 const TensorDataType* input,
138 const std::vector<size_t>& input_strides,
139 TensorDataType* output,
140 const std::vector<size_t>& output_strides);
142 void mem_copy_async(
void* output,
154 using execute_on_stream = ::thrust::hip_rocprim::execute_on_stream;
160 template <
typename T = El::byte>
161 class allocator :
public ::thrust::detail::tagged_allocator<
164 ::thrust::pointer<T, execute_on_stream>>
168 typedef ::thrust::detail::tagged_allocator<
171 ::thrust::pointer<T, execute_on_stream>>
173 typedef typename parent_class::value_type value_type;
174 typedef typename parent_class::pointer pointer;
175 typedef typename parent_class::size_type size_type;
176 typedef typename parent_class::system_type system_type;
179 allocator(hipStream_t stream = hydrogen::rocm::GetDefaultStream());
181 pointer allocate(size_type size);
185 void deallocate(pointer buffer, size_type size = 0);
187 system_type& system();
191 hipStream_t m_stream;
193 system_type m_system;
197 template <
typename T>
198 using vector = ::thrust::device_vector<T, allocator<T>>;
204 #endif // LBANN_HAS_ROCM 205 #endif // LBANN_UTILS_ROCM_HPP