LBANN  0.103.0
LivermoreBigArtificialNeuralNetworkToolkit
nvshmem.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 
27 #ifndef LBANN_UTILS_NVSHMEM_HPP_INCLUDED
28 #define LBANN_UTILS_NVSHMEM_HPP_INCLUDED
29 
30 #include "lbann/base.hpp"
31 #ifdef LBANN_HAS_NVSHMEM
34 #include <mpi.h>
35 #define NVSHMEM_USE_NCCL
36 #include <nvshmem.h>
37 #include <nvshmemx.h>
38 
39 namespace lbann {
40 namespace nvshmem {
41 
43 bool is_initialized() noexcept;
44 
46 bool is_finalized() noexcept;
47 
53 bool is_active() noexcept;
54 
61 void initialize(MPI_Comm comm = MPI_COMM_WORLD);
62 
68 void finalize();
69 
74 template <typename T = void>
75 T* malloc(size_t size);
76 
81 template <typename T = void>
82 T* realloc(T* ptr, size_t size);
83 
84 } // namespace nvshmem
85 } // namespace lbann
86 
87 // =============================================
88 // Implementation
89 // =============================================
90 
91 namespace lbann {
92 namespace nvshmem {
93 
94 template <typename T>
95 T* malloc(size_t size)
96 {
97  initialize();
98  if (size == 0) {
99  return nullptr;
100  }
101  CHECK_CUDA(cudaDeviceSynchronize());
102  auto* ptr = nvshmem_malloc(size * sizeof(T));
103  if (ptr == nullptr) {
104  LBANN_ERROR("NVSHMEM failed to allocate a GPU buffer ",
105  "from the symmetric heap ",
106  "(requested ",
107  size,
108  " B)");
109  }
110  return reinterpret_cast<T*>(ptr);
111 }
112 
113 template <typename T>
114 T* realloc(T* ptr, size_t size)
115 {
116  initialize();
117 
119  if (ptr != nullptr) {
120  nvshmem_free(ptr);
121  }
122  return malloc<T>(size);
123 }
124 
125 } // namespace nvshmem
126 } // namespace lbann
127 
128 #endif // LBANN_HAS_NVSHMEM
129 
130 #endif // LBANN_UTILS_NVSHMEM_HPP_INCLUDED
#define LBANN_ERROR(...)
Definition: exception.hpp:37
world_comm_ptr initialize(int &argc, char **&argv)
void finalize(lbann_comm *comm=nullptr)