LBANN  0.103.0
Livermore Big Artificial Neural Network Toolkit
cufft_wrapper.hpp
Go to the documentation of this file.
1 // Copyright (c) 2014-2023, Lawrence Livermore National Security, LLC.
3 // Produced at the Lawrence Livermore National Laboratory.
4 // Written by the LBANN Research Team (B. Van Essen, et al.) listed in
5 // the CONTRIBUTORS file. <lbann-dev@llnl.gov>
6 //
7 // LLNL-CODE-697807.
8 // All rights reserved.
9 //
10 // This file is part of LBANN: Livermore Big Artificial Neural Network
11 // Toolkit. For details, see http://software.llnl.gov/LBANN or
12 // https://github.com/LLNL/LBANN.
13 //
14 // Licensed under the Apache License, Version 2.0 (the "License"); you
15 // may not use this file except in compliance with the License. You may
16 // obtain a copy of the License at:
17 //
18 // http://www.apache.org/licenses/LICENSE-2.0
19 //
20 // Unless required by applicable law or agreed to in writing, software
21 // distributed under the License is distributed on an "AS IS" BASIS,
22 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
23 // implied. See the License for the specific language governing
24 // permissions and limitations under the License.
26 #ifndef LBANN_UTILS_CUFFT_WRAPPER_HPP_
27 #define LBANN_UTILS_CUFFT_WRAPPER_HPP_
28 
29 #include <lbann/base.hpp>
33 
34 #include <cufft.h>
35 
/**
 * @brief Evaluate a cuFFT call and raise an LBANN error if it fails.
 *
 * The expression @c cmd is evaluated exactly once. When its result is
 * anything other than CUFFT_SUCCESS, LBANN_ERROR is invoked with the
 * stringified command, the name of the result value, and its
 * description. The do/while(0) wrapper makes the macro behave like a
 * single statement.
 */
#define LBANN_CHECK_CUFFT(cmd)                                                 \
  do {                                                                         \
    auto const lbann_cufft_status_ = (cmd);                                    \
    if (CUFFT_SUCCESS != lbann_cufft_status_) {                                \
      LBANN_ERROR("cuFFT error!\n\n"                                           \
                  " cmd: " #cmd "\n"                                           \
                  " result: ",                                                 \
                  lbann::cufft::value_as_string(lbann_cufft_status_),          \
                  "\n"                                                         \
                  " message: ",                                                \
                  lbann::cufft::result_string(lbann_cufft_status_),            \
                  "\n\n");                                                     \
    }                                                                          \
  } while (0)
50 
51 namespace lbann {
52 namespace cufft {
53 
55 std::string value_as_string(cufftResult_t);
57 std::string result_string(cufftResult_t);
58 
59 template <typename T>
60 struct cuFFTTypeT;
61 
62 template <>
63 struct cuFFTTypeT<float>
64 {
65  using type = float;
66 };
67 template <>
68 struct cuFFTTypeT<double>
69 {
70  using type = double;
71 };
72 template <>
73 struct cuFFTTypeT<El::Complex<float>>
74 {
75  using type = cufftComplex;
76 };
77 template <>
78 struct cuFFTTypeT<El::Complex<double>>
79 {
80  using type = cufftDoubleComplex;
81 };
82 
83 template <typename T>
84 using cuFFTType = typename cuFFTTypeT<T>::type;
85 
86 template <typename T>
87 auto AsCUFFTType(T* buffer)
88 {
89  return reinterpret_cast<cuFFTType<T>*>(buffer);
90 }
91 
93 template <typename InType, typename OutType>
95 
96 template <>
97 struct cuFFTExecutor<El::Complex<float>, El::Complex<float>>
98 {
99  static constexpr auto transform_type = CUFFT_C2C;
100  static void Execute(cufftHandle plan,
101  El::Complex<float>* input_data,
102  El::Complex<float>* output_data,
103  int direction)
104  {
105  LBANN_CHECK_CUFFT(cufftExecC2C(plan,
106  AsCUFFTType(input_data),
107  AsCUFFTType(output_data),
108  direction));
109  }
110 }; // struct cuFFTExecutor<Complex<float>, Complex<float>>
111 
112 template <>
113 struct cuFFTExecutor<El::Complex<double>, El::Complex<double>>
114 {
115  static constexpr auto transform_type = CUFFT_Z2Z;
116  static void Execute(cufftHandle plan,
117  El::Complex<double>* input_data,
118  El::Complex<double>* output_data,
119  int direction)
120  {
121  LBANN_CHECK_CUFFT(cufftExecZ2Z(plan,
122  AsCUFFTType(input_data),
123  AsCUFFTType(output_data),
124  direction));
125  }
126 }; // struct cuFFTExecutor<Complex<double>, Complex<double>>
127 
140 template <typename InputTypeT>
142 {
143 public:
144  using InputType = InputTypeT;
146 
149 
150  using RealMatType = El::Matrix<RealType, El::Device::GPU>;
151  using ComplexMatType = El::Matrix<ComplexType, El::Device::GPU>;
152 
153  using ComplexBufferType = El::simple_buffer<ComplexType, El::Device::GPU>;
154 
155  using InputMatType = El::Matrix<InputType, El::Device::GPU>;
156  using OutputMatType = El::Matrix<OutputType, El::Device::GPU>;
157 
159  using PlanType = cufftHandle;
160 
161 private:
163  {
164  size_t worksize_ = 0ULL;
165  PlanType plan_ = 0;
166  int num_samples_ = -1; // It's just an int in the basic interface
167  InternalPlanType(PlanType plan, size_t worksize, int n)
168  : worksize_{worksize}, plan_{plan}, num_samples_{n}
169  {}
171  {
172  if (plan_ != 0) {
173  cufftDestroy(plan_);
174  plan_ = 0;
175  }
176  }
178  : worksize_{other.worksize_},
179  plan_{other.plan_},
180  num_samples_{other.num_samples_}
181  {
182  other.worksize_ = 0ULL;
183  other.plan_ = 0;
184  other.num_samples_ = -1;
185  }
186  }; // struct InternalPlanType
187 
188 public:
189  cuFFTWrapper() = default;
190  ~cuFFTWrapper() = default;
191  // Movable, not copyable.
192  cuFFTWrapper(cuFFTWrapper&& other) noexcept = default;
193  cuFFTWrapper(cuFFTWrapper const&) = delete;
202  OutputMatType& out,
203  std::vector<int> const& full_dims)
204  {
205  setup_common(in, out, full_dims);
206  }
213  void setup_forward(InputMatType& in, std::vector<int> const& full_dims)
214  {
216  return setup_forward(in, in, full_dims);
217  }
218 
227  InputMatType& out,
228  std::vector<int> const& full_dims)
229  {
230  return setup_common(in, out, full_dims);
231  }
232 
239  void setup_backward(OutputMatType& in, std::vector<int> const& full_dims)
240  {
241  return setup_backward(in, in, full_dims);
242  }
243 
245  {
246  return compute_common(in, out, CUFFT_FORWARD);
247  }
248 
250  {
251  return compute_common(in, in, CUFFT_FORWARD);
252  }
253 
255  {
256  return compute_common(in, out, CUFFT_INVERSE);
257  }
258 
260  {
261  return compute_common(in, in, CUFFT_INVERSE);
262  }
263 
264 private:
265  void compute_common(OutputMatType& in, InputMatType& out, int dir) const
266  {
267  auto const num_samples = in.Width();
268  if (num_samples == 0)
269  return;
270 
271  auto const good_plan =
272  std::find_if(cbegin(plans_),
273  cend(plans_),
274  [num_samples](InternalPlanType const& a) {
275  return a.num_samples_ == num_samples;
276  });
277  if (good_plan == cend(plans_))
278  LBANN_ERROR("No valid cuFFT plan found.");
279 
280  // Setup the workspace
281  ComplexBufferType workspace(good_plan->worksize_,
282  El::SyncInfoFromMatrix(out));
283  LBANN_CHECK_CUFFT(cufftSetWorkArea(good_plan->plan_, workspace.data()));
284 
285  // Run the FFT
286  bool const contiguous_samples = (in.Contiguous()) && (out.Contiguous());
287  if (contiguous_samples) {
288  ExecutorType::Execute(good_plan->plan_, in.Buffer(), out.Buffer(), dir);
289  }
290  else {
291  auto num_batches = in.Width();
292  for (El::Int ii = 0; ii < num_batches; ++ii) {
293  ExecutorType::Execute(good_plan->plan_,
294  in.Buffer() + ii * in.LDim(),
295  out.Buffer() + ii * out.LDim(),
296  dir);
297  }
298  }
299  }
300 
301  template <typename InMatT, typename OutMatT>
302  void setup_common(InMatT& in, OutMatT& out, std::vector<int> const& full_dims)
303  {
304  using in_data_type = typename InMatT::value_type;
305  using out_data_type = typename OutMatT::value_type;
307 
308  // Look for an acceptable plan
309  int const num_samples = in.Width();
310  if (num_samples == 0)
311  return;
312 
313  auto const good_plan =
314  std::find_if(cbegin(plans_),
315  cend(plans_),
316  [num_samples](InternalPlanType const& a) {
317  return a.num_samples_ == num_samples;
318  });
319 
320  // We don't have a plan for this yet; let's create one!
321  if (good_plan == cend(plans_)) {
322  PlanType plan;
323  size_t workspace_size = 0ULL;
324 
325  // This is annoying... I could const_cast, but I'm not 100%
326  // certain cuFFT doesn't change this data.
327  std::vector<int> full_dims_mutable(full_dims);
328  LBANN_CHECK_CUFFT(cufftCreate(&plan));
329  LBANN_CHECK_CUFFT(cufftSetStream(plan, SyncInfoFromMatrix(out).Stream()));
330  // We'll handle our own workspace
331  LBANN_CHECK_CUFFT(cufftSetAutoAllocation(plan, 0));
332 
333  auto const& input_dims = Dims::input_dims(full_dims);
334  auto const& output_dims = Dims::output_dims(full_dims);
335  int const num_feature_maps = full_dims.front();
336  int const feature_map_ndims = full_dims.size() - 1;
337  bool const contiguous_samples = (in.Contiguous()) && (out.Contiguous());
338 
339  if (feature_map_ndims > 3 || feature_map_ndims == 0)
340  LBANN_ERROR("Only 1-, 2-, and 3-D FFTs are supported in cuFFT.");
341 
342  int const input_feature_map_size =
343  get_linear_size(feature_map_ndims, input_dims.data() + 1);
344  int const output_feature_map_size =
345  get_linear_size(feature_map_ndims, output_dims.data() + 1);
346 
347  // Handle the easy case. In this case, all the FFTs to be done
348  // are contiguous in memory. Super! Let's just set it up to do
349  // them all as one big batch.
350  if (contiguous_samples) {
351  int const num_transforms = num_samples * num_feature_maps;
352  LBANN_CHECK_CUFFT(cufftMakePlanMany(plan,
353  feature_map_ndims,
354  full_dims_mutable.data() + 1,
355  nullptr,
356  1,
357  input_feature_map_size,
358  nullptr,
359  1,
360  output_feature_map_size,
361  ExecutorType::transform_type,
362  num_transforms,
363  &workspace_size));
364  }
365  else {
366  // In this case, we apply the FFT to each sample, and, come
367  // execution time, we will loop over the samples. (An
368  // alternative might pick whether to loop over samples or
369  // channels, whichever is fewer in number. However, this is a
370  // book-keeping headache.)
371  int const num_transforms = num_feature_maps;
372  LBANN_CHECK_CUFFT(cufftMakePlanMany(plan,
373  feature_map_ndims,
374  full_dims_mutable.data() + 1,
375  nullptr,
376  1,
377  input_feature_map_size,
378  nullptr,
379  1,
380  output_feature_map_size,
381  ExecutorType::transform_type,
382  num_transforms,
383  &workspace_size));
384  }
385 
386  if (plan == 0)
387  LBANN_ERROR("cuFFT plan construction failed "
388  "but cuFFT reported no errors.");
389 
390  plans_.emplace_back(plan, workspace_size, num_samples);
391  }
392  }
393 
394 private:
395  // These are likely to be so few in number that a linear search is
396  // going to be fine.
397  std::vector<InternalPlanType> plans_;
398 
399 }; // class cuFFTWrapper
400 
401 } // namespace cufft
402 } // namespace lbann
403 #endif // LBANN_UTILS_CUFFT_WRAPPER_HPP_
static void Execute(cufftHandle plan, El::Complex< double > *input_data, El::Complex< double > *output_data, int direction)
typename ToRealT< T >::type ToReal
Definition: fft_common.hpp:50
static void Execute(cufftHandle plan, El::Complex< float > *input_data, El::Complex< float > *output_data, int direction)
void setup_backward(OutputMatType &in, InputMatType &out, std::vector< int > const &full_dims)
Setup the backward (inverse) transform.
auto get_linear_size(std::vector< T > const &dims)
Definition: dim_helpers.hpp:59
typename ToComplexT< T >::type ToComplex
Definition: fft_common.hpp:65
void compute_common(OutputMatType &in, InputMatType &out, int dir) const
#define LBANN_ERROR(...)
Definition: exception.hpp:37
El::Matrix< RealType, El::Device::GPU > RealMatType
void setup_backward(OutputMatType &in, std::vector< int > const &full_dims)
Setup the in-place backward (inverse) transform.
ToComplex< InputType > OutputType
#define LBANN_CHECK_CUFFT(cmd)
void compute_forward(InputMatType &in) const
El::simple_buffer< ComplexType, El::Device::GPU > ComplexBufferType
El::Matrix< ComplexType, El::Device::GPU > ComplexMatType
InternalPlanType(PlanType plan, size_t worksize, int n)
auto AsCUFFTType(T *buffer)
typename cuFFTTypeT< T >::type cuFFTType
void compute_forward(InputMatType &in, OutputMatType &out) const
Wrapper around cuFFT.
InternalPlanType(InternalPlanType &&other) noexcept
void setup_forward(InputMatType &in, OutputMatType &out, std::vector< int > const &full_dims)
Setup the forward transform.
ToReal< InputType > RealType
void compute_backward(OutputMatType &in) const
std::vector< InternalPlanType > plans_
std::string value_as_string(cufftResult_t)
The stringified name of the enumerated value.
ToComplex< InputType > ComplexType
std::string result_string(cufftResult_t)
The docstring for the given result.
void compute_backward(OutputMatType &in, InputMatType &out) const
Alias around the C-compatible API.
El::Matrix< OutputType, El::Device::GPU > OutputMatType
void setup_common(InMatT &in, OutMatT &out, std::vector< int > const &full_dims)
El::Matrix< InputType, El::Device::GPU > InputMatType
void setup_forward(InputMatType &in, std::vector< int > const &full_dims)
Setup an in-place forward transform.