26 #ifndef LBANN_UTILS_CUFFT_WRAPPER_HPP_ 27 #define LBANN_UTILS_CUFFT_WRAPPER_HPP_ 36 #define LBANN_CHECK_CUFFT(cmd) \ 38 auto const lbann_check_cufft_result_ = (cmd); \ 39 if (lbann_check_cufft_result_ != CUFFT_SUCCESS) { \ 40 LBANN_ERROR("cuFFT error!\n\n" \ 43 lbann::cufft::value_as_string(lbann_check_cufft_result_), \ 46 lbann::cufft::result_string(lbann_check_cufft_result_), \ 80 using type = cufftDoubleComplex;
93 template <
typename InType,
typename OutType>
99 static constexpr
auto transform_type = CUFFT_C2C;
101 El::Complex<float>* input_data,
102 El::Complex<float>* output_data,
115 static constexpr
auto transform_type = CUFFT_Z2Z;
117 El::Complex<double>* input_data,
118 El::Complex<double>* output_data,
140 template <
typename InputTypeT>
164 size_t worksize_ = 0ULL;
166 int num_samples_ = -1;
168 : worksize_{worksize}, plan_{plan}, num_samples_{n}
178 : worksize_{other.worksize_},
180 num_samples_{other.num_samples_}
182 other.worksize_ = 0ULL;
184 other.num_samples_ = -1;
203 std::vector<int>
const& full_dims)
205 setup_common(in, out, full_dims);
216 return setup_forward(in, in, full_dims);
228 std::vector<int>
const& full_dims)
230 return setup_common(in, out, full_dims);
241 return setup_backward(in, in, full_dims);
246 return compute_common(in, out, CUFFT_FORWARD);
251 return compute_common(in, in, CUFFT_FORWARD);
256 return compute_common(in, out, CUFFT_INVERSE);
261 return compute_common(in, in, CUFFT_INVERSE);
267 auto const num_samples = in.Width();
268 if (num_samples == 0)
271 auto const good_plan =
272 std::find_if(cbegin(plans_),
277 if (good_plan == cend(plans_))
282 El::SyncInfoFromMatrix(out));
286 bool const contiguous_samples = (in.Contiguous()) && (out.Contiguous());
287 if (contiguous_samples) {
288 ExecutorType::Execute(good_plan->plan_, in.Buffer(), out.Buffer(), dir);
291 auto num_batches = in.Width();
292 for (El::Int ii = 0; ii < num_batches; ++ii) {
293 ExecutorType::Execute(good_plan->plan_,
294 in.Buffer() + ii * in.LDim(),
295 out.Buffer() + ii * out.LDim(),
301 template <
typename InMatT,
typename OutMatT>
302 void setup_common(InMatT& in, OutMatT& out, std::vector<int>
const& full_dims)
304 using in_data_type =
typename InMatT::value_type;
305 using out_data_type =
typename OutMatT::value_type;
309 int const num_samples = in.Width();
310 if (num_samples == 0)
313 auto const good_plan =
314 std::find_if(cbegin(plans_),
321 if (good_plan == cend(plans_)) {
323 size_t workspace_size = 0ULL;
327 std::vector<int> full_dims_mutable(full_dims);
333 auto const& input_dims = Dims::input_dims(full_dims);
334 auto const& output_dims = Dims::output_dims(full_dims);
335 int const num_feature_maps = full_dims.front();
336 int const feature_map_ndims = full_dims.size() - 1;
337 bool const contiguous_samples = (in.Contiguous()) && (out.Contiguous());
339 if (feature_map_ndims > 3 || feature_map_ndims == 0)
340 LBANN_ERROR(
"Only 1-, 2-, and 3-D FFTs are supported in cuFFT.");
342 int const input_feature_map_size =
344 int const output_feature_map_size =
350 if (contiguous_samples) {
351 int const num_transforms = num_samples * num_feature_maps;
354 full_dims_mutable.data() + 1,
357 input_feature_map_size,
360 output_feature_map_size,
361 ExecutorType::transform_type,
371 int const num_transforms = num_feature_maps;
374 full_dims_mutable.data() + 1,
377 input_feature_map_size,
380 output_feature_map_size,
381 ExecutorType::transform_type,
388 "but cuFFT reported no errors.");
390 plans_.emplace_back(plan, workspace_size, num_samples);
403 #endif // LBANN_UTILS_CUFFT_WRAPPER_HPP_ static void Execute(cufftHandle plan, El::Complex< double > *input_data, El::Complex< double > *output_data, int direction)
typename ToRealT< T >::type ToReal
static void Execute(cufftHandle plan, El::Complex< float > *input_data, El::Complex< float > *output_data, int direction)
void setup_backward(OutputMatType &in, InputMatType &out, std::vector< int > const &full_dims)
Set up the backward (inverse) transform.
auto get_linear_size(std::vector< T > const &dims)
typename ToComplexT< T >::type ToComplex
void compute_common(OutputMatType &in, InputMatType &out, int dir) const
El::Matrix< RealType, El::Device::GPU > RealMatType
void setup_backward(OutputMatType &in, std::vector< int > const &full_dims)
Set up the in-place backward (inverse) transform.
ToComplex< InputType > OutputType
#define LBANN_CHECK_CUFFT(cmd)
void compute_forward(InputMatType &in) const
El::simple_buffer< ComplexType, El::Device::GPU > ComplexBufferType
El::Matrix< ComplexType, El::Device::GPU > ComplexMatType
InternalPlanType(PlanType plan, size_t worksize, int n)
auto AsCUFFTType(T *buffer)
typename cuFFTTypeT< T >::type cuFFTType
void compute_forward(InputMatType &in, OutputMatType &out) const
InternalPlanType(InternalPlanType &&other) noexcept
void setup_forward(InputMatType &in, OutputMatType &out, std::vector< int > const &full_dims)
Set up the forward transform.
ToReal< InputType > RealType
void compute_backward(OutputMatType &in) const
std::vector< InternalPlanType > plans_
std::string value_as_string(cufftResult_t)
The stringified name of the enumerated value.
ToComplex< InputType > ComplexType
std::string result_string(cufftResult_t)
The docstring for the given result.
void compute_backward(OutputMatType &in, InputMatType &out) const
Alias around the C-compatible API.
El::Matrix< OutputType, El::Device::GPU > OutputMatType
void setup_common(InMatT &in, OutMatT &out, std::vector< int > const &full_dims)
El::Matrix< InputType, El::Device::GPU > InputMatType
void setup_forward(InputMatType &in, std::vector< int > const &full_dims)
Set up an in-place forward transform.