EMAN2
emfft.cpp
Go to the documentation of this file.
1/*
2 * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
3 * Copyright (c) 2000-2006 Baylor College of Medicine
4 *
5 * This software is issued under a joint BSD/GNU license. You may use the
6 * source code in this file under either license. However, note that the
7 * complete EMAN2 and SPARX software packages have some GPL dependencies,
8 * so you are responsible for compliance with the licenses of these packages
9 * if you opt to use BSD licensing. The warranty disclaimer below holds
10 * in either instance.
11 *
12 * This complete copyright notice must be included in any revised version of the
13 * source code. Additional authorship citations may be added, but existing
14 * author citations must be preserved.
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 * */
31
32#include <string>
33#include <cstring>
34#include <complex>
35#include "emfft.h"
36#include "log.h"
37
38#include <iostream>
39using std::cout;
40using std::endl;
41
42#include "util.h"
43
44#ifdef EMAN2_USING_CUDA
45#include "cuda/cuda_emfft.h"
46#endif
47
48#ifdef USE_DJBFFT
49extern "C" {
50 #include <fftr4.h>
51}
52#endif //USE_DJBFFT
53
54using namespace EMAN;
55
56namespace {
57 int get_rank(int ny, int nz)
58 {
59 int rank = 3;
60 if (ny == 1) {
61 rank = 1;
62 }
63 else if (nz == 1) {
64 rank = 2;
65 }
66 return rank;
67 }
68}
69#ifdef _WIN32
71#else
72pthread_mutex_t fft_mutex=PTHREAD_MUTEX_INITIALIZER;
73#endif
74
75#ifdef FFTW_PLAN_CACHING
76// The only thing important about these constants is that they don't equal each other
77// Why isn't this an enum? --steve
78const int EMfft::EMAN2_REAL_2_COMPLEX = 1;
79const int EMfft::EMAN2_COMPLEX_2_REAL = 2;
80const int EMfft::EMAN2_COMPLEX_2_COMPLEX = 3;
81
82
83#ifdef USE_FFTW3
84EMfft::EMfftw3_cache::EMfftw3_cache() :
85 num_plans(0)
86{
87 // NOTE 2018/11/13 Toshio Moriya:
88 // Modified for Pawel
89 clear_plans();
90
91 /*
92 for(int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
93 {
94 rank[i] = 0;
95 plan_dims[i][0] = 0; plan_dims[i][1] = 0; plan_dims[i][2] = 0;
96 r2c[i] = -1;
97 ip[i] = -1;
98 fftwplans[i] = NULL;
99 }
100 */
101}
102
103void EMfft::EMfftw3_cache::debug_plans()
104{
105 for(int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
106 {
107 cout << "Plan " << i << " has dims " << plan_dims[i][0] << " "
108 << plan_dims[i][1] << " " <<
109 plan_dims[i][2] << ", rank " <<
110 rank[i] << ", rc flag "
111 << r2c[i] << ", ip flag " << ip[i] << endl;
112 }
113}
114
115EMfft::EMfftw3_cache::~EMfftw3_cache()
116{
117 // NOTE 2018/11/13 Toshio Moriya:
118 // Modified for Pawel
119 destroy_plans();
120
121 /*
122 for(int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
123 {
124 if (fftwplans[i] != NULL)
125 {
126 int mrt = Util::MUTEX_LOCK(&fft_mutex);
127 fftwf_destroy_plan(fftwplans[i]);
128 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
129 fftwplans[i] = NULL;
130 }
131 }
132 */
133}
134
135// NOTE 2018/11/13 Toshio Moriya:
136// Added for Pawel
137void EMfft::EMfftw3_cache::clear_plans()
138{
139 // NOTE 2018/11/13 Toshio Moriya:
140 // Debug output to make sure of EMfft::initialize_plan_cache is working
141 //cout << "MRK_DEBUG: EMfft::clear_plans is executed\n";
142
143 for(int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
144 {
145 rank[i] = 0;
146 plan_dims[i][0] = 0; plan_dims[i][1] = 0; plan_dims[i][2] = 0;
147 r2c[i] = -1;
148 ip[i] = -1;
149 fftwplans[i] = NULL;
150 }
151}
152
153// NOTE 2018/11/13 Toshio Moriya:
154// Added for Pawel
155void EMfft::EMfftw3_cache::destroy_plans()
156{
157 // NOTE 2018/11/13 Toshio Moriya:
158 // Debug output to make sure of EMfft::initialize_plan_cache is working
159 //cout << "MRK_DEBUG: EMfft::EMfftw3_cache destroy_plans is executed\n";
160
161 for(int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
162 {
163 if (fftwplans[i] != NULL)
164 {
165 int mrt = Util::MUTEX_LOCK(&fft_mutex);
166 fftwf_destroy_plan(fftwplans[i]);
167 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
168 fftwplans[i] = NULL;
169 }
170 }
171}
172
173fftwf_plan EMfft::EMfftw3_cache::get_plan(const int rank_in, const int x, const int y, const int z, const int r2c_flag, const int ip_flag, fftwf_complex* complex_data, float* real_data )
174{
175
176 if ( rank_in > 3 || rank_in < 1 ) throw InvalidValueException(rank_in, "Error, can not get an FFTW plan using rank out of the range [1,3]");
177 if ( r2c_flag != EMAN2_REAL_2_COMPLEX && r2c_flag != EMAN2_COMPLEX_2_REAL && r2c_flag != EMAN2_COMPLEX_2_COMPLEX ) throw InvalidValueException(r2c_flag, "The selected real to complex flag is not supported");
178
179// static int num_added = 0;
180// cout << "Was asked for " << rank_in << " " << x << " " << y << " " << z << " " << r2c_flag << endl;
181
182 int dims[3];
183 dims[0] = z;
184 dims[1] = y;
185 dims[2] = x;
186
187 int mrt = Util::MUTEX_LOCK(&fft_mutex);
188
189 // First check to see if we already have the plan
190 int i;
191 for (i=0; i<num_plans; i++) {
192 if (plan_dims[i][0]==x && plan_dims[i][1]==y && plan_dims[i][2]==z && rank[i]==rank_in && r2c[i]==r2c_flag && ip[i]==ip_flag) {
193 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
194 return fftwplans[i];
195 }
196 }
197
198 fftwf_plan plan;
199 // Create the plan
200 if ( y == 1 && z == 1 )
201 {
202 if ( r2c_flag == EMAN2_REAL_2_COMPLEX )
203 plan = fftwf_plan_dft_r2c_1d(x, real_data, complex_data, FFTW_ESTIMATE);
204 else if ( r2c_flag == EMAN2_COMPLEX_2_REAL )
205 plan = fftwf_plan_dft_c2r_1d(x, complex_data, real_data, FFTW_ESTIMATE);
206 else {
207 // This ONLY makes plans for inplace 1D C->C Forward
208 float *tmp=(float *)malloc(sizeof(float)*x*2);
209 memcpy(tmp,complex_data,sizeof(float)*x*2);
210 // technically FFTW_ESTIMATE isn't supposed to mess with the input data, but the manual advises playing it safe
211 plan = fftwf_plan_dft_1d(x,complex_data,complex_data,FFTW_FORWARD,FFTW_ESTIMATE);
212 memcpy(complex_data,tmp,sizeof(float)*x*2);
213 free(tmp);
214 }
215 }
216 else
217 {
218 if ( r2c_flag == EMAN2_REAL_2_COMPLEX )
219 plan = fftwf_plan_dft_r2c(rank_in, dims + (3 - rank_in), real_data, complex_data, FFTW_ESTIMATE);
220 else if ( r2c_flag == EMAN2_COMPLEX_2_REAL)
221 plan = fftwf_plan_dft_c2r(rank_in, dims + (3 - rank_in), complex_data, real_data, FFTW_ESTIMATE);
222 else {
223 // This ONLY makes plans for inplace 2D/3D C->C Forward
224 float *tmp=(float *)malloc(sizeof(float)*x*2*y*z);
225 memcpy(tmp,complex_data,sizeof(float)*x*2*y*z);
226 // technically FFTW_ESTIMATE isn't supposed to mess with the input data, but the manual advises playing it safe
227 plan = fftwf_plan_dft(rank_in, dims + (3 - rank_in), complex_data, complex_data, FFTW_FORWARD, FFTW_ESTIMATE); // in place!
228 memcpy(complex_data,tmp,sizeof(float)*x*2*y*z);
229 free(tmp);
230
231 }
232 }
233
234 if (fftwplans[EMFFTW3_CACHE_SIZE-1] != NULL )
235 {
236 fftwf_destroy_plan(fftwplans[EMFFTW3_CACHE_SIZE-1]);
237 fftwplans[EMFFTW3_CACHE_SIZE-1] = NULL;
238 }
239
240 int upper_limit = num_plans;
241 if ( upper_limit == EMFFTW3_CACHE_SIZE ) upper_limit -= 1;
242 for (int i=upper_limit-1; i>0; i--)
243 {
244 fftwplans[i]=fftwplans[i-1];
245 rank[i]=rank[i-1];
246 r2c[i]=r2c[i-1];
247 ip[i]=ip[i-1];
248 plan_dims[i][0]=plan_dims[i-1][0];
249 plan_dims[i][1]=plan_dims[i-1][1];
250 plan_dims[i][2]=plan_dims[i-1][2];
251 }
252 //dimplan[0]=-1;
253
254 plan_dims[0][0]=x;
255 plan_dims[0][1]=y;
256 plan_dims[0][2]=z;
257 r2c[0]=r2c_flag;
258 ip[0]=ip_flag;
259 fftwplans[0] = plan;
260 rank[0]=rank_in;
261 if (num_plans<EMFFTW3_CACHE_SIZE) num_plans++;
262// debug_plans();
263// cout << "Created plan 0" << endl;
264// ++num_added;
265// cout << "I have created " << num_added << " plans" << endl;
266 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
267 return fftwplans[0];
268
269}
270
271// Static init
272EMfft::EMfftw3_cache EMfft::plan_cache;
273
274#endif // USE_FFTW3
275
276#endif // FFTW_PLAN_CACHING
277
278//#ifdef CUDA_FFT
279//int EMfft::real_to_complex_1d(float *real_data, float *complex_data, int n)
280//{
281// return cuda_fft_real_to_complex_1d(real_data,complex_data,n);
282//}
283
284//int EMfft::complex_to_real_1d(float *complex_data, float *real_data, int n)
285//{
286// return cuda_fft_complex_to_real_1d(complex_data,real_data,n);
287//}
288
289//int EMfft::real_to_complex_nd(float *real_data, float *complex_data, int nx, int ny, int nz)
290//{
291// return cuda_fft_real_to_complex_nd(real_data,complex_data,nx,ny,nz);
292//}
293
294//int EMfft::complex_to_real_nd(float *complex_data, float *real_data, int nx, int ny, int nz)
295//{
296// return cuda_fft_complex_to_real_nd(complex_data,real_data,nx,ny,nz);
297//}
298
299//#endif
300
301#ifdef USE_FFTW3
302
303int EMfft::real_to_complex_1d(float *real_data, float *complex_data, int n)
304{//cout<<"doing fftw3"<<endl;
305#ifdef FFTW_PLAN_CACHING
306 bool ip = ( complex_data == real_data );
307 fftwf_plan plan = plan_cache.get_plan(1,n,1,1,EMAN2_REAL_2_COMPLEX,ip,(fftwf_complex *) complex_data, real_data);
308 // According to FFTW3, this is making use of the "guru" interface - this is necessary if plans are to be reused
309 fftwf_execute_dft_r2c(plan, real_data,(fftwf_complex *) complex_data);
310#else
311 int mrt = Util::MUTEX_LOCK(&fft_mutex);
312 fftwf_plan plan = fftwf_plan_dft_r2c_1d(n, real_data, (fftwf_complex *) complex_data,
313 FFTW_ESTIMATE);
314 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
315
316 fftwf_execute(plan);
317 mrt = Util::MUTEX_LOCK(&fft_mutex);
318 fftwf_destroy_plan(plan);
319 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
320#endif // FFTW_PLAN_CACHING
321 return 0;
322};
323
324int EMfft::complex_to_real_1d(float *complex_data, float *real_data, int n)
325{
326#ifdef FFTW_PLAN_CACHING
327 bool ip = ( complex_data == real_data );
328 fftwf_plan plan = plan_cache.get_plan(1,n,1,1,EMAN2_COMPLEX_2_REAL,ip,(fftwf_complex *) complex_data, real_data);
329 // According to FFTW3, this is making use of the "guru" interface - this is necessary if plans are to be reused
330 fftwf_execute_dft_c2r(plan, (fftwf_complex *) complex_data, real_data);
331#else
332 int mrt = Util::MUTEX_LOCK(&fft_mutex);
333 fftwf_plan plan = fftwf_plan_dft_c2r_1d(n, (fftwf_complex *) complex_data, real_data,FFTW_ESTIMATE);
334 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
335 fftwf_execute(plan);
336 mrt = Util::MUTEX_LOCK(&fft_mutex);
337 fftwf_destroy_plan(plan);
338 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
339#endif // FFTW_PLAN_CACHING
340
341 return 0;
342}
343
344int EMfft::complex_to_complex_1d_inplace(std::complex<float> *complex_data, int n)
345{
346#ifdef FFTW_PLAN_CACHING
347 fftwf_plan plan = plan_cache.get_plan(1,n/2,1,1,EMAN2_COMPLEX_2_COMPLEX,1,(fftwf_complex *) complex_data,NULL);
348 fftwf_execute_dft(plan, (fftwf_complex *) complex_data,(fftwf_complex *) complex_data);
349#else
350 printf("ERROR: 1-D in place C2C FFT broken without caching");
351// fftwf_plan p;
352// fftwf_complex *in=(fftwf_complex *) complex_data_in;
353// fftwf_complex *out=(fftwf_complex *) complex_data_out;
354// int mrt = Util::MUTEX_LOCK(&fft_mutex);
355// // This shouldn't be unsafe with FFTW_ESTIMATE?
356// p=fftwf_plan_dft_1d(n/2,(fftwf_complex *) complex_data,(fftwf_complex *) complex_data, FFTW_FORWARD, FFTW_ESTIMATE);
357// mrt = Util::MUTEX_UNLOCK(&fft_mutex);
358// fftwf_execute(p);
359// mrt = Util::MUTEX_LOCK(&fft_mutex);
360// fftwf_destroy_plan(p);
361// mrt = Util::MUTEX_UNLOCK(&fft_mutex);
362#endif
363 return 0;
364}
365
366// ming add c->c fft with fftw3 library//
367int EMfft::complex_to_complex_1d_f(float *complex_data_in, float *complex_data_out, int n)
368{
369 fftwf_plan p;
370 fftwf_complex *in=(fftwf_complex *) complex_data_in;
371 fftwf_complex *out=(fftwf_complex *) complex_data_out;
372 int mrt = Util::MUTEX_LOCK(&fft_mutex);
373 p=fftwf_plan_dft_1d(n/2,in,out, FFTW_FORWARD, FFTW_ESTIMATE);
374 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
375 fftwf_execute(p);
376 mrt = Util::MUTEX_LOCK(&fft_mutex);
377 fftwf_destroy_plan(p);
378 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
379 return 0;
380}
381
382// ming add c->c fft with fftw3 library//
383int EMfft::complex_to_complex_1d_b(float *complex_data_in, float *complex_data_out, int n)
384{
385 fftwf_plan p;
386 fftwf_complex *in=(fftwf_complex *) complex_data_in;
387 fftwf_complex *out=(fftwf_complex *) complex_data_out;
388 int mrt = Util::MUTEX_LOCK(&fft_mutex);
389 p=fftwf_plan_dft_1d(n/2,in,out, FFTW_BACKWARD, FFTW_ESTIMATE);
390 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
391 fftwf_execute(p);
392 mrt = Util::MUTEX_LOCK(&fft_mutex);
393 fftwf_destroy_plan(p);
394 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
395 return 0;
396}
397
398int EMfft::complex_to_complex_2d_inplace(std::complex<float> *complex_data, int nx,int ny)
399{
400#ifdef FFTW_PLAN_CACHING
401 fftwf_plan plan = plan_cache.get_plan(2,nx/2,ny,1,EMAN2_COMPLEX_2_COMPLEX,1,(fftwf_complex *) complex_data,NULL);
402 fftwf_execute_dft(plan, (fftwf_complex *) complex_data,(fftwf_complex *) complex_data);
403#else
404 printf("ERROR: 2-D in place C2C FFT broken without caching");
405// fftwf_plan p;
406// fftwf_complex *in=(fftwf_complex *) complex_data_in;
407// fftwf_complex *out=(fftwf_complex *) complex_data_out;
408// int mrt = Util::MUTEX_LOCK(&fft_mutex);
409// // This shouldn't be unsafe with FFTW_ESTIMATE?
410// p=fftwf_plan_dft(n/2,(fftwf_complex *) complex_data,(fftwf_complex *) complex_data, FFTW_FORWARD, FFTW_ESTIMATE);
411// mrt = Util::MUTEX_UNLOCK(&fft_mutex);
412// fftwf_execute(p);
413// mrt = Util::MUTEX_LOCK(&fft_mutex);
414// fftwf_destroy_plan(p);
415// mrt = Util::MUTEX_UNLOCK(&fft_mutex);
416#endif
417 return 0;
418}
419
420int EMfft::complex_to_complex_nd(float *in, float *out, int nx,int ny,int nz)
421{
422 const int rank = get_rank(ny, nz);
423 int dims[3];
424 dims[0] = nz;
425 dims[1] = ny;
426 dims[2] = nx;
427
428 switch(rank) {
429 case 1:
430 complex_to_complex_1d_f(in, out, nx);
431 break;
432
433 case 2:
434 case 3:
435 {
436 fftwf_plan p;
437
438 int mrt = Util::MUTEX_LOCK(&fft_mutex);
439
440 if(out == in) {
441 p=fftwf_plan_dft_3d(nx/2,ny,nz,(fftwf_complex *) in,(fftwf_complex *) out, FFTW_FORWARD, FFTW_ESTIMATE);
442 }
443 else {
444
445 p=fftwf_plan_dft_3d(nx/2,ny,nz,(fftwf_complex *) in,(fftwf_complex *) out, FFTW_FORWARD, FFTW_ESTIMATE);
446 }
447 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
448
449 fftwf_execute(p);
450
451 mrt = Util::MUTEX_LOCK(&fft_mutex);
452 fftwf_destroy_plan(p);
453 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
454
455 }
456 }
457 return 0;
458}
459// end ming
460
461
462int EMfft::real_to_complex_nd(float *real_data, float *complex_data, int nx, int ny, int nz)
463{
464 const int rank = get_rank(ny, nz);
465 int dims[3];
466 dims[0] = nz;
467 dims[1] = ny;
468 dims[2] = nx;
469
470 switch(rank) {
471 case 1:
472 real_to_complex_1d(real_data, complex_data, nx);
473 break;
474
475 case 2:
476 case 3:
477 {
478#ifdef FFTW_PLAN_CACHING
479 bool ip = ( complex_data == real_data );
480 fftwf_plan plan = plan_cache.get_plan(rank,nx,ny,nz,EMAN2_REAL_2_COMPLEX,ip,(fftwf_complex *) complex_data, real_data);
481 // According to FFTW3, this is making use of the "guru" interface - this is necessary if plans are to be re-used
482 fftwf_execute_dft_r2c(plan, real_data,(fftwf_complex *) complex_data );
483#else
484 int mrt = Util::MUTEX_LOCK(&fft_mutex);
485 fftwf_plan plan = fftwf_plan_dft_r2c(rank, dims + (3 - rank),
486 real_data, (fftwf_complex *) complex_data, FFTW_ESTIMATE);
487 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
488
489 fftwf_execute(plan);
490
491 mrt = Util::MUTEX_LOCK(&fft_mutex);
492 fftwf_destroy_plan(plan);
493 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
494
495#endif // FFTW_PLAN_CACHING
496 }
497 break;
498
499 default:
500 LOGERR("Should NEVER be here!!!");
501 break;
502 }
503
504 return 0;
505}
506
507int EMfft::complex_to_real_nd(float *complex_data, float *real_data, int nx, int ny, int nz)
508{
509 const int rank = get_rank(ny, nz);
510 int dims[3];
511 dims[0] = nz;
512 dims[1] = ny;
513 dims[2] = nx;
514
515 switch(rank) {
516 case 1:
517 complex_to_real_1d(complex_data, real_data, nx);
518 break;
519
520 case 2:
521 case 3:
522 {
523#ifdef FFTW_PLAN_CACHING
524 bool ip = ( complex_data == real_data );
525 fftwf_plan plan = plan_cache.get_plan(rank,nx,ny,nz,EMAN2_COMPLEX_2_REAL,ip,(fftwf_complex *) complex_data, real_data);
526 // According to FFTW3, this is making use of the "guru" interface - this is necessary if plans are to be re-used
527 fftwf_execute_dft_c2r(plan, (fftwf_complex *) complex_data, real_data);
528#else
529 int mrt = Util::MUTEX_LOCK(&fft_mutex);
530 fftwf_plan plan = fftwf_plan_dft_c2r(rank, dims + (3 - rank),
531 (fftwf_complex *) complex_data, real_data, FFTW_ESTIMATE);
532 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
533
534 fftwf_execute(plan);
535
536 mrt = Util::MUTEX_LOCK(&fft_mutex);
537 fftwf_destroy_plan(plan);
538 mrt = Util::MUTEX_UNLOCK(&fft_mutex);
539
540#endif // FFTW_PLAN_CACHING
541
542
543 }
544 break;
545
546 default:
547 LOGERR("Should NEVER be here!!!");
548 break;
549 }
550
551 return 0;
552}
553
554
555// NOTE 2018/11/13 Toshio Moriya:
556// Added for Pawel so that he re-initialize EMfftw3_cache plan_cache through this function
557// This function is available only when USE_FFTW3 is defined.
558int EMfft::initialize_plan_cache()
559{
560 plan_cache.destroy_plans();
561 plan_cache.clear_plans();
562 return 0;
563}
564
565#endif //USE_FFTW3
566
567#ifdef NATIVE_FFT
568#include "sparx/native_fft.h"
569int EMfft::real_to_complex_1d(float *real_data, float *complex_data, int n)
570{
571 //int complex_size = n + 2 - n%2;
572 float * work = (float*) malloc((2*n+15)*sizeof(float));
573 if (!work) {
574 fprintf(stderr,"real_to_complex_1d: failed to allocate work\n");
575 LOGERR("real_to_complex_1d: failed to allocate work\n");
576 }
577
578 rffti(n, work);
579 memcpy(&complex_data[1], real_data, n * sizeof(float));
580 rfftf(n, &complex_data[1], work);
581 complex_data[0] = complex_data[1] ;
582 complex_data[1] = 0.0f ;
583 if (n%2 == 0) complex_data[n+1] = 0.0f ;
584
585 free(work);
586 return 0;
587}
588/*{
589 //int complex_size = n + 2 - n%2;
590
591 //memcpy(complex_data, real_data, n * sizeof(float));
592 //float * work = (float*) malloc((2*n+15)*sizeof(float));
593 //if (!work) {
594 // fprintf(stderr,"real_to_complex_1d: failed to allocate work\n");
595 // LOGERR("real_to_complex_1d: failed to allocate work\n");
596 //}
597 //Nativefft::fmrs_1rf(complex_data, work, n);
598 //cout<<"doing rfftf"<<endl;
599 //rffti(n, work);
600
601 //rfftf(n, real_data, work);
602
603 //cout<<"doing fftr_q"<<endl;
604 int l=(int)(log2(n));
605 Util::fftr_q(real_data,l);
606
607 //free(work);
608 return 0;
609}//
610{
611 int complex_size = n + 2 - n%2;
612
613 memcpy(complex_data, real_data, n * sizeof(float));
614 float * work = (float*) malloc(complex_size*sizeof(float));
615 if (!work) {
616 fprintf(stderr,"real_to_complex_1d: failed to allocate work\n");
617 LOGERR("real_to_complex_1d: failed to allocate work\n");
618 }
619
620 Nativefft::fmrs_1rf(complex_data, work, n);
621
622 free(work);
623 return 0;
624}*/
625
626int EMfft::complex_to_real_1d(float *complex_data, float *real_data, int n)
627{
628 //here, n is the "logical" size of DFT, not the array size
629 float * work = (float*) malloc((2*n+15)*sizeof(float));
630 if (!work) {
631 fprintf(stderr,"real_to_complex_1d: failed to allocate work\n");
632 LOGERR("complex_to_real_1d: failed to allocate work\n");
633 }
634
635 rffti(n, work);
636
637 memcpy(&real_data[1], &complex_data[2], (n-1) * sizeof(float));
638 real_data[0] = complex_data[0] ;
639 rfftb(n, real_data, work);
640 // Normalize
641 float nrm = 1.0f/float(n);
642 for (int i = 0; i<n; i++) real_data[i] *= nrm;
643 free(work);
644 return 0;
645}
646/*{
647 int complex_size = n + 2 - n%2;
648
649 //here, n is the "logical" size of DFT, not the array size
650 memcpy(real_data, complex_data, complex_size * sizeof(float));
651 float * work = (float*)malloc(complex_size*sizeof(float));
652 if (!work) {
653 fprintf(stderr,"real_to_complex_1d: failed to allocate work\n");
654 LOGERR("complex_to_real_1d: failed to allocate work\n");
655 }
656
657 Nativefft::fmrs_1rb(real_data, work, n);
658
659 free(work);
660 return 0;
661}*/
662
663int EMfft::real_to_complex_nd(float *real_data, float *complex_data, int nx, int ny, int nz)
664{
665 const int rank = get_rank(ny, nz);
666 const int complex_nx = nx + 2 - nx%2;
667
668 switch(rank) {
669 case 1: //for 1D fft
670 real_to_complex_1d(real_data, complex_data, nx);
671 return 0;
672 case 2: //for 2D fft
673 {
674 /*if(real_data != complex_data) {
675 for (int j = 0; j < ny; j++) {
676 memcpy(&complex_data[complex_nx*j], &real_data[nx*j], nx*sizeof(float));
677 }
678 }
679 float * work = (float*) malloc(complex_nx*sizeof(float));
680 if (!work) {
681 fprintf(stderr,"real_to_complex_nd(2df): failed to allocate work\n");
682 LOGERR("real_to_complex_nd(2df): failed to allocate work\n");
683 }
684
685 // 2d inplace fft, overwrite y
686 int status = Nativefft::fmrs_2rf(complex_data, work, complex_nx, nx, ny);
687 if (status !=0) {
688 fprintf(stderr, "real_to_complex_nd(2df): status = %d\n", status);
689 LOGWARN("real_to_complex_nd(2df): status = %d\n", status);
690 }
691
692 free(work);*/
693
694 int status = Nativefft::ftp_2rf(real_data, complex_data, complex_nx, nx, ny);
695 if (status !=0) {
696 fprintf(stderr, "real_to_complex_nd(2df): status = %d\n", status);
697 LOGWARN("real_to_complex_nd(2df): status = %d\n", status);
698 }
699 return 0;
700 }
701 case 3: //for 3D fft
702 {
703 /*if(real_data != complex_data) {
704 for (int k = 0; k<nz; k++) {
705 for (int j = 0; j < ny; j++) {
706 memcpy(&complex_data[complex_nx*ny*k+j*complex_nx], &real_data[nx*ny*k+j*nx], nx*sizeof(float));
707 }
708 }
709 }
710 float * work = (float*) malloc(1*sizeof(float));//malloc(complex_nx*sizeof(float));
711 if (!work) {
712 fprintf(stderr,"real_to_complex_nd(3df): failed to allocate work\n");
713 LOGERR("real_to_complex_nd(3df): failed to allocate work\n");
714 }*/
715
716 // 3d inplace fft, overwrite complex_data
717 int status = Nativefft::ftp_3rf(real_data, complex_data, complex_nx, nx, ny, nz);
718 if (status !=0) {
719 fprintf(stderr, "real_to_complex_nd(3df): status = %d\n", status);
720 LOGWARN("real_to_complex_nd(3df): status = %d\n", status);
721 }
722
723 //free(work);
724 return 0;
725 }
726 default:
727 LOGERR("Never should be here...\n");
728 return -1;
729 }
730}
731
732int EMfft::complex_to_real_nd(float *complex_data, float *real_data, int nx, int ny, int nz)
733{
734 const int rank = get_rank(ny, nz);
735 const int complex_nx = nx + 2 - nx%2;
736
737 switch(rank) {
738 case 1: //for 1D ift
739 complex_to_real_1d(complex_data, real_data, nx);
740 return 0;
741 case 2: //for 2D ift
742 /*{
743 if(real_data != complex_data) {
744 memcpy(real_data, complex_data, complex_nx*ny*sizeof(float));
745 }
746
747 float * work = (float*) malloc(complex_nx*sizeof(float));
748 if (!work) {
749 fprintf(stderr,"complex_to_real_nd(2db): failed to allocate work\n");
750 LOGERR("complex_to_real_nd(2db): failed to allocate work\n");
751 }
752
753 // 2d inplace ift, overwrite real_data
754 int status = Nativefft::fmrs_2rb(real_data, work, complex_nx, nx, ny);
755 if (status !=0) {
756 fprintf(stderr, "complex_to_real_nd(2db): status = %d\n", status);
757 LOGWARN("complex_to_real_nd(2db): status = %d\n", status);
758 }
759
760 free(work);
761 return 0;
762 }*/
763 { // Only out of place!
764 memcpy(real_data, complex_data, complex_nx*ny*sizeof(float));
765
766 // 2d inplace ift, overwrite real_data
767 int status = Nativefft::ftp_2rb(real_data, complex_nx, nx, ny);
768
769 if (status !=0) {
770 fprintf(stderr, "complex_to_real_nd(2db): status = %d\n", status);
771 LOGWARN("complex_to_real_nd(2db): status = %d\n", status);
772 }
773 return 0;
774 }
775 case 3: //for 3D ift
776 { // Only out of place!
777 memcpy(real_data, complex_data, complex_nx*ny*nz*sizeof(float));
778
779 // 3d inplace fft, overwrite real_data
780 int status = Nativefft::ftp_3rb(real_data, complex_nx, nx, ny, nz);
781 if (status !=0) {
782 fprintf(stderr, "complex_to_real_nd(3db): status = %d\n", status);
783 LOGWARN("complex_to_real_nd(3db): status = %d\n", status);
784 }
785 return 0;
786 }
787 default:
788 LOGERR("Never should be here...\n");
789 return -1;
790 }
791}
792#endif //NATIVE_FFT
793
794#ifdef USE_ACML
795#include <iostream>
796using std::cout;
797using std::endl;
798
799int EMfft::real_to_complex_1d(float *real_data, float *complex_data, int n)
800{
801 const int complex_n = n + 2 - n%2; //the size for 1D complex array
802 int info;
803
804 /* Allocate communication work array */
805 float * comm = (float *)malloc((3*n+100)*sizeof(float));
806 /* Allocate work array to store ACML complex array*/
807 float * fft_data = (float *)malloc(n * sizeof(float));
808
809 //copy real_data to complex_data then apply inplace FFT on complex data
810 memcpy(fft_data, real_data, n * sizeof(float));
811
812 /* Initialize communication work array */
813 scfft(0, n, fft_data, comm, &info);
814 if(info != 0) {
815 LOGERR("Error happening in Initialize communication work array: %d", info);
816 }
817
818 /* Compute a real --> Hermitian transform */
819 scfft(1, n, fft_data, comm, &info);
820 if(info != 0) {
821 LOGERR("Error happening in Initialize communication work array: %d", info);
822 }
823
830 transform(fft_data, fft_data+n, fft_data, time_sqrt_n(n));
831
832 for(int i=0; i<complex_n; ++i) {
833 if(i%2==0) { //copy real part of complex array
834 complex_data[i] = fft_data[i/2];
835 }
836 else { //copy imaginary part of complex array
837 if(i==1) {
838 complex_data[i] = 0.0f;
839 }
840 else {
841 if(n%2 == 0 && i == complex_n-1 ) {
842 complex_data[i] = 0.0f;
843 }
844 else {
845 complex_data[i] = fft_data[n-i/2];
846 }
847 }
848 }
849 }
850
851 free(fft_data);
852 free(comm);
853 return 0;
854}
855
856int EMfft::complex_to_real_1d(float *complex_data, float *real_data, int n)
857{
858 int complex_n = n + 2 - n%2; //the size for 1D complex array
859 int info;
860
861 /* Allocate communication work array */
862 float * comm = (float *)malloc((3*n+100)*sizeof(float));
863
864 for(int i=0; i<complex_n; ++i) {
865 if(i%2 == 0) { //copy real part of complex array
866 real_data[i/2] = complex_data[i];
867 }
868 else { //copy imaginary part of complex array
869 if(i==1) {continue;}
870 if(!(n%2 == 0 && i == complex_n-1)) {
871 real_data[n-i/2] = complex_data[i];
872 }
873 }
874 }
875 transform(real_data, real_data+n, real_data, divide_sqrt_n(n));
876
877 /* Initialize communication work array */
878 csfft(0, n, real_data, comm, &info);
879 if(info != 0) {
880 LOGERR("Error happening in Initialize communication work array: %d", info);
881 }
882
883 /* Conjugate the Vector X to simulate inverse transform */
884 for (int j = n/2+1; j < n; j++) {
885 real_data[j] = -real_data[j];
886 }
887
888 /* Compute a Hermitian --> real transform */
889 csfft(1, n, real_data, comm, &info);
890 if(info != 0) {
891 LOGERR("Error happening in Initialize communication work array: %d", info);
892 }
893
894 free(comm);
895 return 0;
896}
897
898int EMfft::real_to_complex_2d(float *real_data, float *complex_data, int nx, int ny)
899{
900 const int complex_nx = nx + 2 - nx%2;
901 int info;
902 /* Allocate communication work array */
903 float * comm = (float *)malloc((3*nx+100)*sizeof(float));
904
905 /* Allocate work array to store ACML complex array*/
906 float * fft_data = (float *)malloc(complex_nx * ny * sizeof(float));
907 cout << "fft_data after allocation:" << endl;
908 for(int j=0; j<ny; ++j) {
909 for(int i=0; i<complex_nx; ++i) {
910 cout << "data[" << i << "][" << j << "] = " << fft_data[i+j*complex_nx] << "\t";
911 }
912 cout << endl;
913 }
914
915 /*copy real_data to complex_data then apply inplace FFT on complex data*/
916 for(int i=0; i<ny; ++i) {
917 memcpy(fft_data+i*complex_nx, real_data+i*nx, nx*sizeof(float));
918 }
919 //memcpy(fft_data, real_data, nx * ny * sizeof(float));
920 cout << endl << "real_data array: " << endl;
921 for(int j=0; j<ny; ++j) {
922 for(int i=0; i<nx; ++i) {
923 cout << real_data[i+j*nx] << "\t";
924 }
925 cout << endl;
926 }
927 cout << endl << "the fft_data array: " << endl;
928 for(int j=0; j<ny; ++j) {
929 for(int i=0; i<complex_nx; ++i) {
930 cout << fft_data[i+j*complex_nx] << "\t";
931 }
932 cout << endl;
933 }
934
935 //do a multiple 1d real-to-complex transform on x direction
936 scfftm(ny, nx, fft_data, comm, &info);
937
938 cout << endl << "the fft_data array after x dim transform: " << endl;
939 for(int j=0; j<ny; ++j) {
940 for(int i=0; i<complex_nx; ++i) {
941 cout << fft_data[i+j*complex_nx] << "\t";
942 }
943 cout << endl;
944 }
945
946/* cout << "original fft array" << endl;
947/ cout << "complex_nx = " << complex_nx << " ny = " << n*(fft_data2+i+j*2*ny+1) << endl;
948 for(int i=0; i<ny; ++i) {
949 for(int j=0; j<complex_nx; ++j) {
950 cout << *(fft_data+j+i*complex_nx) << "\t";
951 }
952 cout << endl;
953 }
954*/
955 //do a multiple 1d complex to complex transformation on y direction
956 float * fft_data2 = (float *)malloc(complex_nx * ny * sizeof(float));
957/* cout << "fft array rearranged in y dimension" << endl;
958 for(int i=0; i<complex_nx; ++i) {
959 for(int j=0; j<ny; ++j) {
960 *(fft_data2+i+j*2*ny) = *(fft_data+i+j*complex_nx);
961 *(fft_data2+i+j*2*ny+1) = *(fft_data+i+complex_nx/2+j*complex_nx);
962 cout << *(fft_data2+i+j*2*ny) << "\t" << *(fft_data2+i+j*2*ny+1) << "\t";
963 }
964 cout << endl;
965 }
966*/
967 if(info != 0) {
968 LOGERR("Error happening in scfftm: %d", info);
969 }
970
971 return 0;
972}
973
974int EMfft::complex_to_real_2d(float *complex_data, float *real_data, int nx, int ny)
975{
976 return 0;
977}
978
979int EMfft::real_to_complex_3d(float *real_data, float *complex_data, int nx, int ny, int nz)
980{
981 return 0;
982}
983
984int EMfft::complex_to_real_3d(float *complex_data, float *real_data, int nx, int ny, int nz)
985{
986 return 0;
987}
988
989int EMfft::real_to_complex_nd(float *real_data, float *complex_data, int nx, int ny, int nz)
990{
991 const int rank = get_rank(ny, nz);
992
993 switch(rank) {
994 case 1:
995 return real_to_complex_1d(real_data, complex_data, nx);
996 case 2:
997 return real_to_complex_2d(real_data, complex_data, nx, ny);
998 case 3:
999 return real_to_complex_3d(real_data, complex_data, nx, ny, nz);
1000 default:
1001 LOGERR("Never should be here...\n");
1002 return -1;
1003 }
1004}
1005
1006int EMfft::complex_to_real_nd(float *complex_data, float *real_data, int nx, int ny, int nz)
1007{
1008 const int rank = get_rank(ny, nz);
1009
1010 switch(rank) {
1011 case 1:
1012 return complex_to_real_1d(complex_data, real_data, nx);
1013 case 2:
1014 return complex_to_real_2d(complex_data, real_data, nx, ny);
1015 case 3:
1016 return complex_to_real_3d(complex_data, real_data, nx, ny, nz);
1017 default:
1018 LOGERR("Never should be here...\n");
1019 return -1;
1020 }
1021}
1022
1023
1024#endif //USE_ACML
pthread_mutex_t fft_mutex
Definition: emfft.cpp:72
#define InvalidValueException(val, desc)
Definition: exception.h:285
#define LOGWARN
Definition: log.h:53
#define LOGERR
Definition: log.h:51
E2Exception class.
Definition: aligner.h:40
#define y(i, j)
Definition: projector.cpp:1516
#define x(i)
Definition: projector.cpp:1517
#define MUTEX
Definition: util.h:43