44#ifdef EMAN2_USING_CUDA
57 int get_rank(
int ny,
int nz)
75#ifdef FFTW_PLAN_CACHING
78const int EMfft::EMAN2_REAL_2_COMPLEX = 1;
79const int EMfft::EMAN2_COMPLEX_2_REAL = 2;
80const int EMfft::EMAN2_COMPLEX_2_COMPLEX = 3;
84EMfft::EMfftw3_cache::EMfftw3_cache() :
103void EMfft::EMfftw3_cache::debug_plans()
105 for(
int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
107 cout <<
"Plan " << i <<
" has dims " << plan_dims[i][0] <<
" "
108 << plan_dims[i][1] <<
" " <<
109 plan_dims[i][2] <<
", rank " <<
110 rank[i] <<
", rc flag "
111 << r2c[i] <<
", ip flag " << ip[i] << endl;
115EMfft::EMfftw3_cache::~EMfftw3_cache()
137void EMfft::EMfftw3_cache::clear_plans()
143 for(
int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
146 plan_dims[i][0] = 0; plan_dims[i][1] = 0; plan_dims[i][2] = 0;
155void EMfft::EMfftw3_cache::destroy_plans()
161 for(
int i = 0; i < EMFFTW3_CACHE_SIZE; ++i)
163 if (fftwplans[i] != NULL)
166 fftwf_destroy_plan(fftwplans[i]);
173fftwf_plan EMfft::EMfftw3_cache::get_plan(
const int rank_in,
const int x,
const int y,
const int z,
const int r2c_flag,
const int ip_flag, fftwf_complex* complex_data,
float* real_data )
176 if ( rank_in > 3 || rank_in < 1 )
throw InvalidValueException(rank_in,
"Error, can not get an FFTW plan using rank out of the range [1,3]");
177 if ( r2c_flag != EMAN2_REAL_2_COMPLEX && r2c_flag != EMAN2_COMPLEX_2_REAL && r2c_flag != EMAN2_COMPLEX_2_COMPLEX )
throw InvalidValueException(r2c_flag,
"The selected real to complex flag is not supported");
191 for (i=0; i<num_plans; i++) {
192 if (plan_dims[i][0]==
x && plan_dims[i][1]==
y && plan_dims[i][2]==z && rank[i]==rank_in && r2c[i]==r2c_flag && ip[i]==ip_flag) {
200 if (
y == 1 && z == 1 )
202 if ( r2c_flag == EMAN2_REAL_2_COMPLEX )
203 plan = fftwf_plan_dft_r2c_1d(
x, real_data, complex_data, FFTW_ESTIMATE);
204 else if ( r2c_flag == EMAN2_COMPLEX_2_REAL )
205 plan = fftwf_plan_dft_c2r_1d(
x, complex_data, real_data, FFTW_ESTIMATE);
208 float *tmp=(
float *)malloc(
sizeof(
float)*
x*2);
209 memcpy(tmp,complex_data,
sizeof(
float)*
x*2);
211 plan = fftwf_plan_dft_1d(
x,complex_data,complex_data,FFTW_FORWARD,FFTW_ESTIMATE);
212 memcpy(complex_data,tmp,
sizeof(
float)*
x*2);
218 if ( r2c_flag == EMAN2_REAL_2_COMPLEX )
219 plan = fftwf_plan_dft_r2c(rank_in, dims + (3 - rank_in), real_data, complex_data, FFTW_ESTIMATE);
220 else if ( r2c_flag == EMAN2_COMPLEX_2_REAL)
221 plan = fftwf_plan_dft_c2r(rank_in, dims + (3 - rank_in), complex_data, real_data, FFTW_ESTIMATE);
224 float *tmp=(
float *)malloc(
sizeof(
float)*
x*2*
y*z);
225 memcpy(tmp,complex_data,
sizeof(
float)*
x*2*
y*z);
227 plan = fftwf_plan_dft(rank_in, dims + (3 - rank_in), complex_data, complex_data, FFTW_FORWARD, FFTW_ESTIMATE);
228 memcpy(complex_data,tmp,
sizeof(
float)*
x*2*
y*z);
234 if (fftwplans[EMFFTW3_CACHE_SIZE-1] != NULL )
236 fftwf_destroy_plan(fftwplans[EMFFTW3_CACHE_SIZE-1]);
237 fftwplans[EMFFTW3_CACHE_SIZE-1] = NULL;
240 int upper_limit = num_plans;
241 if ( upper_limit == EMFFTW3_CACHE_SIZE ) upper_limit -= 1;
242 for (
int i=upper_limit-1; i>0; i--)
244 fftwplans[i]=fftwplans[i-1];
248 plan_dims[i][0]=plan_dims[i-1][0];
249 plan_dims[i][1]=plan_dims[i-1][1];
250 plan_dims[i][2]=plan_dims[i-1][2];
261 if (num_plans<EMFFTW3_CACHE_SIZE) num_plans++;
272EMfft::EMfftw3_cache EMfft::plan_cache;
303int EMfft::real_to_complex_1d(
float *real_data,
float *complex_data,
int n)
305#ifdef FFTW_PLAN_CACHING
306 bool ip = ( complex_data == real_data );
307 fftwf_plan plan = plan_cache.get_plan(1,n,1,1,EMAN2_REAL_2_COMPLEX,ip,(fftwf_complex *) complex_data, real_data);
309 fftwf_execute_dft_r2c(plan, real_data,(fftwf_complex *) complex_data);
312 fftwf_plan plan = fftwf_plan_dft_r2c_1d(n, real_data, (fftwf_complex *) complex_data,
318 fftwf_destroy_plan(plan);
324int EMfft::complex_to_real_1d(
float *complex_data,
float *real_data,
int n)
326#ifdef FFTW_PLAN_CACHING
327 bool ip = ( complex_data == real_data );
328 fftwf_plan plan = plan_cache.get_plan(1,n,1,1,EMAN2_COMPLEX_2_REAL,ip,(fftwf_complex *) complex_data, real_data);
330 fftwf_execute_dft_c2r(plan, (fftwf_complex *) complex_data, real_data);
333 fftwf_plan plan = fftwf_plan_dft_c2r_1d(n, (fftwf_complex *) complex_data, real_data,FFTW_ESTIMATE);
337 fftwf_destroy_plan(plan);
344int EMfft::complex_to_complex_1d_inplace(std::complex<float> *complex_data,
int n)
346#ifdef FFTW_PLAN_CACHING
347 fftwf_plan plan = plan_cache.get_plan(1,n/2,1,1,EMAN2_COMPLEX_2_COMPLEX,1,(fftwf_complex *) complex_data,NULL);
348 fftwf_execute_dft(plan, (fftwf_complex *) complex_data,(fftwf_complex *) complex_data);
350 printf(
"ERROR: 1-D in place C2C FFT broken without caching");
367int EMfft::complex_to_complex_1d_f(
float *complex_data_in,
float *complex_data_out,
int n)
370 fftwf_complex *in=(fftwf_complex *) complex_data_in;
371 fftwf_complex *out=(fftwf_complex *) complex_data_out;
373 p=fftwf_plan_dft_1d(n/2,in,out, FFTW_FORWARD, FFTW_ESTIMATE);
377 fftwf_destroy_plan(p);
383int EMfft::complex_to_complex_1d_b(
float *complex_data_in,
float *complex_data_out,
int n)
386 fftwf_complex *in=(fftwf_complex *) complex_data_in;
387 fftwf_complex *out=(fftwf_complex *) complex_data_out;
389 p=fftwf_plan_dft_1d(n/2,in,out, FFTW_BACKWARD, FFTW_ESTIMATE);
393 fftwf_destroy_plan(p);
398int EMfft::complex_to_complex_2d_inplace(std::complex<float> *complex_data,
int nx,
int ny)
400#ifdef FFTW_PLAN_CACHING
401 fftwf_plan plan = plan_cache.get_plan(2,nx/2,ny,1,EMAN2_COMPLEX_2_COMPLEX,1,(fftwf_complex *) complex_data,NULL);
402 fftwf_execute_dft(plan, (fftwf_complex *) complex_data,(fftwf_complex *) complex_data);
404 printf(
"ERROR: 2-D in place C2C FFT broken without caching");
420int EMfft::complex_to_complex_nd(
float *in,
float *out,
int nx,
int ny,
int nz)
422 const int rank = get_rank(ny, nz);
430 complex_to_complex_1d_f(in, out, nx);
441 p=fftwf_plan_dft_3d(nx/2,ny,nz,(fftwf_complex *) in,(fftwf_complex *) out, FFTW_FORWARD, FFTW_ESTIMATE);
445 p=fftwf_plan_dft_3d(nx/2,ny,nz,(fftwf_complex *) in,(fftwf_complex *) out, FFTW_FORWARD, FFTW_ESTIMATE);
452 fftwf_destroy_plan(p);
462int EMfft::real_to_complex_nd(
float *real_data,
float *complex_data,
int nx,
int ny,
int nz)
464 const int rank = get_rank(ny, nz);
472 real_to_complex_1d(real_data, complex_data, nx);
478#ifdef FFTW_PLAN_CACHING
479 bool ip = ( complex_data == real_data );
480 fftwf_plan plan = plan_cache.get_plan(rank,nx,ny,nz,EMAN2_REAL_2_COMPLEX,ip,(fftwf_complex *) complex_data, real_data);
482 fftwf_execute_dft_r2c(plan, real_data,(fftwf_complex *) complex_data );
485 fftwf_plan plan = fftwf_plan_dft_r2c(rank, dims + (3 - rank),
486 real_data, (fftwf_complex *) complex_data, FFTW_ESTIMATE);
492 fftwf_destroy_plan(plan);
500 LOGERR(
"Should NEVER be here!!!");
507int EMfft::complex_to_real_nd(
float *complex_data,
float *real_data,
int nx,
int ny,
int nz)
509 const int rank = get_rank(ny, nz);
517 complex_to_real_1d(complex_data, real_data, nx);
523#ifdef FFTW_PLAN_CACHING
524 bool ip = ( complex_data == real_data );
525 fftwf_plan plan = plan_cache.get_plan(rank,nx,ny,nz,EMAN2_COMPLEX_2_REAL,ip,(fftwf_complex *) complex_data, real_data);
527 fftwf_execute_dft_c2r(plan, (fftwf_complex *) complex_data, real_data);
530 fftwf_plan plan = fftwf_plan_dft_c2r(rank, dims + (3 - rank),
531 (fftwf_complex *) complex_data, real_data, FFTW_ESTIMATE);
537 fftwf_destroy_plan(plan);
547 LOGERR(
"Should NEVER be here!!!");
558int EMfft::initialize_plan_cache()
560 plan_cache.destroy_plans();
561 plan_cache.clear_plans();
568#include "sparx/native_fft.h"
569int EMfft::real_to_complex_1d(
float *real_data,
float *complex_data,
int n)
572 float * work = (
float*) malloc((2*n+15)*
sizeof(float));
574 fprintf(stderr,
"real_to_complex_1d: failed to allocate work\n");
575 LOGERR(
"real_to_complex_1d: failed to allocate work\n");
579 memcpy(&complex_data[1], real_data, n *
sizeof(
float));
580 rfftf(n, &complex_data[1], work);
581 complex_data[0] = complex_data[1] ;
582 complex_data[1] = 0.0f ;
583 if (n%2 == 0) complex_data[n+1] = 0.0f ;
626int EMfft::complex_to_real_1d(
float *complex_data,
float *real_data,
int n)
629 float * work = (
float*) malloc((2*n+15)*
sizeof(float));
631 fprintf(stderr,
"real_to_complex_1d: failed to allocate work\n");
632 LOGERR(
"complex_to_real_1d: failed to allocate work\n");
637 memcpy(&real_data[1], &complex_data[2], (n-1) *
sizeof(
float));
638 real_data[0] = complex_data[0] ;
639 rfftb(n, real_data, work);
641 float nrm = 1.0f/float(n);
642 for (
int i = 0; i<n; i++) real_data[i] *= nrm;
663int EMfft::real_to_complex_nd(
float *real_data,
float *complex_data,
int nx,
int ny,
int nz)
665 const int rank = get_rank(ny, nz);
666 const int complex_nx = nx + 2 - nx%2;
670 real_to_complex_1d(real_data, complex_data, nx);
694 int status = Nativefft::ftp_2rf(real_data, complex_data, complex_nx, nx, ny);
696 fprintf(stderr,
"real_to_complex_nd(2df): status = %d\n", status);
697 LOGWARN(
"real_to_complex_nd(2df): status = %d\n", status);
717 int status = Nativefft::ftp_3rf(real_data, complex_data, complex_nx, nx, ny, nz);
719 fprintf(stderr,
"real_to_complex_nd(3df): status = %d\n", status);
720 LOGWARN(
"real_to_complex_nd(3df): status = %d\n", status);
727 LOGERR(
"Never should be here...\n");
732int EMfft::complex_to_real_nd(
float *complex_data,
float *real_data,
int nx,
int ny,
int nz)
734 const int rank = get_rank(ny, nz);
735 const int complex_nx = nx + 2 - nx%2;
739 complex_to_real_1d(complex_data, real_data, nx);
764 memcpy(real_data, complex_data, complex_nx*ny*
sizeof(
float));
767 int status = Nativefft::ftp_2rb(real_data, complex_nx, nx, ny);
770 fprintf(stderr,
"complex_to_real_nd(2db): status = %d\n", status);
771 LOGWARN(
"complex_to_real_nd(2db): status = %d\n", status);
777 memcpy(real_data, complex_data, complex_nx*ny*nz*
sizeof(
float));
780 int status = Nativefft::ftp_3rb(real_data, complex_nx, nx, ny, nz);
782 fprintf(stderr,
"complex_to_real_nd(3db): status = %d\n", status);
783 LOGWARN(
"complex_to_real_nd(3db): status = %d\n", status);
788 LOGERR(
"Never should be here...\n");
799int EMfft::real_to_complex_1d(
float *real_data,
float *complex_data,
int n)
801 const int complex_n = n + 2 - n%2;
805 float * comm = (
float *)malloc((3*n+100)*
sizeof(float));
807 float * fft_data = (
float *)malloc(n *
sizeof(
float));
810 memcpy(fft_data, real_data, n *
sizeof(
float));
813 scfft(0, n, fft_data, comm, &info);
815 LOGERR(
"Error happening in Initialize communication work array: %d", info);
819 scfft(1, n, fft_data, comm, &info);
821 LOGERR(
"Error happening in Initialize communication work array: %d", info);
830 transform(fft_data, fft_data+n, fft_data, time_sqrt_n(n));
832 for(
int i=0; i<complex_n; ++i) {
834 complex_data[i] = fft_data[i/2];
838 complex_data[i] = 0.0f;
841 if(n%2 == 0 && i == complex_n-1 ) {
842 complex_data[i] = 0.0f;
845 complex_data[i] = fft_data[n-i/2];
856int EMfft::complex_to_real_1d(
float *complex_data,
float *real_data,
int n)
858 int complex_n = n + 2 - n%2;
862 float * comm = (
float *)malloc((3*n+100)*
sizeof(float));
864 for(
int i=0; i<complex_n; ++i) {
866 real_data[i/2] = complex_data[i];
870 if(!(n%2 == 0 && i == complex_n-1)) {
871 real_data[n-i/2] = complex_data[i];
875 transform(real_data, real_data+n, real_data, divide_sqrt_n(n));
878 csfft(0, n, real_data, comm, &info);
880 LOGERR(
"Error happening in Initialize communication work array: %d", info);
884 for (
int j = n/2+1; j < n; j++) {
885 real_data[j] = -real_data[j];
889 csfft(1, n, real_data, comm, &info);
891 LOGERR(
"Error happening in Initialize communication work array: %d", info);
898int EMfft::real_to_complex_2d(
float *real_data,
float *complex_data,
int nx,
int ny)
900 const int complex_nx = nx + 2 - nx%2;
903 float * comm = (
float *)malloc((3*nx+100)*
sizeof(float));
906 float * fft_data = (
float *)malloc(complex_nx * ny *
sizeof(
float));
907 cout <<
"fft_data after allocation:" << endl;
908 for(
int j=0; j<ny; ++j) {
909 for(
int i=0; i<complex_nx; ++i) {
910 cout <<
"data[" << i <<
"][" << j <<
"] = " << fft_data[i+j*complex_nx] <<
"\t";
916 for(
int i=0; i<ny; ++i) {
917 memcpy(fft_data+i*complex_nx, real_data+i*nx, nx*
sizeof(
float));
920 cout << endl <<
"real_data array: " << endl;
921 for(
int j=0; j<ny; ++j) {
922 for(
int i=0; i<nx; ++i) {
923 cout << real_data[i+j*nx] <<
"\t";
927 cout << endl <<
"the fft_data array: " << endl;
928 for(
int j=0; j<ny; ++j) {
929 for(
int i=0; i<complex_nx; ++i) {
930 cout << fft_data[i+j*complex_nx] <<
"\t";
936 scfftm(ny, nx, fft_data, comm, &info);
938 cout << endl <<
"the fft_data array after x dim transform: " << endl;
939 for(
int j=0; j<ny; ++j) {
940 for(
int i=0; i<complex_nx; ++i) {
941 cout << fft_data[i+j*complex_nx] <<
"\t";
956 float * fft_data2 = (
float *)malloc(complex_nx * ny *
sizeof(
float));
968 LOGERR(
"Error happening in scfftm: %d", info);
974int EMfft::complex_to_real_2d(
float *complex_data,
float *real_data,
int nx,
int ny)
979int EMfft::real_to_complex_3d(
float *real_data,
float *complex_data,
int nx,
int ny,
int nz)
984int EMfft::complex_to_real_3d(
float *complex_data,
float *real_data,
int nx,
int ny,
int nz)
989int EMfft::real_to_complex_nd(
float *real_data,
float *complex_data,
int nx,
int ny,
int nz)
991 const int rank = get_rank(ny, nz);
995 return real_to_complex_1d(real_data, complex_data, nx);
997 return real_to_complex_2d(real_data, complex_data, nx, ny);
999 return real_to_complex_3d(real_data, complex_data, nx, ny, nz);
1001 LOGERR(
"Never should be here...\n");
1006int EMfft::complex_to_real_nd(
float *complex_data,
float *real_data,
int nx,
int ny,
int nz)
1008 const int rank = get_rank(ny, nz);
1012 return complex_to_real_1d(complex_data, real_data, nx);
1014 return complex_to_real_2d(complex_data, real_data, nx, ny);
1016 return complex_to_real_3d(complex_data, real_data, nx, ny, nz);
1018 LOGERR(
"Never should be here...\n");
pthread_mutex_t fft_mutex
#define InvalidValueException(val, desc)