// Extracted from doxygen/analyzer_8cpp_source.html

/*

 * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)

 * Copyright (c) 2000-2006 Baylor College of Medicine

 *

 * This software is issued under a joint BSD/GNU license. You may use the

 * source code in this file under either license. However, note that the

 * complete EMAN2 and SPARX software packages have some GPL dependencies,

 * so you are responsible for compliance with the licenses of these packages

 * if you opt to use BSD licensing. The warranty disclaimer below holds

 * in either instance.

 *

 * This complete copyright notice must be included in any revised version of the

 * source code. Additional authorship citations may be added, but existing

 * author citations must be preserved.

 *

 * This program is free software; you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation; either version 2 of the License, or

 * (at your option) any later version.

 *

 * This program is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License

 * along with this program; if not, write to the Free Software

 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

 *

 * */


#include <cstdio>
#include <ctime>
#include <memory>
#include "emdata.h"
#include "analyzer.h"
#include "sparx/analyzer_sparx.h"
#include "util.h"
#include "cmp.h"
#include "sparx/lapackblas.h"
#include "sparx/varimax.h"


using namespace EMAN;


namespace EMAN {

	// NAME string of each analyzer class. Presumably these are the keys under
	// which the factory below exposes the analyzers -- confirm against
	// Factory::force_add.
	const string PCAsmall::NAME                = "pca";
	const string PCAlarge::NAME                = "pca_large";
	const string varimax::NAME                 = "varimax";
	const string InertiaMatrixAnalyzer::NAME   = "inertiamatrix";
	const string ShapeAnalyzer::NAME           = "shape";
	const string KMeansAnalyzer::NAME          = "kmeans";
	const string SVDAnalyzer::NAME             = "svd_gsl";
	const string CircularAverageAnalyzer::NAME = "cir_avg";

	// Constructor specialisation of the Analyzer factory: adds every analyzer
	// type defined above, in this fixed order.
	template <> Factory < Analyzer >::Factory()
	{
		force_add<PCAsmall>();
		force_add<PCAlarge>();
		force_add<varimax>();
		force_add<InertiaMatrixAnalyzer>();
		force_add<ShapeAnalyzer>();
		force_add<KMeansAnalyzer>();
		force_add<SVDAnalyzer>();
		force_add<CircularAverageAnalyzer>();
	}

}


// Feeds every image of image_list, in order, to insert_image().
// The individual return values of insert_image() are not inspected;
// this function always returns 0.
int Analyzer::insert_images_list(vector<EMData *> image_list)
{
	const size_t count = image_list.size();
	for (size_t k = 0; k < count; ++k) {
		insert_image(image_list[k]);
	}
	return 0;
}


// Computes the 3x3 inertia matrix of the single input volume about the voxel
// (nx/2, ny/2, nz/2), treating each voxel value as a mass, and divides it by
// the number of voxels. The matrix is appended to `ret` as a 3x3 EMData and
// `ret` is returned.
//
// Throws ImageDimensionException if there is not exactly one input image or
// if any of its dimensions equals 1.
vector<EMData *> InertiaMatrixAnalyzer::analyze() {
	int verbose = params.set_default("verbose",0);

	// Validate before allocating, so a rejected input does not leave an
	// all-zero matrix behind in `ret`.
	if (images.size()!=1) throw ImageDimensionException("Inertia matrix computation accepts only a single volume as input");
	int nx=images[0]->get_xsize();
	int ny=images[0]->get_ysize();
	int nz=images[0]->get_zsize();
	// The original tested nz twice and never nx
	if (nx==1 || ny==1 || nz==1) throw ImageDimensionException("Map must be 3-D");

	if (verbose>0) printf("Inertia volume size: %d %d %d\n",nx,ny,nz);

	// The matrix is symmetric, so only six distinct sums are needed
	float ixx=0.0f, iyy=0.0f, izz=0.0f;
	float ixy=0.0f, ixz=0.0f, iyz=0.0f;

	for (int z=0; z<nz; z++) {
		for (int y=0; y<ny; y++) {
			for (int x=0; x<nx; x++) {
				int xx=x-nx/2;
				int yy=y-ny/2;
				int zz=z-nz/2;
				float v=images[0]->get_value_at(x,y,z);
				ixx+=v*(yy*yy+zz*zz);
				iyy+=v*(zz*zz+xx*xx);
				izz+=v*(xx*xx+yy*yy);
				ixy+=v*(-xx*yy);
				ixz+=v*(-xx*zz);
				iyz+=v*(-yy*zz);
			}
		}
	}

	EMData *mx = new EMData(3,3);	// result is a 3x3 matrix
	mx->to_zero();
	ret.push_back(mx);

	mx->set_value_at(0,0,ixx);
	mx->set_value_at(0,1,ixy);
	mx->set_value_at(0,2,ixz);
	mx->set_value_at(1,0,ixy);
	mx->set_value_at(1,1,iyy);
	mx->set_value_at(1,2,iyz);
	mx->set_value_at(2,0,ixz);
	mx->set_value_at(2,1,iyz);
	mx->set_value_at(2,2,izz);

	// Voxel count is formed in size_t so that large volumes do not overflow int
	mx->mult(1.0f/(float)((size_t)nx*ny*nz));

	if (verbose>0) {
		printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,0),mx->get_value_at(1,0),mx->get_value_at(2,0));
		printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,1),mx->get_value_at(1,1),mx->get_value_at(2,1));
		printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,2),mx->get_value_at(1,2),mx->get_value_at(2,2));
	}

	return ret;
}


// Computes eight shape descriptors of the single input volume, measured from
// the voxel (nx/2, ny/2, nz/2) and divided by the number of voxels.
// The result is a 4x2 EMData appended to `ret`:
//   row 0: sum(v*xx^2), sum(v*yy^2), sum(v*zz^2), sum(v*r^2)
//   row 1: sum(v*|xx|), sum(v*|yy|), sum(v*|zz|), sum(v*r)
// where v is the voxel value and r the distance to the centre. The sum(v*r^2)
// entry is the one the original comment says is used for minicircle
// classification.
//
// Throws ImageDimensionException if there is not exactly one input image or
// if any of its dimensions equals 1.
vector<EMData *> ShapeAnalyzer::analyze() {
	int verbose = params.set_default("verbose",0);

	// Validate before allocating, so a rejected input does not leave an
	// all-zero result behind in `ret`.
	if (images.size()!=1) throw ImageDimensionException("Shape computation accepts only a single volume as input");
	int nx=images[0]->get_xsize();
	int ny=images[0]->get_ysize();
	int nz=images[0]->get_zsize();
	// The original tested nz twice and never nx
	if (nx==1 || ny==1 || nz==1) throw ImageDimensionException("Map must be 3-D");

	if (verbose>0) printf("Shape size: %d %d %d\n",nx,ny,nz);

	float second[4] = {0.0f,0.0f,0.0f,0.0f};	// row 0: second moments
	float first[4]  = {0.0f,0.0f,0.0f,0.0f};	// row 1: first absolute moments

	for (int z=0; z<nz; z++) {
		for (int y=0; y<ny; y++) {
			for (int x=0; x<nx; x++) {
				int xx=x-nx/2;
				int yy=y-ny/2;
				int zz=z-nz/2;
				int r2=xx*xx+yy*yy+zz*zz;
				float v=images[0]->get_value_at(x,y,z);

				second[0]+=v*(xx*xx);
				second[1]+=v*(yy*yy);
				second[2]+=v*(zz*zz);
				second[3]+=v*r2;

				// The original added each term to the *row 0* running value and
				// stored that in row 1, so row 1 never accumulated on its own.
				first[0]+=v*abs(xx);
				first[1]+=v*abs(yy);
				first[2]+=v*abs(zz);
				first[3]+=v*(float)sqrt((float)r2);
			}
		}
	}

	EMData *mx = new EMData(4,2,1);	// result is 4x2 values
	mx->to_zero();
	ret.push_back(mx);

	for (int k=0; k<4; k++) {
		mx->set_value_at(k,0,second[k]);
		mx->set_value_at(k,1,first[k]);
	}

	// Voxel count is formed in size_t so that large volumes do not overflow int
	mx->mult(1.0f/(float)((size_t)nx*ny*nz));

	return ret;
}


// Stores new_params and copies each recognised option that is present into
// the matching member. Options that are absent leave their member untouched.
void KMeansAnalyzer::set_params(const Dict & new_params)
{
	params = new_params;

	// "ncls" sets both the current and the total number of classes
	if (params.has_key("ncls")) {
		ncls = nclstot = params["ncls"];
	}
	if (params.has_key("maxiter")) {
		maxiter = params["maxiter"];
	}
	if (params.has_key("minchange")) {
		minchange = params["minchange"];
	}
	if (params.has_key("mininclass")) {
		mininclass = params["mininclass"];
	}
	if (params.has_key("slowseed")) {
		slowseed = params["slowseed"];
	}
	if (params.has_key("verbose")) {
		verbose = params["verbose"];
	}
	if (params.has_key("calcsigmamean")) {
		calcsigmamean = params["calcsigmamean"];
	}
	if (params.has_key("outlierclass")) {
		outlierclass = params["outlierclass"];
	}
}


// Runs k-means on the inserted images and returns the class centers.
//
// Seeding (parameter "seedmode"):
//   0 - each center is a copy of a randomly chosen image
//   1 - the first/last centers are the images with the smallest/largest
//       "mean" attribute, and the ones in between are linear blends of the two
// NOTE(review): any other seedmode leaves the centers unset (null), which the
// iteration below does not handle.
//
// When calcsigmamean is set, `centers` holds 2*nclstot images: the centers
// followed by one extra image per class that update_centers() fills in on the
// final pass.
//
// Returns an empty vector when ncls<=1 or when no images were inserted.
vector<EMData *> KMeansAnalyzer::analyze()
{
	if (ncls<=1) return vector<EMData *>();
	//srandom(time(0));

	int nptcl=images.size();
	// Nothing to cluster; every seeding mode below would index an empty list
	if (nptcl==0) return vector<EMData *>();

	// These are the class centers, start each with a random image
	if (calcsigmamean) centers.resize(nclstot*2);
	else centers.resize(nclstot);
	if (mininclass<1) mininclass=1;

	int seedmode=params.set_default("seedmode",(int)0);

	// in outlier mode we don't use the bad center concept
	if (outlierclass==0) {
		for (int i=0; i<nptcl; i++) images[i]->set_attr("is_ok_center",(int)5);  // if an image becomes part of too small a set, it will (eventually) be marked as a bad center
	}

	if (slowseed) {
		if (ncls>25) ncls=slowseed=ncls/25+1;	// this becomes the number to seed in each step
	//	if (maxiter<ncls*3+20) maxiter=ncls*3+20;	// We need to make sure we have enough iterations to seed all of the classes
	//	ncls=2;
	}

	if (seedmode==0) {
		for (int i=0; i<ncls; i++) {
			// Fixed by d.woolford, Util.get_irand is inclusive (added a -1)
			centers[i]=images[Util::get_irand(0,nptcl-1)]->copy();
		}
	}
	else if (seedmode==1) {
		// find the images with the largest and smallest mean.
		// Both start at the first image so they are never left uninitialised,
		// e.g. when every mean compares false against the sentinels.
		EMData *max=images[0];
		float maxv=-1.0e27;
		EMData *min=images[0];
		float minv=1.0e27;
		for (int i=0; i<nptcl; i++) {
			float m = images[i]->get_attr("mean");
			if (m<minv) { minv=m; min=images[i]; }
			if (m>maxv) { maxv=m; max=images[i]; }
		}
		centers[0]=min->copy();
		centers[ncls-1]=max->copy();

		// now fill in linear interpolates in between
		for (int i=1; i<ncls-1; i++) {
			centers[i]=centers[0]->copy();
			centers[i]->mult((ncls-i-1.0f)/(ncls-1.0f));
			EMData *tmp=max->copy();
			tmp->mult(i/(ncls-1.0f));
			centers[i]->add(*tmp);
			delete tmp;
		}
	}

	if (calcsigmamean) {
		for (int i=nclstot; i<nclstot*2; i++) centers[i]=new EMData(images[0]->get_xsize(),images[0]->get_ysize(),images[0]->get_zsize());
	}

	for (int i=0; i<maxiter; i++) {
		nchanged=0;
		resort();
		reclassify();
		update_centers();
		if (verbose) printf("iter %d>  %d (%d)\n",i,nchanged,ncls);
		// converged only once all classes have been seeded
		if (nchanged<minchange && ncls==nclstot) break;

		// slow seeding: every third iteration open up to `slowseed` more classes
		if (slowseed && i%3==2 && ncls<nclstot) {
			for (int j=0; j<slowseed && ncls<nclstot; j++) {
				centers[ncls]=0;
				ncls++;
			}
			reseed();
		}
	}
	update_centers(calcsigmamean);

	return centers;
}


// Recomputes every class center as the mean of its member images and then
// calls reseed() for any class that ended up without a center.
//
// sigmas: when non-zero, also fills the per-class image stored at
//         centers[nclstot+i] with sqrt(mean(x^2)-mean(x)^2)/sqrt(N).
//
// Side effects on attributes:
//   centers[i]: "worst_ptcldist", "worst_ptcl" (index of the member with the
//               largest "class_cendist") and "ptcl_repr" (member count)
//   images[j]:  "class_id" (moved to the outlier class in outlier mode) or
//               "is_ok_center" (decremented otherwise) for members of classes
//               with fewer than mininclass members
void KMeansAnalyzer::update_centers(int sigmas) {
	int nptcl=images.size();
	//int repr[ncls];
	vector<int> repr(ncls);

	// The sigma images sit at a fixed offset of nclstot (see analyze()), not
	// ncls: the two differ while slow seeding is still adding classes.
	for (int i=0; i<ncls; i++) {
		centers[i]->to_zero();
		if (sigmas) centers[i+nclstot]->to_zero();
		repr[i]=0;
		centers[i]->set_attr("worst_ptcldist",0.0f);
	}

	// compute new position for each center
	for (int i=0; i<nptcl; i++) {
		int cid=images[i]->get_attr("class_id");
		// outlier mode disables is_ok_center functionality
		if (outlierclass || (int)images[i]->get_attr("is_ok_center")>0) {
			centers[cid]->add(*images[i]);
			if (sigmas) centers[cid+nclstot]->addsquare(*images[i]);
			repr[cid]++;
			float imdist=images[i]->get_attr("class_cendist");
			if (imdist>(float)centers[cid]->get_attr("worst_ptcldist")) {
				centers[cid]->set_attr("worst_ptcldist",imdist);
				centers[cid]->set_attr("worst_ptcl",i);
			}
		}
	}

	for (int i=0; i<ncls; i++) {
		// If this class is too small, outlier class is never reseeded
		if (repr[i]<mininclass && (outlierclass==0||i<nclstot-1)) {
			// find all of the particles in the class, and decrement their "is_ok_center" counter.
			// when it reaches zero the particle will no longer participate in determining the location of a center
			if (outlierclass) {	// outliers are relegated to the outlier class permanently
				for (int j=0; j<nptcl; j++) {
					if ((int)images[j]->get_attr("class_id")==i) {
						if (verbose) printf("outlier: %d\n",j);
						images[j]->set_attr("class_id",nclstot-1);
						//nchanged++;	// should happen automatically below
					}
				}
			}
			// if not using outlier class, we use "is_ok_center" concept to reduce influence of outliers
			else {
				for (int j=0; j<nptcl; j++) {
					// j is the particle, i the class: the original decremented images[i]
					if ((int)images[j]->get_attr("class_id")==i) images[j]->set_attr("is_ok_center",(int)images[j]->get_attr("is_ok_center")-1);
				}
			}
			// Mark the center for reseeding
			delete centers[i];
			centers[i]=0;
			repr[i]=0;
		}
		// finishes off the statistics we started computing above
		else {
			// NOTE(review): in outlier mode the outlier class reaches this branch
			// even when repr[i]==0, giving a division by zero -- confirm intent.
			centers[i]->mult((float)1.0/(float)(repr[i]));
			centers[i]->set_attr("ptcl_repr",repr[i]);
			if (sigmas) {
				centers[i+nclstot]->mult((float)1.0/(float)(repr[i]));		// sum of squares over n
				centers[i+nclstot]->subsquare(*centers[i]);					// subtract the mean value squared
				centers[i+nclstot]->process("math.sqrt");					// square root
				centers[i+nclstot]->mult((float)1.0/(float)sqrt((float)repr[i]));		// divide by sqrt(N) to get std. dev. of mean
			}
		}
		if (verbose>1) printf("%d(%d)\t",i,(int)repr[i]);
	}

	if (verbose>1) printf("\n");

	reseed();
}


// This will look for any unassigned points and reseed each inside the class with the broadest distribution widely distributed

void KMeansAnalyzer::reseed() {

int nptcl=images.size();

int i,j;


// if no classes need reseeding just return

for (i=0; i<ncls; i++) {

        if (!centers[i]) break;

}

if (i==ncls) return;


// make a list of all particles which could be centers

vector<int> goodcen;

if (outlierclass) {

        for (int i=0; i<nptcl; i++) { if ((int)images[i]->get_attr("class_id")!=nclstot-1) goodcen.push_back(i); }

}

else {

//      printf("c%d\n",outlierclass);

        for (int i=0; i<nptcl; i++) { if ((int)images[i]->get_attr("is_ok_center")>0) goodcen.push_back(i); }

}


if (goodcen.size()==0) {

        printf("Kmeans ran out of valid center particles, disabling outlier mode and finishing. Results not valid.\n");

        for (int i=0; i<nptcl; i++) goodcen.push_back(i);

        outlierclass=0;

        return;

}

//      throw UnexpectedBehaviorException("Kmeans ran out of valid center particles with the provided parameters");


// pick a random particle for the new seed

// for (i=0; i<ncls; i++) {

//      if (centers[i]) continue;               // center doesn't need reseeding

//      j=Util::get_irand(0,goodcen.size()-1);

//      centers[i]=images[j]->copy();           // Isn't this wrong? Should it be looking in goodcen?

//      centers[i]->set_attr("ptcl_repr",1);

//      printf("reseed %d -> %d\n",i,j);

// }


// use a valid center with a large distance for the new seed

for (i=0; i<ncls; i++) {

        if (centers[i]) continue;               // center doesn't need reseeding

        if (outlierclass) j=Util::get_irand(0,ncls-2);  // don't reuse particles identified as outliers

        else j=Util::get_irand(0,ncls-1);       // pick a random class

        // The worst particle method with outliers often 'eats' all of the particles

        if (!outlierclass && centers[j] && centers[j]->has_attr("worst_ptcl")) {                // try to use the worst particle from that class

                centers[i]=images[(int)centers[j]->get_attr("worst_ptcl")]->copy();

                printf("reseed %d -> worst (cls %d)\n",i,j);

        }

        else {

                j=Util::get_irand(0,goodcen.size()-1);

                centers[i]=images[goodcen[j]]->copy();

                printf("reseed %d -> %d\n",i,j);

        }

        centers[i]->set_attr("ptcl_repr",1);

}


}


// Quick sum of squared differences between the raw data of two images; the
// original comment notes it is MUCH faster than "sqeuclidean" for small
// images. Note that the sum is NOT divided by the number of values.
//
// Uses a->get_size() as the element count for both images, so b must hold at
// least that many values. The sum is accumulated in double and returned as
// float.
float qsqcmp(EMData *a,EMData *b) {
	size_t n = a->get_size();
	float *d1=a->get_data();
	float *d2=b->get_data();

	double ret=0.0;
	for (size_t i=0; i<n; i++) {
		// Plain multiply instead of pow(x,2): this is the inner loop of
		// k-means and pow() is a general-purpose routine. The difference is
		// still formed in float, then squared in double.
		double diff=d1[i]-d2[i];
		ret+=diff*diff;
	}

	return (float)ret;
}


// Tries to generate a reasonable similarity path through the centers to put more similar centers closer to each other

void KMeansAnalyzer::resort() {


//      Cmp *c = Factory < Cmp >::get("sqeuclidean");


        // The first center remains first, we proceed from that starting point

        // simple shells sort to an out-of-place reference

        int sortmax=ncls;

        if (outlierclass && ncls==nclstot) sortmax--;   // outlier class must not get resorted!


        for (int i=1; i<sortmax; i++) {

                float bst=1.0e22;

                for (int j=i; j<sortmax; j++) {

//                      float d=c->cmp(centers[i-1],centers[j]);

                        float d=qsqcmp(centers[i-1],centers[j]);

                        if (d<bst) {

                                bst=d;

                                if (j!=i) {

                                        EMData *tmp=centers[j];

                                        centers[j]=centers[i];

                                        centers[i]=tmp;

                                }

                        }

                }

        }


//      delete c;

}


// Redetermine which class each particle belongs in

void KMeansAnalyzer::reclassify() {

int nptcl=images.size();


//Cmp *c = Factory < Cmp >::get("sqeuclidean");

for (int i=0; i<nptcl; i++) {

        if (outlierclass && (int)images[i]->get_attr_default("class_id",0)==nclstot-1) continue;        // outliers are forever

        float best=1.0e38f;

        int bestn=0;

        int lim=ncls;

        if (outlierclass) lim=ncls-1;   // particles don't join the outliers based on distance

        for (int j=0; j<lim; j++) {

//              float d=c->cmp(images[i],centers[j]);

                float d=qsqcmp(images[i],centers[j]);

                if (d<best) { best=d; bestn=j; }

        }

        int oldn=images[i]->get_attr_default("class_id",0);

        if (oldn!=bestn) nchanged++;

        images[i]->set_attr("class_id",bestn);

        images[i]->set_attr("class_cendist",best);              // store this for reseeding

}

//delete c;

}


// 1-based accessors used by insert_image(); the covmat array is addressed
// with a stride of nx along its second index.
#define covmat(i,j) covmat[ ((j)-1)*nx + (i)-1 ]
#define imgdata(i)  imgdata[ (i)-1 ]

// Adds one image to the running covariance matrix: the image is compressed
// under the mask and the outer product of the resulting vector with itself is
// added to covmat.
//
// Throws NullPointerException when no mask has been set. Terminates the
// process via exit(1) if the compressed image length differs from ncov.
// Returns 0.
int PCAsmall::insert_image(EMData * image)
{
	if (!mask) {
		throw NullPointerException("Null mask image pointer, set_params() first");
	}

	EMData *maskedimage = Util::compress_image_mask(image,mask);
	float *imgdata = maskedimage->get_data();
	int nx = maskedimage->get_xsize();

	if (nx != ncov) {
		fprintf(stderr,"insert_image: something is wrong...\n");
		exit(1);
	}

	nimages++;

	// rank-1 update (the original notes that a faster version exists)
	for (int j = 1; j <= nx; j++) {
		const float vj = imgdata(j);
		for (int i = 1; i <= nx; i++) {
			covmat(i,j) += imgdata(i)*vj;
		}
	}

	EMDeletePtr(maskedimage);
	return 0;
}
#undef covmat


// 0-based accessor: column j of the ncov x ncov eigenvector array
#define eigvec(i,j) eigvec[(j)*ncov + (i)]

// Diagonalises the accumulated covariance matrix with Util::coveig and
// appends up to nvec eigen-images to `images`, taking the eigenvector columns
// from the last one backwards. Each eigen-image is reconstituted under the
// mask and carries its eigenvalue in the "eigval" attribute.
//
// Returns `images`.
vector<EMData*> PCAsmall::analyze()
{
	float *eigvec;
//	printf("start analyzing..., ncov = %d\n", ncov);
	eigval = (float*)calloc(ncov,sizeof(float));
	eigvec = (float*)calloc(ncov*ncov,sizeof(float));
	// NOTE(review): the return code of coveig was stored but never examined in
	// the original either; its meaning is not visible from here.
	Util::coveig(ncov, covmat, eigval, eigvec);

	// There are only ncov eigenvectors; never index before column 0
	int nkeep = nvec;
	if (nkeep > ncov) nkeep = ncov;

	// pack eigenvectors into the return imagelist
	EMData *eigenimage = new EMData();
	eigenimage->set_size(ncov,1,1);
	float *rdata = eigenimage->get_data();
	for (int j = 1; j <= nkeep; j++) {
		for (int i = 0; i < ncov; i++) rdata[i] = eigvec(i,ncov-j);

		EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
		// The eigenvalue must come from the same position as the vector.
		// The original tagged column ncov-j with eigval[j-1]; the commented-out
		// debug print in the original used eigval[ncov-i], matching the columns.
		recons_eigvec->set_attr( "eigval", eigval[ncov-j] );
		images.push_back(recons_eigvec);
	}

	free(eigvec);
	EMDeletePtr(eigenimage);

	return images;
}
#undef eigvec


void PCAsmall::set_params(const Dict & new_params)

{

        params = new_params;

        mask = params["mask"];

        nvec = params["nvec"];


        // count the number of pixels under the mask

        // (this is really ugly!!!)

        EMData *dummy = new EMData();


        int nx = mask->get_xsize();

        int ny = mask->get_ysize();

        int nz = mask->get_zsize();


        dummy->set_size(nx,ny,nz);


        EMData *dummy1d = Util::compress_image_mask(dummy,mask);

        ncov = dummy1d->get_xsize();

        EMDeletePtr(dummy);

        EMDeletePtr(dummy1d);


        // allocate and set up the covriance matrix

        nimages = 0;

        covmat = (float*)calloc(ncov*ncov,sizeof(float));

}


//------------------------------------------------------------------

// for large-scale PCA incore


// Compresses `image` under the mask and appends the resulting floats to the
// scratch file named by the "tmpfile" parameter (default
// "maskedimages.scratch"). nimages is incremented only when the whole vector
// was written.
//
// Throws NullPointerException when no mask has been set.
// Returns 0 on success, 1 if the scratch file could not be opened or written
// (the original did not check and would pass a null FILE* to fwrite).
int PCAlarge::insert_image(EMData * image)
{
	if(mask==0)
		throw NullPointerException("Null mask image pointer, set_params() first");

	EMData *maskedimage = Util::compress_image_mask(image,mask);

	string scratchfile = params.set_default("tmpfile","maskedimages.scratch");

	int nx = maskedimage->get_xsize();
	float *imgdata = maskedimage->get_data();

	int status = 0;
	FILE *fp = fopen(scratchfile.c_str(),"ab");
	if (fp == NULL) {
		fprintf(stderr,"PCAlarge: cannot open scratch file %s\n", scratchfile.c_str());
		status = 1;
	}
	else {
		if (fwrite(imgdata, sizeof(float), nx, fp) != (size_t)nx) {
			// NOTE(review): a short write leaves a truncated record in the file
			fprintf(stderr,"PCAlarge: cannot write to scratch file %s\n", scratchfile.c_str());
			status = 1;
		}
		// buffered data may only hit the disk here, so fclose can fail too
		if (fclose(fp) != 0 && status == 0) {
			fprintf(stderr,"PCAlarge: cannot write to scratch file %s\n", scratchfile.c_str());
			status = 1;
		}
		if (status == 0) nimages++;
	}

	EMDeletePtr(maskedimage);

	return status;
}


void PCAlarge::set_params(const Dict & new_params)

{

        params = new_params;

        mask = params["mask"];

        nvec = params["nvec"];


        // count the number of pixels under the mask

        // (this is really ugly!!!)

        EMData *dummy = new EMData();


        int nx = mask->get_xsize();

        int ny = mask->get_ysize();

        int nz = mask->get_zsize();


        dummy->set_size(nx,ny,nz);


        EMData *dummy1d = Util::compress_image_mask(dummy,mask);


        ncov = dummy1d->get_xsize();


        EMDeletePtr(dummy);

        EMDeletePtr(dummy1d);

        // no need to allocate the covariance matrix

        nimages = 0;

}


// 1-based accessors used by PCAlarge::analyze()
#define qmat(i,j)   qmat[((j)-1)*kstep + (i) -1]
#define diag(i)     diag[(i)-1]
#define rdata(i)    rdata[(i)-1]
#define eigvec(i,j) eigvec[((j)-1)*ncov + (i)-1]
#define eigval(i)   eigval[(i)-1]

// Runs a Lanczos factorization over the images stored in the scratch file,
// solves the resulting tridiagonal eigenproblem with sstevd_, and appends nvec
// eigen-images to `images`. Eigenpairs are taken from the end of the sstevd_
// output backwards. Each eigen-image is reconstituted under the mask and
// carries its eigenvalue in the "eigval" attribute.
//
// The scratch file is removed once the factorization has run.
// nvec is clamped to the number of images and to the number of Lanczos steps
// actually taken; nvec==0 means "as many as possible".
//
// Returns `images`; nothing is appended when no image was inserted or when
// the Lanczos step reports a failure.
vector<EMData*> PCAlarge::analyze()
{
	// Without images there is no scratch file and the sizes below would be
	// zero or negative.
	if (nimages <= 0) return images;

	int status = 0;
	int ione = 1;
	float one = 1.0, zero = 0.0;
	char trans;
	float *eigvec;
	string scratchfile = (string) params["tmpfile"];

//	printf("start analyzing..., ncov = %d\n", ncov);

	float resnrm = 0.0;

	if ( nvec > nimages || nvec ==0 ) nvec = nimages;
	int nx = ncov;

	// the definition of kstep is purely a heuristic for right now
	int kstep = nvec*2 + 20;
	if (kstep > nimages) kstep = nimages;

	float *diag    = new float[kstep];
	float *subdiag = new float[kstep-1];
	float *vmat    = new float[nx*kstep];

	// run kstep-step Lanczos factorization
	status = Lanczos(scratchfile, &kstep, diag, subdiag,
	                 vmat, &resnrm);

	// remove scratch file. remove() takes the path directly, so there is no
	// fixed-size command buffer to overflow and no shell to interpret the name
	// (the original formatted "rm -f <name>" into char[100] and ran system()).
	if (remove(scratchfile.c_str()) != 0) {
		fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
	}

	if (status != 0) {
		// the basis vectors are not usable; give back what we allocated
		fprintf(stderr,"PCAlarge: Lanczos factorization failed (status %d)\n", status);
		delete [] diag;
		delete [] subdiag;
		delete [] vmat;
		return images;
	}

	// Lanczos may have stopped early and lowered kstep; no more eigenpairs
	// than that exist.
	if (nvec > kstep) nvec = kstep;

	char jobz[2] = "V";
	float *qmat  = new float[kstep*kstep];
	// workspace size will be optimized later
	int   lwork  = 100 + 4*kstep + kstep*kstep;
	int   liwork = 3+5*kstep;

	float *work  = new float[lwork];
	int   *iwork = new int[liwork];
	int   info = 0;

	// call LAPACK tridiagonal eigensolver
	sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
	        iwork, &liwork, &info);
	if (info != 0) {
		fprintf(stderr,"PCAlarge: sstevd_ returned info = %d\n", info);
	}

	// store eigenvalues; the buffer holds at least nvec entries even when
	// nvec exceeds ncov
	int neig = (nvec > ncov) ? nvec : ncov;
	eigval = (float*)calloc(neig,sizeof(float));
	eigvec = (float*)calloc(ncov*nvec,sizeof(float));

	for (int j = 0; j < nvec; j++) {
	    eigval[j] = diag(kstep-j);
	}

//	for (int i=0; i<nvec; i++) printf("eigval = %11.4e\n",
//	    eigval[i]);

	// compute eigenvectors: eigvec(:,j) = vmat * qmat(:,kstep-j+1)
	for (int j=1; j<=nvec; j++) {
	    trans = 'N';
	    sgemv_(&trans, &nx,  &kstep, &one, vmat, &nx, &qmat(1,kstep-j+1),
	           &ione, &zero, &eigvec(1,j), &ione);
	}

	// pack eigenvectors into the return imagelist
	EMData *eigenimage = new EMData();
	eigenimage->set_size(ncov,1,1);
	float *rdata = eigenimage->get_data();
	for (int j = 1; j<= nvec; j++) {
	    for (int i = 1; i <= ncov; i++)
	        rdata(i) = eigvec(i,j);

	    EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);

	    recons_eigvec->set_attr( "eigval", eigval[j-1] );

	    images.push_back( recons_eigvec );
	}

	free(eigvec);
	EMDeletePtr(eigenimage);

	// the original leaked all six of these work arrays
	delete [] diag;
	delete [] subdiag;
	delete [] vmat;
	delete [] qmat;
	delete [] work;
	delete [] iwork;

	return images;
}
#undef qmat
#undef diag
#undef rdata
#undef eigvec
#undef eigval


// Helper for PCAlarge::Lanczos: computes out = sum_k x_k * (x_k . vec) over
// the `nimg` vectors of length `n` stored back to back in the binary file
// `path`, i.e. applies X*trans(X) to vec without forming the matrix.
// `buf` is caller-provided scratch of length n; `out` is overwritten.
// Returns false if the file cannot be opened or holds fewer than nimg vectors.
static bool lanczos_apply_cov(const char *path, int nimg, int n,
                              float *buf, float *vec, float *out)
{
    int inc = 1;

    for (int k = 0; k < n; k++) out[k] = 0.0f;

    FILE *fp = fopen(path,"rb");
    if (fp == NULL) {
        fprintf(stderr,"Lanczos: cannot open %s\n", path);
        return false;
    }

    bool ok = true;
    for (int k = 0; k < nimg; k++) {
        if (fread(buf, sizeof(float), n, fp) != (size_t)n) {
            fprintf(stderr,"Lanczos: cannot read image %d from %s\n", k, path);
            ok = false;
            break;
        }
        float w = sdot_(&n, buf, &inc, vec, &inc);
        saxpy_(&n, &w, buf, &inc, out, &inc);
    }
    fclose(fp);

    return ok;
}

#define TOL 1e-7
#define V(i,j)      V[((j)-1)*imgsize + (i) - 1]
#define v0(i)       v0[(i)-1]
#define Av(i)       Av[(i)-1]
#define subdiag(i)  subdiag[(i)-1]
#define diag(i)     diag[(i)-1]
#define hvec(i)     hvec[(i)-1]

int PCAlarge::Lanczos(const string &maskedimages, int *kstep,
                      float  *diag, float *subdiag, float *V,
                      float  *beta)
{
    /*
        Purpose: Compute a kstep-step Lanczos factorization
                 on the covariant matrix X*trans(X), where
                 X contains the set of masked images stored in
                 the scratch file;

        Input:
           maskedimages (string) name of the binary scratch file holding
                                 nimages vectors of ncov floats each;

           kstep (int*) The maximum number of Lanczos iterations allowed.
                          If Lanczos terminates before kstep steps
                          is reached (an invariant subspace is found),
                          kstep returns the number of steps taken;

        Output:
           diag (float *) The projection of the covariant matrix into a
                          Krylov subspace of dimension at most kstep.
                          The projection is a tridiagonal matrix. The
                          diagonal elements of this matrix is stored in
                          the diag array.

           subdiag (float*) The subdiagonal elements of the projection
                            is stored here.

           V (float *)    an imgsize by kstep array that contains a
                          set of orthonormal Lanczos basis vectors;

           beta (float *) the residual norm of the factorization;

        Return value:
           2  no image has been inserted;
           3  the image length or the requested step count is not positive;
           1  the scratch file could not be opened or was too short;
           otherwise the value returned by the last sgemv_ call, or 0 if
           none was made.
    */
    int i, iter;

    float alpha;
    int   ione = 1;
    float zero = 0.0, one = 1.0, mone = -1.0;
    int   status = 0;

    char trans;
    int  imgsize = ncov;
    // all declared (and nulled) up front so the jumps to EXIT below do not
    // cross an initialization and the cleanup is always safe
    float *v0 = NULL, *Av = NULL, *hvec = NULL, *htmp = NULL, *imgdata = NULL;

    if (nimages <= 0) {
        status = 2; // no image in the stack
        goto EXIT;
    }

    // The original repeated the nimages test here, making status 3 unreachable
    if (imgsize <= 0 || *kstep <= 0) {
        status = 3; // nothing to factorize
        goto EXIT;
    }

    // operator new throws on failure, so no null checks are needed
    v0   = new float[imgsize];
    Av   = new float[imgsize];
    hvec = new float[*kstep];
    htmp = new float[*kstep];
    imgdata = new float[imgsize];

    // may choose a random starting guess here
    for ( i = 1; i <= imgsize; i++)
    {
        v0(i) = 1.0;
        Av(i) = 0.0;
    }

    // normalize the starting vector
    *beta  = snrm2_(&imgsize, v0, &ione);
    for (i = 1; i<=imgsize; i++)
        V(i,1) = v0(i) / (*beta);

    // do Av <-- A*v0, where A is a cov matrix
    if (!lanczos_apply_cov(maskedimages.c_str(), nimages, imgsize,
                           imgdata, V, Av)) {
        status = 1;
        goto EXIT;
    }

    // Av <--- Av - V(:,1)*V(:,1)'*Av
    diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
    alpha   = -diag(1);
    saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);

    // main loop
    for ( iter = 2 ; iter <= *kstep ; iter++ ) {
        *beta = snrm2_(&imgsize, Av, &ione);

        if (*beta < TOL) {
            // found an invariant subspace, exit. Only iter-1 basis vectors
            // and diagonal entries have been produced at this point; the
            // original reported iter, exposing one uninitialized row/column.
            *kstep = iter - 1;
            break;
        }

        subdiag(iter-1) = *beta;
        for ( i = 1 ; i <= imgsize ; i++ ) {
            V(i,iter) = Av(i) / (*beta);
        }

        // do Av <-- A*V(:,iter), where A is a cov matrix
        if (!lanczos_apply_cov(maskedimages.c_str(), nimages, imgsize,
                               imgdata, &V(1,iter), Av)) {
            status = 1;
            goto EXIT;
        }

        // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
        trans = 'T';
        status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
                        &zero , hvec    , &ione);
        trans = 'N';
        status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
                        &ione , &one    , Av, &ione);

        // one step of reorthogonalization
        trans = 'T';
        status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
                        &zero , htmp    , &ione);
        saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
        trans = 'N';
        status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
                        &ione , &one    , Av, &ione);
        diag(iter) = hvec(iter);
    }

EXIT:
    // delete[] on a null pointer is a no-op, so this is safe on every path
    delete [] v0;
    delete [] Av;
    delete [] hvec;
    delete [] htmp;
    delete [] imgdata;

    return status;
}
#undef v0
#undef Av
#undef V
#undef hvec
#undef diag
#undef subdiag
#undef TOL


void varimax::set_params(const Dict & new_params)

{

        params = new_params;

        m_mask = params["mask"];


        // count the number of pixels under the mask

        // (this is really ugly!!!)

        EMData *dummy = new EMData();


        int nx = m_mask->get_xsize();

        int ny = m_mask->get_ysize();

        int nz = m_mask->get_zsize();


        dummy->set_size(nx,ny,nz);


        EMData *dummy1d = Util::compress_image_mask(dummy,m_mask);


        m_nlen = dummy1d->get_xsize();

        m_nfac = 0;


        EMDeletePtr(dummy);

        EMDeletePtr(dummy1d);

}


// Append one image to the data set that analyze() will rotate.
//
// The image is compressed through m_mask with Util::compress_image_mask()
// and the first m_nlen values of the result are appended to m_data; m_nfac
// is incremented.
//
// Returns 0.
// Throws NullPointerException if no mask has been set (set_params() not called).
int varimax::insert_image(EMData* image)
{
        if(m_mask==0)
                throw NullPointerException("Null mask image pointer, set_params() first");

        EMData* img1d = Util::compress_image_mask(image,m_mask);

        const float* packed = img1d->get_data();
        m_data.insert( m_data.end(), packed, packed + m_nlen );

        // The compressed image is a temporary owned by this function (set_params()
        // releases the same kind of result with EMDeletePtr); free it once its
        // values have been copied so each insert no longer leaks an image.
        EMDeletePtr(img1d);

        m_nfac++;

        Assert( (int)m_data.size() == m_nfac*m_nlen);

        return 0;
}


// Run varmx() in place on the accumulated data and return one image per
// inserted factor.
//
// m_data is handed to varmx() as m_nfac consecutive vectors of length m_nlen.
// Afterwards each vector is copied into a reusable 1D image and expanded back
// through m_mask with Util::reconstitute_image_mask(). The returned images are
// newly created by that call.
vector<EMData*> varimax::analyze()
{
    int max_iterations = 10000;
    float tolerance = 1e-4f;
    int verbosity = 1;

    // Only the first entry is assigned before the call, as before.
    float rotation_params[4];
    rotation_params[0] = 1.0;

    varmx( &m_data[0], m_nlen, m_nfac, IVARIMAX, rotation_params, NULL,
           max_iterations, tolerance, verbosity);

    vector<EMData*> factors;

    // One scratch 1D image is refilled for every factor.
    EMData* packed = new EMData();
    packed->set_size(m_nlen, 1, 1);

    for( int fac = 0; fac < m_nfac; ++fac )
    {
        float* dst = packed->get_data();
        const int base = fac * m_nlen;

        int pos = 0;
        while( pos < m_nlen )
        {
            dst[pos] = m_data[base + pos];
            ++pos;
        }

        factors.push_back( Util::reconstitute_image_mask(packed, m_mask) );
    }

    EMDeletePtr(packed);

    return factors;
}


// Copy the pixels of `image` that lie under the mask into the next free
// column (index nsofar) of the data matrix A, then advance nsofar.
//
// Mask pixels are visited in storage order; every nonzero mask value
// contributes one matrix row.
//
// Returns 0.
// Throws NullPointerException if no mask has been set (set_params() not called).
// Throws ImageDimensionException if the image holds fewer pixels than the mask.
int SVDAnalyzer::insert_image(EMData * image)
{
        if (mask==0)
                throw NullPointerException("Null mask image pointer, set_params() first");

        // Widen before multiplying so the product cannot overflow int.
        const size_t totpix=(size_t)mask->get_xsize()*mask->get_ysize()*mask->get_zsize();
        const size_t imgpix=(size_t)image->get_xsize()*image->get_ysize()*image->get_zsize();

        // The loop below reads totpix values from the image buffer; refuse images
        // that are too small instead of reading past the end of their data.
        if (imgpix<totpix)
                throw ImageDimensionException("Image is smaller than the mask");

        // NOTE(review): nothing here stops a caller from inserting more than nimg
        // images, which would index past the last column of A - confirm whether
        // callers guarantee this or a guard should be added.
        const float *d=image->get_data();
        const float *md=mask->get_data();

        size_t row=0;
        for (size_t i=0; i<totpix; ++i) {
                if (md[i]) {
                        gsl_matrix_set(A,row,nsofar,d[i]);
                        ++row;
                }
        }
        nsofar++;

        return 0;
}

#undef covmat


#define eigvec(i,j) eigvec[(j)*ncov + (i)]

// Run the singular value decomposition of the accumulated data matrix A and
// return up to nvec basis images.
//
// gsl_linalg_SV_decomp_mod() is applied to A in place. For each returned
// image k, the values of column k of A are scattered back to the pixel
// positions where the mask is nonzero, and the k-th entry of S is stored in
// the image's "eigval" attribute.
//
// On return A has been freed and both A and mask are reset to NULL, so
// set_params() must be called again before the analyzer is reused.
//
// Throws NullPointerException if mask or A is null (set_params() not called,
// or analyze() already called).
vector<EMData*> SVDAnalyzer::analyze()
{
    if (mask==0 || A==0)
        throw NullPointerException("Null mask or data matrix, set_params() first");

    // Allocate the working space
    gsl_vector *work=gsl_vector_alloc(nimg);
    gsl_vector *S=gsl_vector_alloc(nimg);
    gsl_matrix *V=gsl_matrix_alloc(nimg,nimg);
    gsl_matrix *X=gsl_matrix_alloc(nimg,nimg);

    // Do the decomposition. All the real work is here
    gsl_linalg_SV_decomp_mod (A,X, V, S, work);
    //else gsl_linalg_SV_decomp_jacobi(A,V,S);

    vector<EMData*> ret;

    // A has nimg columns and S has nimg entries, so never read more than that
    // even if a larger nvec was requested.
    const int nout = (nvec<nimg) ? nvec : nimg;

    // Unpack the results into images shaped like the mask.
    const int mnx=mask->get_xsize();
    const int mny=mask->get_ysize();
    const int mnz=mask->get_zsize();
    const float *md=mask->get_data();
    // Widen before multiplying so the product cannot overflow int.
    const size_t totpix=(size_t)mnx*mny*mnz;

    for (int k=0; k<nout; k++) {
        EMData *img = new EMData;
        img->set_size(mnx,mny,mnz);

        float *d=img->get_data();
        size_t row=0;
        for (size_t i=0; i<totpix; ++i) {
            if (md[i]) {
                d[i]=(float)gsl_matrix_get(A,row,k);
                ++row;
            }
            else {
                // Written explicitly so the output does not depend on whether
                // set_size() zero-fills the new buffer.
                d[i]=0.0f;
            }
        }
        img->set_attr( "eigval", gsl_vector_get(S,k));
        ret.push_back(img);
    }

    gsl_vector_free(work);
    gsl_vector_free(S);
    gsl_matrix_free(V);
    gsl_matrix_free(X);

    gsl_matrix_free(A);
    A=NULL;
    mask=NULL;

    return ret;
}


void SVDAnalyzer::set_params(const Dict & new_params)

{

        params = new_params;

        mask = params["mask"];

        nvec = params["nvec"];

        nimg = params["nimg"];


        // count pixels under mask

        pixels=0;

        size_t totpix=mask->get_xsize()*mask->get_ysize()*mask->get_zsize();

        float *d=mask->get_data();

        for (size_t i=0; i<totpix; ++i) if (d[i]) ++pixels;


        printf("%d,%d\n",pixels,nimg);

        A=gsl_matrix_alloc(pixels,nimg);

        nsofar=0;

}


// Forward to dump_factory<> instantiated for the Analyzer type.
void EMAN::dump_analyzers()
{
        dump_factory<Analyzer>();
}


// Return whatever dump_factory_list<> produces for the Analyzer type.
map<string, vector<string> > EMAN::dump_analyzers_list()
{
        map<string, vector<string> > listing = dump_factory_list<Analyzer>();
        return listing;
}


// Compute ring averages of a single 2D image about the pixel (nx/2, ny/2).
//
// Parameters read from `params` (defaults are installed if absent):
//   "maxr" - outer radius limit, default nx/2-1
//   "step" - ring width, default 2; values below 1 are treated as 1
//
// For every ring start it = 0, step, 2*step, ... < maxr the mean of all
// pixels with it^2 <= dx^2+dy^2 < (it+step)^2 is written to element it/step
// of a new (maxr/step+1) x 1 image. Pixel values are read with
// sget_value_at().
//
// The result image is appended to the member vector `ret`, which is then
// returned; `ret` is not cleared here, so repeated calls accumulate entries.
//
// Throws ImageDimensionException unless exactly one image was inserted and
// that image has nz <= 1.
vector<EMData *> CircularAverageAnalyzer::analyze() {
        if (images.size()!=1) throw ImageDimensionException("Only takes a single image as input");

        const int nx=images[0]->get_xsize();
        const int ny=images[0]->get_ysize();
        const int nz=images[0]->get_zsize();
        if (nz>1)
                throw ImageDimensionException("Only takes 2D images.");

        int maxr=params.set_default("maxr",nx/2-1);
        int step=params.set_default("step",2);

        // A zero step divides by zero below and never advances the ring loop; a
        // negative step never terminates either. Fall back to the narrowest ring.
        if (step<1) step=1;

        EMData *avg = new EMData(maxr/step+1,1);

        const int cx=nx/2;
        const int cy=ny/2;

        for (int it=0; it<maxr; it+=step){
                // Squared radius bounds of this ring, constant for the scan below.
                const int r2lo=it*it;
                const int r2hi=(it+step)*(it+step);

                float mn=0;
                int count=0;
                for (int ix=-maxr-1; ix<=maxr+1; ix++){
                        for (int iy=-maxr-1; iy<=maxr+1; iy++){
                                const int d2=ix*ix+iy*iy;
                                if (d2<r2lo || d2>=r2hi) continue;

                                count++;
                                mn+=images[0]->sget_value_at(ix+cx,iy+cy);
                        }
                }

                // With step >= 1 the offset (it,0) always lies in the ring and
                // inside the scanned square, so count is never zero here.
                mn/=count;
                if(verbose>0) printf("%d,%d,%f\n",it,count,mn);
                avg->set_value_at(it/step,0,mn);
        }

        ret.push_back(avg);
        return ret;
}


TOL
#define TOL
Definition: analyzer.cpp:696

hvec
#define hvec(i)
Definition: analyzer.cpp:702

V
#define V(i, j)
Definition: analyzer.cpp:697

qmat
#define qmat(i, j)
Definition: analyzer.cpp:590

eigval
#define eigval(i)
Definition: analyzer.cpp:594

diag
#define diag(i)
Definition: analyzer.cpp:701

Av
#define Av(i)
Definition: analyzer.cpp:699

rdata
#define rdata(i)
Definition: analyzer.cpp:592

qsqcmp
float qsqcmp(EMData *a, EMData *b)
Definition: analyzer.cpp:388

imgdata
#define imgdata(i)
Definition: analyzer.cpp:453

v0
#define v0(i)
Definition: analyzer.cpp:698

covmat
#define covmat(i, j)
Definition: analyzer.cpp:452

eigvec
#define eigvec(i, j)
Definition: analyzer.cpp:958

subdiag
#define subdiag(i)
Definition: analyzer.cpp:700

analyzer.h

EMAN::Analyzer::insert_image
virtual int insert_image(EMData *image)=0
insert an image to the list of input images

EMAN::Analyzer::params
Dict params
Definition: analyzer.h:116

EMAN::Analyzer::images
vector< EMData * > images
Definition: analyzer.h:117

EMAN::CircularAverageAnalyzer::analyze
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:1034

EMAN::CircularAverageAnalyzer::verbose
int verbose
Definition: analyzer.h:407

EMAN::CircularAverageAnalyzer::ret
vector< EMData * > ret
Definition: analyzer.h:408

EMAN::CircularAverageAnalyzer::NAME
static const string NAME
Definition: analyzer.h:404

EMAN::Dict
Dict is a dictionary to store <string, EMObject> pair.
Definition: emobject.h:385

EMAN::Dict::set_default
type set_default(const string &key, type val)
Default setting behavior This can be achieved using a template - d.woolford Jan 2008 (before there wa...
Definition: emobject.h:569

EMAN::Dict::has_key
bool has_key(const string &key) const
Ask the Dictionary if it has a particular key.
Definition: emobject.h:511

EMAN::EMData
EMData stores an image's data and defines core image processing routines.
Definition: emdata.h:82

EMAN::Factory
Factory is used to store objects to create new instances.
Definition: emobject.h:725

EMAN::InertiaMatrixAnalyzer::NAME
static const string NAME
Definition: analyzer.h:162

EMAN::InertiaMatrixAnalyzer::verbose
int verbose
Definition: analyzer.h:165

EMAN::InertiaMatrixAnalyzer::ret
vector< EMData * > ret
Definition: analyzer.h:166

EMAN::InertiaMatrixAnalyzer::analyze
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:78

EMAN::KMeansAnalyzer::resort
void resort()
Definition: analyzer.cpp:400

EMAN::KMeansAnalyzer::set_params
void set_params(const Dict &new_params)
Set the Analyzer parameters using a key/value dictionary.
Definition: analyzer.cpp:161

EMAN::KMeansAnalyzer::centers
vector< EMData * > centers
Definition: analyzer.h:288

EMAN::KMeansAnalyzer::mininclass
int mininclass
Definition: analyzer.h:294

EMAN::KMeansAnalyzer::calcsigmamean
int calcsigmamean
Definition: analyzer.h:297

EMAN::KMeansAnalyzer::nclstot
int nclstot
Definition: analyzer.h:290

EMAN::KMeansAnalyzer::nchanged
int nchanged
Definition: analyzer.h:295

EMAN::KMeansAnalyzer::update_centers
void update_centers(int sigmas=0)
Definition: analyzer.cpp:255

EMAN::KMeansAnalyzer::outlierclass
int outlierclass
Definition: analyzer.h:298

EMAN::KMeansAnalyzer::reclassify
void reclassify()
Definition: analyzer.cpp:429

EMAN::KMeansAnalyzer::maxiter
int maxiter
Definition: analyzer.h:293

EMAN::KMeansAnalyzer::verbose
int verbose
Definition: analyzer.h:291

EMAN::KMeansAnalyzer::slowseed
int slowseed
Definition: analyzer.h:296

EMAN::KMeansAnalyzer::reseed
void reseed()
Definition: analyzer.cpp:329

EMAN::KMeansAnalyzer::minchange
int minchange
Definition: analyzer.h:292

EMAN::KMeansAnalyzer::ncls
int ncls
Definition: analyzer.h:289

EMAN::KMeansAnalyzer::analyze
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:174

EMAN::KMeansAnalyzer::NAME
static const string NAME
Definition: analyzer.h:280

EMAN::SVDAnalyzer::insert_image
virtual int insert_image(EMData *image)
insert an image to the list of input images
Definition: analyzer.cpp:937

EMAN::SVDAnalyzer::analyze
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:959

EMAN::SVDAnalyzer::nvec
int nvec
Definition: analyzer.h:354

EMAN::SVDAnalyzer::set_params
void set_params(const Dict &new_params)
Set the Analyzer parameters using a key/value dictionary.
Definition: analyzer.cpp:1003

EMAN::SVDAnalyzer::A
gsl_matrix * A
Definition: analyzer.h:360

EMAN::SVDAnalyzer::nimg
int nimg
Definition: analyzer.h:356

EMAN::SVDAnalyzer::NAME
static const string NAME
Definition: analyzer.h:350

EMAN::SVDAnalyzer::nsofar
int nsofar
Definition: analyzer.h:359

EMAN::SVDAnalyzer::pixels
int pixels
Definition: analyzer.h:355

EMAN::SVDAnalyzer::mask
EMData * mask
Definition: analyzer.h:353

EMAN::ShapeAnalyzer::verbose
int verbose
Definition: analyzer.h:215

EMAN::ShapeAnalyzer::ret
vector< EMData * > ret
Definition: analyzer.h:216

EMAN::ShapeAnalyzer::NAME
static const string NAME
Definition: analyzer.h:212

EMAN::ShapeAnalyzer::analyze
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:122

EMAN::Util::get_irand
static int get_irand(int low, int high)
Get an integer random number between low and high, [low, high].
Definition: util.cpp:719

cmp.h

Assert
#define Assert(s)
Define Assert() function that is effective only when -DDEBUG is used.
Definition: emassert.h:42

emdata.h

sqrt
EMData * sqrt() const
return square root of current image
Definition: emdata_core.cpp:1084

get_attr
EMObject get_attr(const string &attr_name) const
The generic way to get any image header information given a header attribute name.
Definition: emdata_metadata.cpp:1006

get_ysize
int get_ysize() const
Get the image y-dimensional size.
Definition: emdata_metadata.h:553

get_zsize
int get_zsize() const
Get the image z-dimensional size.
Definition: emdata_metadata.h:562

get_xsize
int get_xsize() const
Get the image x-dimensional size.
Definition: emdata_metadata.h:544

has_attr
bool has_attr(const string &key) const
Ask if the header has a particular attribute.
Definition: emdata_metadata.h:498

get_attr_default
EMObject get_attr_default(const string &attr_name, const EMObject &em_obj=EMObject()) const
The generic way to get any image header information given a header attribute name.

set_attr
void set_attr(const string &key, EMObject val)
Set a header attribute's value.

EMDeletePtr
void EMDeletePtr(T &x)
Definition: emutil.h:47

EMDeleteArray
void EMDeleteArray(T &x)
Definition: emutil.h:62

ImageDimensionException
#define ImageDimensionException(desc)
Definition: exception.h:166

NullPointerException
#define NullPointerException(desc)
Definition: exception.h:241

EMAN
E2Exception class.
Definition: aligner.h:40

EMAN::dump_analyzers_list
map< string, vector< string > > dump_analyzers_list()
Definition: analyzer.cpp:1027

EMAN::dump_analyzers
void dump_analyzers()
Definition: analyzer.cpp:1022

y
#define y(i, j)
Definition: projector.cpp:1516

x
#define x(i)
Definition: projector.cpp:1517

images
#define images(i, j, k)
Definition: projector.cpp:1897

util.h