EMAN::PCAlarge Class Reference

#include <analyzer_sparx.h>

Inheritance diagram for EMAN::PCAlarge:

Inheritance graph
[legend]
Collaboration diagram for EMAN::PCAlarge:

Collaboration graph
[legend]

List of all members.

Public Member Functions

 PCAlarge ()
virtual int insert_image (EMData *image)
 insert a image to the list of input images
virtual vector< EMData * > analyze ()
 main function for Analyzer, analyze input images and create output images
string get_name () const
 Get the Analyzer's name.
string get_desc () const
 Get the Analyzer's description.
void set_params (const Dict &new_params)
 Set the Analyzer parameters using a key/value dictionary.
int Lanczos (const string &maskedimages, int *kstep, float *diag, float *subdiag, float *V, float *beta)
TypeDict get_param_types () const
 Get Analyzer parameter information in a dictionary.

Static Public Member Functions

static AnalyzerNEW ()

Protected Attributes

EMDatamask
int nvec

Private Attributes

int ncov
int nimages
float * eigval


Detailed Description

Definition at line 104 of file analyzer_sparx.h.


Constructor & Destructor Documentation

EMAN::PCAlarge::PCAlarge (  )  [inline]

Definition at line 107 of file analyzer_sparx.h.

Referenced by NEW().

00107 : mask(0), nvec(0) {}


Member Function Documentation

int PCAlarge::insert_image ( EMData image  )  [virtual]

insert a image to the list of input images

Parameters:
image 
Returns:
int 0 for success, <0 for fail

Implements EMAN::Analyzer.

Definition at line 337 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMData::get_xsize(), imgdata, mask, nimages, NullPointerException, and nx.

00338 {
00339         if(mask==0)
00340                 throw NullPointerException("Null mask image pointer, set_params() first");
00341 
00342    EMData *maskedimage = Util::compress_image_mask(image,mask);
00343 
00344    FILE *fp;
00345    string scratchfile = string("maskedimages.scratch");
00346 
00347    fp = fopen(scratchfile.c_str(),"a");
00348 
00349    int nx = maskedimage->get_xsize();
00350    float *imgdata = maskedimage->get_data();
00351    fwrite(imgdata, sizeof(float), nx, fp);
00352    nimages++;
00353 
00354    fclose(fp);
00355 
00356    EMDeletePtr(maskedimage);
00357 
00358    return 0;
00359 }

vector< EMData * > PCAlarge::analyze (  )  [virtual]

main function for Analyzer, analyze input images and create output images

Returns:
vector<EMData *> result os images analysis

Implements EMAN::Analyzer.

Definition at line 393 of file analyzer.cpp.

References diag, eigval, eigvec, EMDeletePtr(), EMAN::EMData::get_data(), EMAN::Analyzer::images, Lanczos(), mask, ncov, nimages, nvec, nx, qmat, rdata, reconstitute_image_mask(), EMAN::EMData::set_attr(), EMAN::EMData::set_size(), sgemv_(), sstevd_(), status, and subdiag.

00394 {
00395         int status = 0;
00396         int ione = 1;
00397         float one = 1.0, zero = 0.0;
00398         char trans;
00399         float *eigvec;
00400         string scratchfile = string("maskedimages.scratch");
00401         char command[100];
00402 
00403         printf("start analyzing..., ncov = %d\n", ncov);
00404 
00405         float resnrm = 0.0;
00406 
00407         if ( nvec > nimages || nvec ==0 ) nvec = nimages;
00408         int nx = ncov;
00409 
00410         // the definition of kstep is purely a heuristic for right now
00411         int kstep = nvec + 20;
00412         if (kstep > nimages) kstep = nimages;
00413 
00414         float *diag    = new float[kstep];
00415         float *subdiag = new float[kstep-1];
00416         float *vmat    = new float[nx*kstep];
00417 
00418         // run kstep-step Lanczos factorization
00419         status = Lanczos(scratchfile, &kstep, diag, subdiag,
00420                          vmat, &resnrm);
00421 
00422         // remove scratch file
00423         sprintf(command,"rm -f %s\n", scratchfile.c_str());
00424         status = system(command);
00425         if (status != 0) {
00426             fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
00427         }
00428 
00429         char jobz[2] = "V";
00430         float *qmat  = new float[kstep*kstep];
00431         // workspace size will be optimized later
00432         int   lwork  = 100 + 4*kstep + kstep*kstep;
00433         int   liwork = 3+5*kstep;
00434 
00435         float *work  = new float[lwork];
00436         int   *iwork = new int[liwork];
00437         int   info = 0;
00438 
00439         // call LAPACK tridiagonal eigensolver
00440         sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
00441                 iwork, &liwork, &info);
00442 
00443         // store eigenvalues
00444         eigval = (float*)calloc(ncov,sizeof(float));
00445         eigvec = (float*)calloc(ncov*nvec,sizeof(float));
00446 
00447         for (int j = 0; j < nvec; j++) {
00448             eigval[j] = diag(kstep-j);
00449         }
00450 
00451 //         for (int i=0; i<nvec; i++) printf("eigval = %11.4e\n",
00452 //             eigval[i]);
00453 
00454         // compute eigenvectors
00455         for (int j=1; j<=nvec; j++) {
00456             trans = 'N';
00457             sgemv_(&trans, &nx,  &kstep, &one, vmat, &nx, &qmat(1,kstep-j+1),
00458                    &ione, &zero, &eigvec(1,j), &ione);
00459         }
00460 
00461         // pack eigenvectors into the return imagelist
00462         EMData *eigenimage = new EMData();
00463         eigenimage->set_size(ncov,1,1);
00464         float *rdata = eigenimage->get_data();
00465         for (int j = 1; j<= nvec; j++) {
00466             for (int i = 1; i <= ncov; i++)
00467                 rdata(i) = eigvec(i,j);
00468 
00469             EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
00470 
00471             recons_eigvec->set_attr( "eigval", eigval[j-1] );
00472 
00473             images.push_back( recons_eigvec );
00474         }
00475 
00476         free(eigvec);
00477         EMDeletePtr(eigenimage);
00478 
00479         return images;
00480 }

string EMAN::PCAlarge::get_name (  )  const [inline, virtual]

Get the Analyzer's name.

Each Analyzer is identified by a unique name.

Returns:
The Analyzer's name.

Implements EMAN::Analyzer.

Definition at line 113 of file analyzer_sparx.h.

00114                 {
00115                         return "pca_large";
00116                 }               

string EMAN::PCAlarge::get_desc (  )  const [inline, virtual]

Get the Analyzer's description.

Returns:
The Analyzer's description.

Implements EMAN::Analyzer.

Definition at line 118 of file analyzer_sparx.h.

00119                 {
00120                         return "Principal component analysis";
00121                 }

static Analyzer* EMAN::PCAlarge::NEW (  )  [inline, static]

Definition at line 123 of file analyzer_sparx.h.

References PCAlarge().

Referenced by EMAN::Factory< T >::Factory().

00124                 {
00125                         return new PCAlarge();
00126                 }

void PCAlarge::set_params ( const Dict new_params  )  [virtual]

Set the Analyzer parameters using a key/value dictionary.

Parameters:
new_params A dictionary containing the new parameters.

Reimplemented from EMAN::Analyzer.

Definition at line 361 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_xsize(), EMAN::EMData::get_ysize(), EMAN::EMData::get_zsize(), mask, ncov, nimages, nvec, nx, ny, EMAN::Analyzer::params, and EMAN::EMData::set_size().

00362 {
00363         params = new_params;
00364         mask = params["mask"];
00365         nvec = params["nvec"];
00366 
00367         // count the number of pixels under the mask
00368         // (this is really ugly!!!)
00369         EMData *dummy = new EMData();
00370 
00371         int nx = mask->get_xsize();
00372         int ny = mask->get_ysize();
00373         int nz = mask->get_zsize();
00374 
00375         dummy->set_size(nx,ny,nz);
00376 
00377         EMData *dummy1d = Util::compress_image_mask(dummy,mask);
00378 
00379         ncov = dummy1d->get_xsize();
00380 
00381         EMDeletePtr(dummy);
00382         EMDeletePtr(dummy1d);
00383         // no need to allocate the covariance matrix
00384         nimages = 0;
00385 }

int PCAlarge::Lanczos ( const string &  maskedimages,
int *  kstep,
float *  diag,
float *  subdiag,
float *  V,
float *  beta 
)

Definition at line 495 of file analyzer.cpp.

References Av, diag, EMDeleteArray(), hvec, imgdata, ncov, nimages, saxpy_(), sdot_(), sgemv_(), snrm2_(), status, subdiag, TOL, V, and v0.

Referenced by analyze().

00498 {
00499     /*
00500         Purpose: Compute a kstep-step Lanczos factorization
00501                  on the covariant matrix X*trans(X), where
00502                  X (imgstack) contains a set of images;
00503 
00504         Input:
00505            imgstack (vector <EMData*>) a set of images on which PCA is
00506                                        to be performed;
00507 
00508            kstep (int*) The maximum number of Lanczos iterations allowed.
00509                           If Lanczos terminates before kstep steps
00510                           is reached (an invariant subspace is found),
00511                           kstep returns the number of steps taken;
00512 
00513         Output:
00514            diag (float *) The projection of the covariant matrix into a
00515                           Krylov subspace of dimension at most kstep.
00516                           The projection is a tridiagonal matrix. The
00517                           diagonal elements of this matrix is stored in
00518                           the diag array.
00519 
00520            subdiag (float*) The subdiagonal elements of the projection
00521                             is stored here.
00522 
00523            V (float *)    an imgsize by kstep array that contains a
00524                           set of orthonormal Lanczos basis vectors;
00525 
00526            beta (float *) the residual norm of the factorization;
00527     */
00528     int i, iter;
00529 
00530     float alpha;
00531     int   ione = 1;
00532     float zero = 0.0, one = 1.0, mone = -1.0;
00533     int   status = 0;
00534 
00535     char trans;
00536     int  imgsize = 0;
00537     float *v0, *Av, *hvec, *htmp, *imgdata;
00538     FILE  *fp=NULL;
00539 
00540     if (nimages <= 0) {
00541         status = 2; // no image in the stack
00542         goto EXIT;
00543     }
00544 
00545     imgsize = ncov;
00546     if (nimages <= 0) {
00547         status = 3; // no image in the stack
00548         goto EXIT;
00549     }
00550 
00551     v0   = new float[imgsize];
00552     Av   = new float[imgsize];
00553     hvec = new float[*kstep];
00554     htmp = new float[*kstep];
00555     imgdata = new float[imgsize];
00556 
00557     if (v0 == NULL || Av == NULL || hvec == NULL ||
00558         htmp == NULL || imgdata == NULL) {
00559         fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n");
00560         status = -1;
00561         goto EXIT;
00562     }
00563 
00564     // may choose a random starting guess here
00565     for ( i = 1; i <= imgsize; i++)
00566     {
00567         v0(i) = 1.0;
00568         Av(i) = 0.0;
00569     }
00570 
00571     // normalize the starting vector
00572     *beta  = snrm2_(&imgsize, v0, &ione);
00573     for (i = 1; i<=imgsize; i++)
00574         V(i,1) = v0(i) / (*beta);
00575 
00576     // do Av <-- A*v0, where A is a cov matrix
00577     fp = fopen(maskedimages.c_str(),"r");
00578     if (fp==NULL) {
00579         fprintf(stderr,"Lanczos: cannot open %s\n", maskedimages.c_str());
00580     }
00581     for (i = 0; i < nimages; i++) {
00582        fread(imgdata, sizeof(float), imgsize, fp);
00583        alpha = sdot_(&imgsize, imgdata, &ione, V, &ione);
00584        saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00585     }
00586     fclose(fp);
00587 
00588 
00589     // Av <--- Av - V(:,1)*V(:,1)'*Av
00590     diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
00591     alpha   = -diag(1);
00592     saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);
00593 
00594     // main loop
00595     for ( iter = 2 ; iter <= *kstep ; iter++ ) {
00596         *beta = snrm2_(&imgsize, Av, &ione);
00597 
00598         if (*beta < TOL) {
00599             // found an invariant subspace, exit
00600             *kstep = iter;
00601             break;
00602         }
00603 
00604         subdiag(iter-1) = *beta;
00605         for ( i = 1 ; i <= imgsize ; i++ ) {
00606             V(i,iter) = Av(i) / (*beta);
00607         }
00608 
00609         // do Av <-- A*V(:,iter), where A is a cov matrix
00610         for (i = 0; i < imgsize; i++) Av[i] = 0;
00611         fp = fopen(maskedimages.c_str(),"r");
00612         for (i = 0; i < nimages; i++) {
00613            fread(imgdata, sizeof(float), imgsize, fp);
00614            alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione);
00615            saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
00616         }
00617         fclose(fp);
00618 
00619         // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
00620         trans = 'T';
00621         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00622                         &zero , hvec    , &ione);
00623         trans = 'N';
00624         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
00625                         &ione , &one    , Av, &ione);
00626 
00627         // one step of reorthogonalization
00628         trans = 'T';
00629         status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
00630                         &zero , htmp    , &ione);
00631         saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
00632         trans = 'N';
00633         status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
00634                         &ione , &one    , Av, &ione);
00635         diag(iter) = hvec(iter);
00636     }
00637 
00638     EMDeleteArray(v0);
00639     EMDeleteArray(Av);
00640     EMDeleteArray(hvec);
00641     EMDeleteArray(htmp);
00642     EMDeleteArray(imgdata);
00643 
00644 EXIT:
00645     return status;
00646 
00647 }

TypeDict EMAN::PCAlarge::get_param_types (  )  const [inline, virtual]

Get Analyzer parameter information in a dictionary.

Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.

Returns:
A dictionary containing the parameter info.

Implements EMAN::Analyzer.

Definition at line 135 of file analyzer_sparx.h.

References EMAN::EMObject::EMDATA, EMAN::EMObject::INT, and EMAN::TypeDict::put().

00136                 {
00137                         TypeDict d;
00138                         d.put("mask", EMObject::EMDATA, "mask image");
00139                         d.put("nvec", EMObject::INT, "number of desired principal components");
00140                         return d;
00141                 }


Member Data Documentation

Definition at line 144 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), and set_params().

int EMAN::PCAlarge::nvec [protected]

Definition at line 145 of file analyzer_sparx.h.

Referenced by analyze(), and set_params().

int EMAN::PCAlarge::ncov [private]

Definition at line 148 of file analyzer_sparx.h.

Referenced by analyze(), Lanczos(), and set_params().

int EMAN::PCAlarge::nimages [private]

Definition at line 149 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), Lanczos(), and set_params().

float* EMAN::PCAlarge::eigval [private]

Definition at line 150 of file analyzer_sparx.h.

Referenced by analyze().


The documentation for this class was generated from the following files:

Generated on Sat Nov 21 02:20:44 2009 for EMAN2 by  doxygen 1.5.6