EMAN2
Public Member Functions | Static Public Member Functions | Static Public Attributes | Protected Attributes | Private Attributes
EMAN::PCAlarge Class Reference

#include <analyzer_sparx.h>

Inheritance diagram for EMAN::PCAlarge:
Inheritance graph
[legend]
Collaboration diagram for EMAN::PCAlarge:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 PCAlarge ()
virtual int insert_image (EMData *image)
 insert a image to the list of input images
virtual vector< EMData * > analyze ()
 main function for Analyzer, analyze input images and create output images
string get_name () const
 Get the Analyzer's name.
string get_desc () const
 Get the Analyzer's description.
void set_params (const Dict &new_params)
 Set the Analyzer parameters using a key/value dictionary.
int Lanczos (const string &maskedimages, int *kstep, float *diag, float *subdiag, float *V, float *beta)
TypeDict get_param_types () const
 Get Analyzer parameter information in a dictionary.

Static Public Member Functions

static AnalyzerNEW ()

Static Public Attributes

static const string NAME = "pca_large"

Protected Attributes

EMDatamask
int nvec

Private Attributes

int ncov
int nimages
float * eigval

Detailed Description

Definition at line 106 of file analyzer_sparx.h.


Constructor & Destructor Documentation

EMAN::PCAlarge::PCAlarge ( ) [inline]

Definition at line 109 of file analyzer_sparx.h.

Referenced by NEW().

: mask(0), nvec(0) {}

Member Function Documentation

vector< EMData * > PCAlarge::analyze ( ) [virtual]

main function for Analyzer, analyze input images and create output images

Returns:
vector<EMData *> result os images analysis

Implements EMAN::Analyzer.

Definition at line 477 of file analyzer.cpp.

References diag, eigval, eigvec, EMDeletePtr(), EMAN::EMData::get_data(), EMAN::Analyzer::images, Lanczos(), mask, ncov, nimages, nvec, nx, qmat, rdata, reconstitute_image_mask(), EMAN::EMData::set_attr(), EMAN::EMData::set_size(), sgemv_(), sstevd_(), status, and subdiag.

{
        int status = 0;
        int ione = 1;
        float one = 1.0, zero = 0.0;
        char trans;
        float *eigvec;
        string scratchfile = string("maskedimages.scratch");
        char command[100];

        printf("start analyzing..., ncov = %d\n", ncov);

        float resnrm = 0.0;

        if ( nvec > nimages || nvec ==0 ) nvec = nimages;
        int nx = ncov;

        // the definition of kstep is purely a heuristic for right now
        int kstep = nvec + 20;
        if (kstep > nimages) kstep = nimages;

        float *diag    = new float[kstep];
        float *subdiag = new float[kstep-1];
        float *vmat    = new float[nx*kstep];

        // run kstep-step Lanczos factorization
        status = Lanczos(scratchfile, &kstep, diag, subdiag,
                         vmat, &resnrm);

        // remove scratch file
#ifdef _WIN32
        if (_unlink(scratchfile.c_str()) == -1) {
                fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
        }
#else
        sprintf(command,"rm -f %s\n", scratchfile.c_str());
        status = system(command);
        if (status != 0) {
                fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
        }
#endif  //_WIN32

        char jobz[2] = "V";
        float *qmat  = new float[kstep*kstep];
        // workspace size will be optimized later
        int   lwork  = 100 + 4*kstep + kstep*kstep;
        int   liwork = 3+5*kstep;

        float *work  = new float[lwork];
        int   *iwork = new int[liwork];
        int   info = 0;

        // call LAPACK tridiagonal eigensolver
        sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
                iwork, &liwork, &info);

        // store eigenvalues
        eigval = (float*)calloc(ncov,sizeof(float));
        eigvec = (float*)calloc(ncov*nvec,sizeof(float));

        for (int j = 0; j < nvec; j++) {
            eigval[j] = diag(kstep-j);
        }

//         for (int i=0; i<nvec; i++) printf("eigval = %11.4e\n",
//             eigval[i]);

        // compute eigenvectors
        for (int j=1; j<=nvec; j++) {
            trans = 'N';
            sgemv_(&trans, &nx,  &kstep, &one, vmat, &nx, &qmat(1,kstep-j+1),
                   &ione, &zero, &eigvec(1,j), &ione);
        }

        // pack eigenvectors into the return imagelist
        EMData *eigenimage = new EMData();
        eigenimage->set_size(ncov,1,1);
        float *rdata = eigenimage->get_data();
        for (int j = 1; j<= nvec; j++) {
            for (int i = 1; i <= ncov; i++)
                rdata(i) = eigvec(i,j);

            EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);

            recons_eigvec->set_attr( "eigval", eigval[j-1] );

            images.push_back( recons_eigvec );
        }

        free(eigvec);
        EMDeletePtr(eigenimage);

        return images;
}
string EMAN::PCAlarge::get_desc ( ) const [inline, virtual]

Get the Analyzer's description.

Returns:
The Analyzer's description.

Implements EMAN::Analyzer.

Definition at line 120 of file analyzer_sparx.h.

                {
                        return "Principal component analysis";
                }
string EMAN::PCAlarge::get_name ( ) const [inline, virtual]

Get the Analyzer's name.

Each Analyzer is identified by a unique name.

Returns:
The Analyzer's name.

Implements EMAN::Analyzer.

Definition at line 115 of file analyzer_sparx.h.

References NAME.

                {
                        return NAME;
                }               
TypeDict EMAN::PCAlarge::get_param_types ( ) const [inline, virtual]

Get Analyzer parameter information in a dictionary.

Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.

Returns:
A dictionary containing the parameter info.

Implements EMAN::Analyzer.

Definition at line 137 of file analyzer_sparx.h.

References EMAN::EMObject::EMDATA, EMAN::EMObject::INT, and EMAN::TypeDict::put().

                {
                        TypeDict d;
                        d.put("mask", EMObject::EMDATA, "mask image");
                        d.put("nvec", EMObject::INT, "number of desired principal components");
                        return d;
                }
int PCAlarge::insert_image ( EMData image) [virtual]

insert a image to the list of input images

Parameters:
image
Returns:
int 0 for success, <0 for fail

Implements EMAN::Analyzer.

Definition at line 421 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_data(), EMAN::EMData::get_xsize(), imgdata, mask, nimages, NullPointerException, and nx.

{
        if(mask==0)
                throw NullPointerException("Null mask image pointer, set_params() first");

   EMData *maskedimage = Util::compress_image_mask(image,mask);

   FILE *fp;
   string scratchfile = string("maskedimages.scratch");

   fp = fopen(scratchfile.c_str(),"ab");

   int nx = maskedimage->get_xsize();
   float *imgdata = maskedimage->get_data();
   fwrite(imgdata, sizeof(float), nx, fp);
   nimages++;

   fclose(fp);

   EMDeletePtr(maskedimage);

   return 0;
}
int PCAlarge::Lanczos ( const string &  maskedimages,
int *  kstep,
float *  diag,
float *  subdiag,
float *  V,
float *  beta 
)

Definition at line 585 of file analyzer.cpp.

References Av, diag, EMDeleteArray(), hvec, imgdata, ncov, nimages, saxpy_(), sdot_(), sgemv_(), snrm2_(), status, subdiag, TOL, V, and v0.

Referenced by analyze().

{
    /*
        Purpose: Compute a kstep-step Lanczos factorization
                 on the covariant matrix X*trans(X), where
                 X (imgstack) contains a set of images;

        Input:
           imgstack (vector <EMData*>) a set of images on which PCA is
                                       to be performed;

           kstep (int*) The maximum number of Lanczos iterations allowed.
                          If Lanczos terminates before kstep steps
                          is reached (an invariant subspace is found),
                          kstep returns the number of steps taken;

        Output:
           diag (float *) The projection of the covariant matrix into a
                          Krylov subspace of dimension at most kstep.
                          The projection is a tridiagonal matrix. The
                          diagonal elements of this matrix is stored in
                          the diag array.

           subdiag (float*) The subdiagonal elements of the projection
                            is stored here.

           V (float *)    an imgsize by kstep array that contains a
                          set of orthonormal Lanczos basis vectors;

           beta (float *) the residual norm of the factorization;
    */
    int i, iter;

    float alpha;
    int   ione = 1;
    float zero = 0.0, one = 1.0, mone = -1.0;
    int   status = 0;

    char trans;
    int  imgsize = 0;
    float *v0, *Av, *hvec, *htmp, *imgdata;
    FILE  *fp=NULL;

    if (nimages <= 0) {
        status = 2; // no image in the stack
        goto EXIT;
    }

    imgsize = ncov;
    if (nimages <= 0) {
        status = 3; // no image in the stack
        goto EXIT;
    }

    v0   = new float[imgsize];
    Av   = new float[imgsize];
    hvec = new float[*kstep];
    htmp = new float[*kstep];
    imgdata = new float[imgsize];

    if (v0 == NULL || Av == NULL || hvec == NULL ||
        htmp == NULL || imgdata == NULL) {
        fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n");
        status = -1;
        goto EXIT;
    }

    // may choose a random starting guess here
    for ( i = 1; i <= imgsize; i++)
    {
        v0(i) = 1.0;
        Av(i) = 0.0;
    }

    // normalize the starting vector
    *beta  = snrm2_(&imgsize, v0, &ione);
    for (i = 1; i<=imgsize; i++)
        V(i,1) = v0(i) / (*beta);

    // do Av <-- A*v0, where A is a cov matrix
    fp = fopen(maskedimages.c_str(),"rb");
    if (fp==NULL) {
        fprintf(stderr,"Lanczos: cannot open %s\n", maskedimages.c_str());
    }
    for (i = 0; i < nimages; i++) {
       fread(imgdata, sizeof(float), imgsize, fp);
       alpha = sdot_(&imgsize, imgdata, &ione, V, &ione);
       saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
    }
    fclose(fp);


    // Av <--- Av - V(:,1)*V(:,1)'*Av
    diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
    alpha   = -diag(1);
    saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);

    // main loop
    for ( iter = 2 ; iter <= *kstep ; iter++ ) {
        *beta = snrm2_(&imgsize, Av, &ione);

        if (*beta < TOL) {
            // found an invariant subspace, exit
            *kstep = iter;
            break;
        }

        subdiag(iter-1) = *beta;
        for ( i = 1 ; i <= imgsize ; i++ ) {
            V(i,iter) = Av(i) / (*beta);
        }

        // do Av <-- A*V(:,iter), where A is a cov matrix
        for (i = 0; i < imgsize; i++) Av[i] = 0;
        fp = fopen(maskedimages.c_str(),"rb");
        for (i = 0; i < nimages; i++) {
           fread(imgdata, sizeof(float), imgsize, fp);
           alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione);
           saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
        }
        fclose(fp);

        // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
        trans = 'T';
        status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
                        &zero , hvec    , &ione);
        trans = 'N';
        status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
                        &ione , &one    , Av, &ione);

        // one step of reorthogonalization
        trans = 'T';
        status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
                        &zero , htmp    , &ione);
        saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
        trans = 'N';
        status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
                        &ione , &one    , Av, &ione);
        diag(iter) = hvec(iter);
    }

    EMDeleteArray(v0);
    EMDeleteArray(Av);
    EMDeleteArray(hvec);
    EMDeleteArray(htmp);
    EMDeleteArray(imgdata);

EXIT:
    return status;

}
static Analyzer* EMAN::PCAlarge::NEW ( ) [inline, static]

Definition at line 125 of file analyzer_sparx.h.

References PCAlarge().

                {
                        return new PCAlarge();
                }
void PCAlarge::set_params ( const Dict new_params) [virtual]

Set the Analyzer parameters using a key/value dictionary.

Parameters:
new_paramsA dictionary containing the new parameters.

Reimplemented from EMAN::Analyzer.

Definition at line 445 of file analyzer.cpp.

References compress_image_mask(), EMDeletePtr(), EMAN::EMData::get_xsize(), EMAN::EMData::get_ysize(), EMAN::EMData::get_zsize(), mask, ncov, nimages, nvec, nx, ny, EMAN::Analyzer::params, and EMAN::EMData::set_size().

{
        params = new_params;
        mask = params["mask"];
        nvec = params["nvec"];

        // count the number of pixels under the mask
        // (this is really ugly!!!)
        EMData *dummy = new EMData();

        int nx = mask->get_xsize();
        int ny = mask->get_ysize();
        int nz = mask->get_zsize();

        dummy->set_size(nx,ny,nz);

        EMData *dummy1d = Util::compress_image_mask(dummy,mask);

        ncov = dummy1d->get_xsize();

        EMDeletePtr(dummy);
        EMDeletePtr(dummy1d);
        // no need to allocate the covariance matrix
        nimages = 0;
}

Member Data Documentation

float* EMAN::PCAlarge::eigval [private]

Definition at line 154 of file analyzer_sparx.h.

Referenced by analyze().

Definition at line 148 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), and set_params().

const string EMAN::PCAlarge::NAME = "pca_large" [static]

Definition at line 145 of file analyzer_sparx.h.

Referenced by get_name().

int EMAN::PCAlarge::ncov [private]

Definition at line 152 of file analyzer_sparx.h.

Referenced by analyze(), Lanczos(), and set_params().

int EMAN::PCAlarge::nimages [private]

Definition at line 153 of file analyzer_sparx.h.

Referenced by analyze(), insert_image(), Lanczos(), and set_params().

int EMAN::PCAlarge::nvec [protected]

Definition at line 149 of file analyzer_sparx.h.

Referenced by analyze(), and set_params().


The documentation for this class was generated from the following files: