EMAN2
analyzer.cpp
Go to the documentation of this file.
1/*
2 * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
3 * Copyright (c) 2000-2006 Baylor College of Medicine
4 *
5 * This software is issued under a joint BSD/GNU license. You may use the
6 * source code in this file under either license. However, note that the
7 * complete EMAN2 and SPARX software packages have some GPL dependencies,
8 * so you are responsible for compliance with the licenses of these packages
9 * if you opt to use BSD licensing. The warranty disclaimer below holds
10 * in either instance.
11 *
12 * This complete copyright notice must be included in any revised version of the
13 * source code. Additional authorship citations may be added, but existing
14 * author citations must be preserved.
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 *
30 * */
31
32#include <ctime>
33#include <memory>
34#include "emdata.h"
35#include "analyzer.h"
36#include "sparx/analyzer_sparx.h"
37#include "util.h"
38#include "cmp.h"
39#include "sparx/lapackblas.h"
40#include "sparx/varimax.h"
41
42using namespace EMAN;
43
44namespace EMAN {
45
46 const string PCAsmall::NAME = "pca";
47 const string PCAlarge::NAME = "pca_large";
48 const string varimax::NAME = "varimax";
49 const string InertiaMatrixAnalyzer::NAME = "inertiamatrix";
50 const string ShapeAnalyzer::NAME = "shape";
51 const string KMeansAnalyzer::NAME = "kmeans";
52 const string SVDAnalyzer::NAME = "svd_gsl";
53 const string CircularAverageAnalyzer::NAME = "cir_avg";
54
56 {
57 force_add<PCAsmall>();
58 force_add<PCAlarge>();
59 force_add<varimax>();
60 force_add<InertiaMatrixAnalyzer>();
61 force_add<ShapeAnalyzer>();
62 force_add<KMeansAnalyzer>();
63 force_add<SVDAnalyzer>();
64 force_add<CircularAverageAnalyzer>();
65 }
66
67}
68
69int Analyzer::insert_images_list(vector<EMData *> image_list)
70{
71 vector<EMData *>::const_iterator iter;
72 for(iter=image_list.begin(); iter!=image_list.end(); ++iter) {
73 insert_image(*iter);
74 }
75 return 0;
76}
77
79 int verbose = params.set_default("verbose",0);
80 EMData *mx = new EMData(3,3); // result is a 3x3 matrix
81 mx->to_zero();
82 ret.push_back(mx);
83
84 if (images.size()!=1) throw ImageDimensionException("Inertia matrix computation accepts only a single volume as input");
85 int nx=images[0]->get_xsize();
86 int ny=images[0]->get_ysize();
87 int nz=images[0]->get_zsize();
88 if (nz==1 || ny==1 || nz==1) throw ImageDimensionException("Map must be 3-D");
89
90 if (verbose>0) printf("Inertia volume size: %d %d %d\n",nx,ny,nz);
91
92 for (int z=0; z<nz; z++) {
93 for (int y=0; y<ny; y++) {
94 for (int x=0; x<nx; x++) {
95 int xx=x-nx/2;
96 int yy=y-ny/2;
97 int zz=z-nz/2;
98 float v=images[0]->get_value_at(x,y,z);
99 mx->set_value_at(0,0,mx->get_value_at(0,0)+v*(yy*yy+zz*zz));
100 mx->set_value_at(0,1,mx->get_value_at(0,1)+v*(-xx*yy));
101 mx->set_value_at(0,2,mx->get_value_at(0,2)+v*(-xx*zz));
102 mx->set_value_at(1,0,mx->get_value_at(1,0)+v*(-xx*yy));
103 mx->set_value_at(1,1,mx->get_value_at(1,1)+v*(zz*zz+xx*xx));
104 mx->set_value_at(1,2,mx->get_value_at(1,2)+v*(-yy*zz));
105 mx->set_value_at(2,0,mx->get_value_at(2,0)+v*(-xx*zz));
106 mx->set_value_at(2,1,mx->get_value_at(2,1)+v*(-yy*zz));
107 mx->set_value_at(2,2,mx->get_value_at(2,2)+v*(xx*xx+yy*yy));
108 }
109 }
110 }
111 mx->mult(1.0f/(nx*ny*nz));
112
113 if (verbose>0) {
114 printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,0),mx->get_value_at(1,0),mx->get_value_at(2,0));
115 printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,1),mx->get_value_at(1,1),mx->get_value_at(2,1));
116 printf("%1.3g\t%1.3g\t%1.3g\n",mx->get_value_at(0,2),mx->get_value_at(1,2),mx->get_value_at(2,2));
117 }
118
119 return ret;
120}
121
122vector<EMData *> ShapeAnalyzer::analyze() {
123 int verbose = params.set_default("verbose",0);
124 EMData *mx = new EMData(4,2,1); // result is 4 values
125 mx->to_zero();
126 ret.push_back(mx);
127
128 if (images.size()!=1) throw ImageDimensionException("Shape computation accepts only a single volume as input");
129 int nx=images[0]->get_xsize();
130 int ny=images[0]->get_ysize();
131 int nz=images[0]->get_zsize();
132 if (nz==1 || ny==1 || nz==1) throw ImageDimensionException("Map must be 3-D");
133
134 if (verbose>0) printf("Shape size: %d %d %d\n",nx,ny,nz);
135
136 for (int z=0; z<nz; z++) {
137 for (int y=0; y<ny; y++) {
138 for (int x=0; x<nx; x++) {
139 int xx=x-nx/2;
140 int yy=y-ny/2;
141 int zz=z-nz/2;
142 float v=images[0]->get_value_at(x,y,z);
143 mx->set_value_at(0,0,mx->get_value_at(0,0)+v*(xx*xx));
144 mx->set_value_at(1,0,mx->get_value_at(1,0)+v*(yy*yy));
145 mx->set_value_at(2,0,mx->get_value_at(2,0)+v*(zz*zz));
146 mx->set_value_at(3,0,mx->get_value_at(3,0)+v*(xx*xx+yy*yy+zz*zz));
147 // sum(m*r^2), in which r is the distance to the center. Used for minicircle classification
148 mx->set_value_at(0,1,mx->get_value_at(0,0)+v*abs(xx));
149 mx->set_value_at(1,1,mx->get_value_at(1,0)+v*abs(yy));
150 mx->set_value_at(2,1,mx->get_value_at(2,0)+v*abs(zz));
151 mx->set_value_at(3,1,mx->get_value_at(3,0)+v*(float)sqrt((float)(xx*xx+yy*yy+zz*zz)));
152 }
153 }
154 }
155 mx->mult(1.0f/(nx*ny*nz));
156
157 return ret;
158}
159
160
161void KMeansAnalyzer::set_params(const Dict & new_params)
162{
163 params = new_params;
164 if (params.has_key("ncls")) ncls = nclstot = params["ncls"];
165 if (params.has_key("maxiter"))maxiter = params["maxiter"];
166 if (params.has_key("minchange"))minchange = params["minchange"];
167 if (params.has_key("mininclass"))mininclass = params["mininclass"];
168 if (params.has_key("slowseed"))slowseed = params["slowseed"];
169 if (params.has_key("verbose"))verbose = params["verbose"];
170 if (params.has_key("calcsigmamean")) calcsigmamean=params["calcsigmamean"];
171 if (params.has_key("outlierclass")) outlierclass=params["outlierclass"];
172}
173
174vector<EMData *> KMeansAnalyzer::analyze()
175{
176if (ncls<=1) return vector<EMData *>();
177//srandom(time(0));
178
179// These are the class centers, start each with a random image
180int nptcl=images.size();
181if (calcsigmamean) centers.resize(nclstot*2);
182else centers.resize(nclstot);
183if (mininclass<1) mininclass=1;
184
185int seedmode=params.set_default("seedmode",(int)0);
186
187// in outlier mode we don't use the bad center concept
188if (outlierclass==0) {
189 for (int i=0; i<nptcl; i++) images[i]->set_attr("is_ok_center",(int)5); // if an image becomes part of too small a set, it will (eventually) be marked as a bad center
190}
191
192if (slowseed) {
193 if (ncls>25) ncls=slowseed=ncls/25+1; // this becomes the number to seed in each step
194// if (maxiter<ncls*3+20) maxiter=ncls*3+20; // We need to make sure we have enough iterations to seed all of the classes
195// ncls=2;
196}
197
198if (seedmode==0) {
199 for (int i=0; i<ncls; i++) {
200 // Fixed by d.woolford, Util.get_irand is inclusive (added a -1)
201 centers[i]=images[Util::get_irand(0,nptcl-1)]->copy();
202 }
203}
204else if (seedmode==1) {
205 // find the images with the largest and smallest sum
206 EMData *max;
207 float maxv=-1.0e27;
208 EMData *min;
209 float minv=1.0e27;
210 for (int i=0; i<nptcl; i++) {
211 float m = images[i]->get_attr("mean");
212 if (m<minv) { minv=m; min=images[i]; }
213 if (m>maxv) { maxv=m; max=images[i]; }
214 }
215 centers[0]=min->copy();
216 centers[ncls-1]=max->copy();
217
218 // now fill in linear interpolates in between
219 for (int i=1; i<ncls-1; i++) {
220 centers[i]=centers[0]->copy();
221 centers[i]->mult((ncls-i-1.0f)/(ncls-1.0f));
222 EMData *tmp=max->copy();
223 tmp->mult(i/(ncls-1.0f));
224 centers[i]->add(*tmp);
225 delete tmp;
226 }
227}
228
229if (calcsigmamean) {
230 for (int i=nclstot; i<nclstot*2; i++) centers[i]=new EMData(images[0]->get_xsize(),images[0]->get_ysize(),images[0]->get_zsize());
231}
232
233
234for (int i=0; i<maxiter; i++) {
235 nchanged=0;
236 resort();
237 reclassify();
239 if (verbose) printf("iter %d> %d (%d)\n",i,nchanged,ncls);
240 if (nchanged<minchange && ncls==nclstot) break;
241
242 if (slowseed && i%3==2 && ncls<nclstot) {
243 for (int j=0; j<slowseed && ncls<nclstot; j++) {
244 centers[ncls]=0;
245 ncls++;
246 }
247 reseed();
248 }
249}
251
252return centers;
253}
254
256int nptcl=images.size();
257//int repr[ncls];
258vector<int> repr(ncls);
259
260for (int i=0; i<ncls; i++) {
261 centers[i]->to_zero();
262 if (sigmas) centers[i+ncls]->to_zero();
263 repr[i]=0;
264 centers[i]->set_attr("worst_ptcldist",0.0f);
265}
266
267// compute new position for each center
268for (int i=0; i<nptcl; i++) {
269 int cid=images[i]->get_attr("class_id");
270 // outlier mode disables is_ok_center functionality
271 if (outlierclass || (int)images[i]->get_attr("is_ok_center")>0) {
272 centers[cid]->add(*images[i]);
273 if (sigmas) centers[cid+ncls]->addsquare(*images[i]);
274 repr[cid]++;
275 float imdist=images[i]->get_attr("class_cendist");
276 if (imdist>(float)centers[cid]->get_attr("worst_ptcldist")) {
277 centers[cid]->set_attr("worst_ptcldist",imdist);
278 centers[cid]->set_attr("worst_ptcl",i);
279 }
280 }
281}
282
283for (int i=0; i<ncls; i++) {
284 // If this class is too small, outlier class is never reseeded
285 if (repr[i]<mininclass && (outlierclass==0||i<nclstot-1)) {
286 // find all of the particles in the class, and decrement their "is_ok_center" counter.
287 // when it reaches zero the particle will no longer participate in determining the location of a center
288 if (outlierclass) { // outliers are relegated to the outlier class permanently
289 for (int j=0; j<nptcl; j++) {
290 if ((int)images[j]->get_attr("class_id")==i) {
291 if (verbose) printf("outlier: %d\n",j);
292 images[j]->set_attr("class_id",nclstot-1);
293 //nchanged++; // should happen automatically below
294 }
295 }
296 }
297 // if not using outlier class, we use "is_ok_center" concept to reduce influence of outliers
298 else {
299 for (int j=0; j<nptcl; j++) {
300 if ((int)images[j]->get_attr("class_id")==i) images[i]->set_attr("is_ok_center",(int)images[i]->get_attr("is_ok_center")-1);
301 }
302 }
303 // Mark the center for reseeding
304 delete centers[i];
305 centers[i]=0;
306 repr[i]=0;
307 }
308 // finishes off the statistics we started computing above
309 else {
310 centers[i]->mult((float)1.0/(float)(repr[i]));
311 centers[i]->set_attr("ptcl_repr",repr[i]);
312 if (sigmas) {
313 centers[i+ncls]->mult((float)1.0/(float)(repr[i])); // sum of squares over n
314 centers[i+ncls]->subsquare(*centers[i]); // subtract the mean value squared
315 centers[i+ncls]->process("math.sqrt"); // square root
316 centers[i+ncls]->mult((float)1.0/(float)sqrt((float)repr[i])); // divide by sqrt(N) to get std. dev. of mean
317 }
318
319 }
320 if (verbose>1) printf("%d(%d)\t",i,(int)repr[i]);
321}
322
323if (verbose>1) printf("\n");
324
325reseed();
326}
327
328// This will look for any unassigned points and reseed each inside the class with the broadest distribution widely distributed
330int nptcl=images.size();
331int i,j;
332
333// if no classes need reseeding just return
334for (i=0; i<ncls; i++) {
335 if (!centers[i]) break;
336}
337if (i==ncls) return;
338
339// make a list of all particles which could be centers
340vector<int> goodcen;
341if (outlierclass) {
342 for (int i=0; i<nptcl; i++) { if ((int)images[i]->get_attr("class_id")!=nclstot-1) goodcen.push_back(i); }
343}
344else {
345// printf("c%d\n",outlierclass);
346 for (int i=0; i<nptcl; i++) { if ((int)images[i]->get_attr("is_ok_center")>0) goodcen.push_back(i); }
347}
348
349if (goodcen.size()==0) {
350 printf("Kmeans ran out of valid center particles, disabling outlier mode and finishing. Results not valid.\n");
351 for (int i=0; i<nptcl; i++) goodcen.push_back(i);
352 outlierclass=0;
353 return;
354}
355// throw UnexpectedBehaviorException("Kmeans ran out of valid center particles with the provided parameters");
356
357// pick a random particle for the new seed
358// for (i=0; i<ncls; i++) {
359// if (centers[i]) continue; // center doesn't need reseeding
360// j=Util::get_irand(0,goodcen.size()-1);
361// centers[i]=images[j]->copy(); // Isn't this wrong? Should it be looking in goodcen?
362// centers[i]->set_attr("ptcl_repr",1);
363// printf("reseed %d -> %d\n",i,j);
364// }
365
366// use a valid center with a large distance for the new seed
367for (i=0; i<ncls; i++) {
368 if (centers[i]) continue; // center doesn't need reseeding
369 if (outlierclass) j=Util::get_irand(0,ncls-2); // don't reuse particles identified as outliers
370 else j=Util::get_irand(0,ncls-1); // pick a random class
371 // The worst particle method with outliers often 'eats' all of the particles
372 if (!outlierclass && centers[j] && centers[j]->has_attr("worst_ptcl")) { // try to use the worst particle from that class
373 centers[i]=images[(int)centers[j]->get_attr("worst_ptcl")]->copy();
374 printf("reseed %d -> worst (cls %d)\n",i,j);
375 }
376 else {
377 j=Util::get_irand(0,goodcen.size()-1);
378 centers[i]=images[goodcen[j]]->copy();
379 printf("reseed %d -> %d\n",i,j);
380 }
381 centers[i]->set_attr("ptcl_repr",1);
382}
383
384
385}
386
387// a quick MSD between two images, MUCH faster than using sqeuclidean for small images
388float qsqcmp(EMData *a,EMData *b) {
389 size_t n = a->get_size();
390 float *d1=a->get_data();
391 float *d2=b->get_data();
392
393 double ret=0.0;
394 for (size_t i=0; i<n; i++) ret+=pow(d1[i]-d2[i],2);
395
396 return (float)ret;
397}
398
399// Tries to generate a reasonable similarity path through the centers to put more similar centers closer to each other
401
402// Cmp *c = Factory < Cmp >::get("sqeuclidean");
403
404 // The first center remains first, we proceed from that starting point
405 // simple shells sort to an out-of-place reference
406 int sortmax=ncls;
407 if (outlierclass && ncls==nclstot) sortmax--; // outlier class must not get resorted!
408
409 for (int i=1; i<sortmax; i++) {
410 float bst=1.0e22;
411 for (int j=i; j<sortmax; j++) {
412// float d=c->cmp(centers[i-1],centers[j]);
413 float d=qsqcmp(centers[i-1],centers[j]);
414 if (d<bst) {
415 bst=d;
416 if (j!=i) {
417 EMData *tmp=centers[j];
418 centers[j]=centers[i];
419 centers[i]=tmp;
420 }
421 }
422 }
423 }
424
425// delete c;
426}
427
428// Redetermine which class each particle belongs in
430int nptcl=images.size();
431
432//Cmp *c = Factory < Cmp >::get("sqeuclidean");
433for (int i=0; i<nptcl; i++) {
434 if (outlierclass && (int)images[i]->get_attr_default("class_id",0)==nclstot-1) continue; // outliers are forever
435 float best=1.0e38f;
436 int bestn=0;
437 int lim=ncls;
438 if (outlierclass) lim=ncls-1; // particles don't join the outliers based on distance
439 for (int j=0; j<lim; j++) {
440// float d=c->cmp(images[i],centers[j]);
441 float d=qsqcmp(images[i],centers[j]);
442 if (d<best) { best=d; bestn=j; }
443 }
444 int oldn=images[i]->get_attr_default("class_id",0);
445 if (oldn!=bestn) nchanged++;
446 images[i]->set_attr("class_id",bestn);
447 images[i]->set_attr("class_cendist",best); // store this for reseeding
448}
449//delete c;
450}
451
452#define covmat(i,j) covmat[ ((j)-1)*nx + (i)-1 ]
453#define imgdata(i) imgdata[ (i)-1 ]
454int PCAsmall::insert_image(EMData * image)
455{
456 if(mask==0)
457 throw NullPointerException("Null mask image pointer, set_params() first");
458
459 EMData *maskedimage = Util::compress_image_mask(image,mask);
460
461 int nx = maskedimage->get_xsize();
462 float *imgdata = maskedimage->get_data();
463 if (nx != ncov) {
464 fprintf(stderr,"insert_image: something is wrong...\n");
465 exit(1);
466 }
467
468 // there is a faster version of the following rank-1 update
469 nimages++;
470 for (int j = 1; j <= nx; j++)
471 for (int i = 1; i<=nx; i++) {
472 covmat(i,j) += imgdata(i)*imgdata(j);
473 }
474
475 EMDeletePtr(maskedimage);
476 return 0;
477}
478#undef covmat
479
480#define eigvec(i,j) eigvec[(j)*ncov + (i)]
481vector<EMData*> PCAsmall::analyze()
482{
483 float *eigvec;
484 int status = 0;
485// printf("start analyzing..., ncov = %d\n", ncov);
486 eigval = (float*)calloc(ncov,sizeof(float));
487 eigvec = (float*)calloc(ncov*ncov,sizeof(float));
488 status = Util::coveig(ncov, covmat, eigval, eigvec);
489// for (int i=1; i<=nvec; i++) printf("eigval = %11.4e\n",
490// eigval[ncov-i]);
491
492 // pack eigenvectors into the return imagelist
493 EMData *eigenimage = new EMData();
494 eigenimage->set_size(ncov,1,1);
495 float *rdata = eigenimage->get_data();
496 for (int j = 1; j<= nvec; j++) {
497 for (int i = 0; i < ncov; i++) rdata[i] = eigvec(i,ncov-j);
498
499 EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
500 recons_eigvec->set_attr( "eigval", eigval[j-1] );
501 images.push_back(recons_eigvec);
502 }
503
504 free(eigvec);
505 EMDeletePtr(eigenimage);
506
507 return images;
508}
509#undef eigvec
510
511void PCAsmall::set_params(const Dict & new_params)
512{
513 params = new_params;
514 mask = params["mask"];
515 nvec = params["nvec"];
516
517 // count the number of pixels under the mask
518 // (this is really ugly!!!)
519 EMData *dummy = new EMData();
520
521 int nx = mask->get_xsize();
522 int ny = mask->get_ysize();
523 int nz = mask->get_zsize();
524
525 dummy->set_size(nx,ny,nz);
526
527 EMData *dummy1d = Util::compress_image_mask(dummy,mask);
528 ncov = dummy1d->get_xsize();
529 EMDeletePtr(dummy);
530 EMDeletePtr(dummy1d);
531
532 // allocate and set up the covriance matrix
533 nimages = 0;
534 covmat = (float*)calloc(ncov*ncov,sizeof(float));
535}
536
537//------------------------------------------------------------------
538// for large-scale PCA incore
539
540int PCAlarge::insert_image(EMData * image)
541{
542 if(mask==0)
543 throw NullPointerException("Null mask image pointer, set_params() first");
544
545 EMData *maskedimage = Util::compress_image_mask(image,mask);
546
547 FILE *fp;
548 string scratchfile = params.set_default("tmpfile","maskedimages.scratch");
549
550 fp = fopen(scratchfile.c_str(),"ab");
551
552 int nx = maskedimage->get_xsize();
553 float *imgdata = maskedimage->get_data();
554 fwrite(imgdata, sizeof(float), nx, fp);
555 nimages++;
556
557 fclose(fp);
558
559 EMDeletePtr(maskedimage);
560
561 return 0;
562}
563
564void PCAlarge::set_params(const Dict & new_params)
565{
566 params = new_params;
567 mask = params["mask"];
568 nvec = params["nvec"];
569
570 // count the number of pixels under the mask
571 // (this is really ugly!!!)
572 EMData *dummy = new EMData();
573
574 int nx = mask->get_xsize();
575 int ny = mask->get_ysize();
576 int nz = mask->get_zsize();
577
578 dummy->set_size(nx,ny,nz);
579
580 EMData *dummy1d = Util::compress_image_mask(dummy,mask);
581
582 ncov = dummy1d->get_xsize();
583
584 EMDeletePtr(dummy);
585 EMDeletePtr(dummy1d);
586 // no need to allocate the covariance matrix
587 nimages = 0;
588}
589
590#define qmat(i,j) qmat[((j)-1)*kstep + (i) -1]
591#define diag(i) diag[(i)-1]
592#define rdata(i) rdata[(i)-1]
593#define eigvec(i,j) eigvec[((j)-1)*ncov + (i)-1]
594#define eigval(i) eigval[(i)-1]
595
596vector<EMData*> PCAlarge::analyze()
597{
598 int status = 0;
599 int ione = 1;
600 float one = 1.0, zero = 0.0;
601 char trans;
602 float *eigvec;
603 string scratchfile = (string) params["tmpfile"];
604 char command[100];
605
606// printf("start analyzing..., ncov = %d\n", ncov);
607
608 float resnrm = 0.0;
609
610 if ( nvec > nimages || nvec ==0 ) nvec = nimages;
611 int nx = ncov;
612
613 // the definition of kstep is purely a heuristic for right now
614 int kstep = nvec*2 + 20;
615 if (kstep > nimages) kstep = nimages;
616
617 float *diag = new float[kstep];
618 float *subdiag = new float[kstep-1];
619 float *vmat = new float[nx*kstep];
620
621 // run kstep-step Lanczos factorization
622 status = Lanczos(scratchfile, &kstep, diag, subdiag,
623 vmat, &resnrm);
624
625 // remove scratch file
626#ifdef _WIN32
627 if (_unlink(scratchfile.c_str()) == -1) {
628 fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
629 }
630#else
631 sprintf(command,"rm -f %s\n", scratchfile.c_str());
632 status = system(command);
633 if (status != 0) {
634 fprintf(stderr,"PCAlarge: cannot remove scratchfile\n");
635 }
636#endif //_WIN32
637
638 char jobz[2] = "V";
639 float *qmat = new float[kstep*kstep];
640 // workspace size will be optimized later
641 int lwork = 100 + 4*kstep + kstep*kstep;
642 int liwork = 3+5*kstep;
643
644 float *work = new float[lwork];
645 int *iwork = new int[liwork];
646 int info = 0;
647
648 // call LAPACK tridiagonal eigensolver
649 sstevd_(jobz, &kstep, diag, subdiag, qmat, &kstep, work, &lwork,
650 iwork, &liwork, &info);
651
652 // store eigenvalues
653 eigval = (float*)calloc(ncov,sizeof(float));
654 eigvec = (float*)calloc(ncov*nvec,sizeof(float));
655
656 for (int j = 0; j < nvec; j++) {
657 eigval[j] = diag(kstep-j);
658 }
659
660// for (int i=0; i<nvec; i++) printf("eigval = %11.4e\n",
661// eigval[i]);
662
663 // compute eigenvectors
664 for (int j=1; j<=nvec; j++) {
665 trans = 'N';
666 sgemv_(&trans, &nx, &kstep, &one, vmat, &nx, &qmat(1,kstep-j+1),
667 &ione, &zero, &eigvec(1,j), &ione);
668 }
669
670 // pack eigenvectors into the return imagelist
671 EMData *eigenimage = new EMData();
672 eigenimage->set_size(ncov,1,1);
673 float *rdata = eigenimage->get_data();
674 for (int j = 1; j<= nvec; j++) {
675 for (int i = 1; i <= ncov; i++)
676 rdata(i) = eigvec(i,j);
677
678 EMData* recons_eigvec = Util::reconstitute_image_mask(eigenimage,mask);
679
680 recons_eigvec->set_attr( "eigval", eigval[j-1] );
681
682 images.push_back( recons_eigvec );
683 }
684
685 free(eigvec);
686 EMDeletePtr(eigenimage);
687
688 return images;
689}
690#undef qmat
691#undef diag
692#undef rdata
693#undef eigvec
694#undef eigval
695
696#define TOL 1e-7
697#define V(i,j) V[((j)-1)*imgsize + (i) - 1]
698#define v0(i) v0[(i)-1]
699#define Av(i) Av[(i)-1]
700#define subdiag(i) subdiag[(i)-1]
701#define diag(i) diag[(i)-1]
702#define hvec(i) hvec[(i)-1]
703
704int PCAlarge::Lanczos(const string &maskedimages, int *kstep,
705 float *diag, float *subdiag, float *V,
706 float *beta)
707{
708 /*
709 Purpose: Compute a kstep-step Lanczos factorization
710 on the covariant matrix X*trans(X), where
711 X (imgstack) contains a set of images;
712
713 Input:
714 imgstack (vector <EMData*>) a set of images on which PCA is
715 to be performed;
716
717 kstep (int*) The maximum number of Lanczos iterations allowed.
718 If Lanczos terminates before kstep steps
719 is reached (an invariant subspace is found),
720 kstep returns the number of steps taken;
721
722 Output:
723 diag (float *) The projection of the covariant matrix into a
724 Krylov subspace of dimension at most kstep.
725 The projection is a tridiagonal matrix. The
726 diagonal elements of this matrix is stored in
727 the diag array.
728
729 subdiag (float*) The subdiagonal elements of the projection
730 is stored here.
731
732 V (float *) an imgsize by kstep array that contains a
733 set of orthonormal Lanczos basis vectors;
734
735 beta (float *) the residual norm of the factorization;
736 */
737 int i, iter;
738
739 float alpha;
740 int ione = 1;
741 float zero = 0.0, one = 1.0, mone = -1.0;
742 int status = 0;
743
744 char trans;
745 int imgsize = 0;
746 float *v0, *Av, *hvec, *htmp, *imgdata;
747 FILE *fp=NULL;
748
749 if (nimages <= 0) {
750 status = 2; // no image in the stack
751 goto EXIT;
752 }
753
754 imgsize = ncov;
755 if (nimages <= 0) {
756 status = 3; // no image in the stack
757 goto EXIT;
758 }
759
760 v0 = new float[imgsize];
761 Av = new float[imgsize];
762 hvec = new float[*kstep];
763 htmp = new float[*kstep];
764 imgdata = new float[imgsize];
765
766 if (v0 == NULL || Av == NULL || hvec == NULL ||
767 htmp == NULL || imgdata == NULL) {
768 fprintf(stderr, "Lanczos: failed to allocate v0,Av,hvec,htmp\n");
769 status = -1;
770 goto EXIT;
771 }
772
773 // may choose a random starting guess here
774 for ( i = 1; i <= imgsize; i++)
775 {
776 v0(i) = 1.0;
777 Av(i) = 0.0;
778 }
779
780 // normalize the starting vector
781 *beta = snrm2_(&imgsize, v0, &ione);
782 for (i = 1; i<=imgsize; i++)
783 V(i,1) = v0(i) / (*beta);
784
785 // do Av <-- A*v0, where A is a cov matrix
786 fp = fopen(maskedimages.c_str(),"rb");
787 if (fp==NULL) {
788 fprintf(stderr,"Lanczos: cannot open %s\n", maskedimages.c_str());
789 }
790 for (i = 0; i < nimages; i++) {
791 fread(imgdata, sizeof(float), imgsize, fp);
792 alpha = sdot_(&imgsize, imgdata, &ione, V, &ione);
793 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
794 }
795 fclose(fp);
796
797
798 // Av <--- Av - V(:,1)*V(:,1)'*Av
799 diag(1) = sdot_(&imgsize, V, &ione, Av, &ione);
800 alpha = -diag(1);
801 saxpy_(&imgsize, &alpha, V, &ione, Av, &ione);
802
803 // main loop
804 for ( iter = 2 ; iter <= *kstep ; iter++ ) {
805 *beta = snrm2_(&imgsize, Av, &ione);
806
807 if (*beta < TOL) {
808 // found an invariant subspace, exit
809 *kstep = iter;
810 break;
811 }
812
813 subdiag(iter-1) = *beta;
814 for ( i = 1 ; i <= imgsize ; i++ ) {
815 V(i,iter) = Av(i) / (*beta);
816 }
817
818 // do Av <-- A*V(:,iter), where A is a cov matrix
819 for (i = 0; i < imgsize; i++) Av[i] = 0;
820 fp = fopen(maskedimages.c_str(),"rb");
821 for (i = 0; i < nimages; i++) {
822 fread(imgdata, sizeof(float), imgsize, fp);
823 alpha = sdot_(&imgsize, imgdata, &ione, &V(1,iter), &ione);
824 saxpy_(&imgsize, &alpha, imgdata, &ione, Av, &ione);
825 }
826 fclose(fp);
827
828 // f <--- Av - V(:,1:iter)*V(:,1:iter)'*Av
829 trans = 'T';
830 status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
831 &zero , hvec , &ione);
832 trans = 'N';
833 status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, hvec,
834 &ione , &one , Av, &ione);
835
836 // one step of reorthogonalization
837 trans = 'T';
838 status = sgemv_(&trans, &imgsize, &iter, &one, V, &imgsize, Av, &ione,
839 &zero , htmp , &ione);
840 saxpy_(&iter, &one, htmp, &ione, hvec, &ione);
841 trans = 'N';
842 status = sgemv_(&trans, &imgsize, &iter, &mone, V, &imgsize, htmp,
843 &ione , &one , Av, &ione);
844 diag(iter) = hvec(iter);
845 }
846
850 EMDeleteArray(htmp);
852
853EXIT:
854 return status;
855
856}
857#undef v0
858#undef Av
859#undef V
860#undef hvec
861#undef diag
862#undef subdiag
863#undef TOL
864
865void varimax::set_params(const Dict & new_params)
866{
867 params = new_params;
868 m_mask = params["mask"];
869
870 // count the number of pixels under the mask
871 // (this is really ugly!!!)
872 EMData *dummy = new EMData();
873
874 int nx = m_mask->get_xsize();
875 int ny = m_mask->get_ysize();
876 int nz = m_mask->get_zsize();
877
878 dummy->set_size(nx,ny,nz);
879
880 EMData *dummy1d = Util::compress_image_mask(dummy,m_mask);
881
882 m_nlen = dummy1d->get_xsize();
883 m_nfac = 0;
884
885 EMDeletePtr(dummy);
886 EMDeletePtr(dummy1d);
887}
888
889int varimax::insert_image(EMData* image)
890{
891 if(m_mask==0)
892 throw NullPointerException("Null mask image pointer, set_params() first");
893
894 EMData* img1d = Util::compress_image_mask(image,m_mask);
895
896 m_data.insert( m_data.end(), img1d->get_data(), img1d->get_data() + m_nlen );
897
898 m_nfac++;
899
900 Assert( (int)m_data.size() == m_nfac*m_nlen);
901
902 return 0;
903}
904
905vector<EMData*> varimax::analyze()
906{
907 int itmax = 10000;
908 float eps = 1e-4f;
909 int verbose = 1;
910 float params[4];
911 params[0] = 1.0;
912 varmx( &m_data[0], m_nlen, m_nfac, IVARIMAX, params, NULL, itmax, eps, verbose);
913
914 vector<EMData*> images;
915
916 EMData* img1d = new EMData();
917 img1d->set_size(m_nlen, 1, 1);
918 for( int i=0; i < m_nfac; ++i )
919 {
920 float* imgdata = img1d->get_data();
921
922 int offset = i * m_nlen;
923 for( int i=0; i < m_nlen; ++i )
924 {
925 imgdata[i] = m_data[offset+i];
926 }
927
928 EMData* img = Util::reconstitute_image_mask(img1d,m_mask);
929 images.push_back(img);
930 }
931
932 EMDeletePtr(img1d);
933
934 return images;
935}
936
938{
939 if (mask==0)
940 throw NullPointerException("Null mask image pointer, set_params() first");
941
942 // count pixels under mask
943 size_t totpix=mask->get_xsize()*mask->get_ysize()*mask->get_zsize();
944 float *d=image->get_data();
945 float *md=mask ->get_data();
946 for (size_t i=0,j=0; i<totpix; ++i) {
947 if (md[i]) {
948 gsl_matrix_set(A,j,nsofar,d[i]);
949 j++;
950 }
951 }
952 nsofar++;
953
954 return 0;
955}
956#undef covmat
957
958#define eigvec(i,j) eigvec[(j)*ncov + (i)]
959vector<EMData*> SVDAnalyzer::analyze()
960{
961// Allocate the working space
962gsl_vector *work=gsl_vector_alloc(nimg);
963gsl_vector *S=gsl_vector_alloc(nimg);
964gsl_matrix *V=gsl_matrix_alloc(nimg,nimg);
965gsl_matrix *X=gsl_matrix_alloc(nimg,nimg);
966
967
968// Do the decomposition. All the real work is here
969gsl_linalg_SV_decomp_mod (A,X, V, S, work);
970//else gsl_linalg_SV_decomp_jacobi(A,V,S);
971
972vector<EMData*> ret;
973//unpack the results and write the output file
974float *md=mask->get_data();
975size_t totpix=mask->get_xsize()*mask->get_ysize()*mask->get_zsize();
976for (int k=0; k<nvec; k++) {
977 EMData *img = new EMData;
978 img->set_size(mask->get_xsize(),mask->get_ysize(),mask->get_zsize());
979
980 float *d=img->get_data();
981 for (size_t i=0,j=0; i<totpix; ++i) {
982 if (md[i]) {
983 d[i]=(float)gsl_matrix_get(A,j,k);
984 j++;
985 }
986 }
987 img->set_attr( "eigval", gsl_vector_get(S,k));
988 ret.push_back(img);
989}
990
991gsl_vector_free(work);
992gsl_vector_free(S);
993gsl_matrix_free(V);
994gsl_matrix_free(X);
995
996gsl_matrix_free(A);
997A=NULL;
998mask=NULL;
999
1000return ret;
1001}
1002
1003void SVDAnalyzer::set_params(const Dict & new_params)
1004{
1005 params = new_params;
1006 mask = params["mask"];
1007 nvec = params["nvec"];
1008 nimg = params["nimg"];
1009
1010 // count pixels under mask
1011 pixels=0;
1012 size_t totpix=mask->get_xsize()*mask->get_ysize()*mask->get_zsize();
1013 float *d=mask->get_data();
1014 for (size_t i=0; i<totpix; ++i) if (d[i]) ++pixels;
1015
1016 printf("%d,%d\n",pixels,nimg);
1017 A=gsl_matrix_alloc(pixels,nimg);
1018 nsofar=0;
1019}
1020
1021
1023{
1024 dump_factory < Analyzer > ();
1025}
1026
1027map<string, vector<string> > EMAN::dump_analyzers_list()
1028{
1029 return dump_factory_list < Analyzer > ();
1030}
1031
1032
1033
1035// for (int i=0; i<10; i++)
1036// avg->set_value_at(i,0,i);
1037
1038 if (images.size()!=1) throw ImageDimensionException("Only takes a single image as input");
1039 int nx=images[0]->get_xsize();
1040 int ny=images[0]->get_ysize();
1041 int nz=images[0]->get_zsize();
1042 if (nz>1)
1043 throw ImageDimensionException("Only takes 2D images.");
1044 int maxr=params.set_default("maxr",nx/2-1);
1045 int step=params.set_default("step",2);
1046
1047 EMData *avg = new EMData(maxr/step+1,1);
1048
1049 int ix,iy,it,count;
1050 for (it=0; it<maxr; it+=step){
1051 float mn=0;
1052 count=0;
1053 for (ix=-maxr-1; ix<=maxr+1; ix++){
1054 for (iy=-maxr-1; iy<=maxr+1; iy++){
1055 int d2=ix*ix+iy*iy;
1056 if (d2>=it*it && d2<(it+step)*(it+step)){
1057 count++;
1058 mn+=images[0]->sget_value_at(ix+nx/2,iy+ny/2);
1059
1060 }
1061 }
1062 }
1063
1064 mn/=count;
1065 if(verbose>0) printf("%d,%d,%f\n",it,count,mn);
1066 avg->set_value_at(it/step,0,mn);
1067 }
1068
1069
1070 ret.push_back(avg);
1071 return ret;
1072}
1073
1074
1075
#define TOL
Definition: analyzer.cpp:696
#define hvec(i)
Definition: analyzer.cpp:702
#define V(i, j)
Definition: analyzer.cpp:697
#define qmat(i, j)
Definition: analyzer.cpp:590
#define eigval(i)
Definition: analyzer.cpp:594
#define diag(i)
Definition: analyzer.cpp:701
#define Av(i)
Definition: analyzer.cpp:699
#define rdata(i)
Definition: analyzer.cpp:592
float qsqcmp(EMData *a, EMData *b)
Definition: analyzer.cpp:388
#define imgdata(i)
Definition: analyzer.cpp:453
#define v0(i)
Definition: analyzer.cpp:698
#define covmat(i, j)
Definition: analyzer.cpp:452
#define eigvec(i, j)
Definition: analyzer.cpp:958
#define subdiag(i)
Definition: analyzer.cpp:700
virtual int insert_image(EMData *image)=0
insert a image to the list of input images
vector< EMData * > images
Definition: analyzer.h:117
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:1034
vector< EMData * > ret
Definition: analyzer.h:408
static const string NAME
Definition: analyzer.h:404
Dict is a dictionary to store <string, EMObject> pair.
Definition: emobject.h:385
type set_default(const string &key, type val)
Default setting behavior This can be achieved using a template - d.woolford Jan 2008 (before there wa...
Definition: emobject.h:569
bool has_key(const string &key) const
Ask the Dictionary if it as a particular key.
Definition: emobject.h:511
EMData stores an image's data and defines core image processing routines.
Definition: emdata.h:82
Factory is used to store objects to create new instances.
Definition: emobject.h:725
static const string NAME
Definition: analyzer.h:162
vector< EMData * > ret
Definition: analyzer.h:166
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:78
void set_params(const Dict &new_params)
Set the Analyzer parameters using a key/value dictionary.
Definition: analyzer.cpp:161
vector< EMData * > centers
Definition: analyzer.h:288
void update_centers(int sigmas=0)
Definition: analyzer.cpp:255
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:174
static const string NAME
Definition: analyzer.h:280
virtual int insert_image(EMData *image)
insert a image to the list of input images
Definition: analyzer.cpp:937
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:959
void set_params(const Dict &new_params)
Set the Analyzer parameters using a key/value dictionary.
Definition: analyzer.cpp:1003
gsl_matrix * A
Definition: analyzer.h:360
static const string NAME
Definition: analyzer.h:350
EMData * mask
Definition: analyzer.h:353
vector< EMData * > ret
Definition: analyzer.h:216
static const string NAME
Definition: analyzer.h:212
virtual vector< EMData * > analyze()
main function for Analyzer, analyze input images and create output images
Definition: analyzer.cpp:122
static int get_irand(int low, int high)
Get an integer random number between low and high, [low, high].
Definition: util.cpp:719
#define Assert(s)
Define Assert() function that is effective only when -DDEBUG is used.
Definition: emassert.h:42
EMData * sqrt() const
return square root of current image
EMObject get_attr(const string &attr_name) const
The generic way to get any image header information given a header attribute name.
int get_ysize() const
Get the image y-dimensional size.
int get_zsize() const
Get the image z-dimensional size.
int get_xsize() const
Get the image x-dimensional size.
bool has_attr(const string &key) const
Ask if the header has a particular attribute.
EMObject get_attr_default(const string &attr_name, const EMObject &em_obj=EMObject()) const
The generic way to get any image header information given a header attribute name.
void set_attr(const string &key, EMObject val)
Set a header attribute's value.
void EMDeletePtr(T &x)
Definition: emutil.h:47
void EMDeleteArray(T &x)
Definition: emutil.h:62
#define ImageDimensionException(desc)
Definition: exception.h:166
#define NullPointerException(desc)
Definition: exception.h:241
E2Exception class.
Definition: aligner.h:40
map< string, vector< string > > dump_analyzers_list()
Definition: analyzer.cpp:1027
void dump_analyzers()
Definition: analyzer.cpp:1022
#define y(i, j)
Definition: projector.cpp:1516
#define x(i)
Definition: projector.cpp:1517
#define images(i, j, k)
Definition: projector.cpp:1897