EMAN2
Public Member Functions | Static Public Member Functions | Static Public Attributes | Protected Member Functions | Protected Attributes | List of all members
EMAN::KMeansAnalyzer Class Reference

KMeansAnalyzer performs k-means classification on a set of input images (shape/size arbitrary); the returned result is a set of classification vectors. More...

#include <analyzer.h>

Inheritance diagram for EMAN::KMeansAnalyzer:
Inheritance graph
[legend]
Collaboration diagram for EMAN::KMeansAnalyzer:
Collaboration graph
[legend]

Public Member Functions

 KMeansAnalyzer ()
 
virtual int insert_image (EMData *image)
 insert an image into the list of input images More...
 
virtual vector< EMData * > analyze ()
 main function for Analyzer, analyze input images and create output images More...
 
string get_name () const
 Get the Analyzer's name. More...
 
string get_desc () const
 Get the Analyzer's description. More...
 
void set_params (const Dict &new_params)
 Set the Analyzer parameters using a key/value dictionary. More...
 
TypeDict get_param_types () const
 Get Analyzer parameter information in a dictionary. More...
 
- Public Member Functions inherited from EMAN::Analyzer
 Analyzer ()
 
virtual ~Analyzer ()
 
virtual int insert_images_list (vector< EMData * > image_list)
 insert a list of images to the list of input images More...
 
virtual Dict get_params () const
 Get the Analyzer's parameters in a key/value dictionary. More...
 

Static Public Member Functions

static Analyzer * NEW ()
 

Static Public Attributes

static const string NAME = "kmeans"
 

Protected Member Functions

void update_centers (int sigmas=0)
 
void reclassify ()
 
void reseed ()
 
void resort ()
 

Protected Attributes

vector< EMData * > centers
 
int ncls
 
int nclstot
 
int verbose
 
int minchange
 
int maxiter
 
int mininclass
 
int nchanged
 
int slowseed
 
int calcsigmamean
 
int outlierclass
 
- Protected Attributes inherited from EMAN::Analyzer
Dict params
 
vector< EMData * > images
 

Detailed Description

KMeansAnalyzer performs k-means classification on a set of input images (shape/size arbitrary); the returned result is a set of classification vectors.

Author
Steve Ludtke
Date
03/02/2008
Parameters
verbose: Display progress if set, more detail with larger numbers (9 max)
ncls: number of desired classes
maxiter: maximum number of iterations
minchange: Terminate if fewer than minchange members move in an iteration
mininclass: Minimum number of particles to keep a class as good (not enforced at termination)
slowseed: Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations
outlierclass: The last class will be reserved for outliers. Any class containing fewer than n particles will be permanently moved to the outlier group. default = disabled
calcsigmamean: Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers

Definition at line 236 of file analyzer.h.

Constructor & Destructor Documentation

◆ KMeansAnalyzer()

EMAN::KMeansAnalyzer::KMeansAnalyzer ( )
inline

Definition at line 239 of file analyzer.h.

Referenced by NEW().

Member Function Documentation

◆ analyze()

vector< EMData * > KMeansAnalyzer::analyze ( )
virtual

main function for Analyzer, analyze input images and create output images

Returns
vector<EMData *> result of the image analysis

Implements EMAN::Analyzer.

Definition at line 174 of file analyzer.cpp.

175{
176if (ncls<=1) return vector<EMData *>();
177//srandom(time(0));
178
179// These are the class centers, start each with a random image
180int nptcl=images.size();
181if (calcsigmamean) centers.resize(nclstot*2);
182else centers.resize(nclstot);
183if (mininclass<1) mininclass=1;
184
185int seedmode=params.set_default("seedmode",(int)0);
186
187// in outlier mode we don't use the bad center concept
188if (outlierclass==0) {
189 for (int i=0; i<nptcl; i++) images[i]->set_attr("is_ok_center",(int)5); // if an image becomes part of too small a set, it will (eventually) be marked as a bad center
190}
191
192if (slowseed) {
193 if (ncls>25) ncls=slowseed=ncls/25+1; // this becomes the number to seed in each step
194// if (maxiter<ncls*3+20) maxiter=ncls*3+20; // We need to make sure we have enough iterations to seed all of the classes
195// ncls=2;
196}
197
198if (seedmode==0) {
199 for (int i=0; i<ncls; i++) {
200 // Fixed by d.woolford, Util.get_irand is inclusive (added a -1)
201 centers[i]=images[Util::get_irand(0,nptcl-1)]->copy();
202 }
203}
204else if (seedmode==1) {
205 // find the images with the largest and smallest sum
206 EMData *max;
207 float maxv=-1.0e27;
208 EMData *min;
209 float minv=1.0e27;
210 for (int i=0; i<nptcl; i++) {
211 float m = images[i]->get_attr("mean");
212 if (m<minv) { minv=m; min=images[i]; }
213 if (m>maxv) { maxv=m; max=images[i]; }
214 }
215 centers[0]=min->copy();
216 centers[ncls-1]=max->copy();
217
218 // now fill in linear interpolates in between
219 for (int i=1; i<ncls-1; i++) {
220 centers[i]=centers[0]->copy();
221 centers[i]->mult((ncls-i-1.0f)/(ncls-1.0f));
222 EMData *tmp=max->copy();
223 tmp->mult(i/(ncls-1.0f));
224 centers[i]->add(*tmp);
225 delete tmp;
226 }
227}
228
229if (calcsigmamean) {
230 for (int i=nclstot; i<nclstot*2; i++) centers[i]=new EMData(images[0]->get_xsize(),images[0]->get_ysize(),images[0]->get_zsize());
231}
232
233
234for (int i=0; i<maxiter; i++) {
235 nchanged=0;
236 resort();
237 reclassify();
239 if (verbose) printf("iter %d> %d (%d)\n",i,nchanged,ncls);
240 if (nchanged<minchange && ncls==nclstot) break;
241
242 if (slowseed && i%3==2 && ncls<nclstot) {
243 for (int j=0; j<slowseed && ncls<nclstot; j++) {
244 centers[ncls]=0;
245 ncls++;
246 }
247 reseed();
248 }
249}
251
252return centers;
253}
vector< EMData * > images
Definition: analyzer.h:117
type set_default(const string &key, type val)
Default setting behavior This can be achieved using a template - d.woolford Jan 2008 (before there wa...
Definition: emobject.h:569
EMData stores an image's data and defines core image processing routines.
Definition: emdata.h:82
vector< EMData * > centers
Definition: analyzer.h:288
void update_centers(int sigmas=0)
Definition: analyzer.cpp:255
static int get_irand(int low, int high)
Get an integer random number between low and high, [low, high].
Definition: util.cpp:719
int get_ysize() const
Get the image y-dimensional size.
int get_zsize() const
Get the image z-dimensional size.
int get_xsize() const
Get the image x-dimensional size.
void set_attr(const string &key, EMObject val)
Set a header attribute's value.

References calcsigmamean, centers, EMAN::Util::get_irand(), get_xsize(), get_ysize(), get_zsize(), EMAN::Analyzer::images, maxiter, minchange, mininclass, nchanged, ncls, nclstot, outlierclass, EMAN::Analyzer::params, reclassify(), reseed(), resort(), set_attr(), EMAN::Dict::set_default(), slowseed, update_centers(), and verbose.

◆ get_desc()

string EMAN::KMeansAnalyzer::get_desc ( ) const
inline virtual

Get the Analyzer's description.

Returns
The Analyzer's description.

Implements EMAN::Analyzer.

Definition at line 253 of file analyzer.h.

254 {
255 return "k-means classification";
256 }

◆ get_name()

string EMAN::KMeansAnalyzer::get_name ( ) const
inline virtual

Get the Analyzer's name.

Each Analyzer is identified by a unique name.

Returns
The Analyzer's name.

Implements EMAN::Analyzer.

Definition at line 248 of file analyzer.h.

249 {
250 return NAME;
251 }
static const string NAME
Definition: analyzer.h:280

References NAME.

◆ get_param_types()

TypeDict EMAN::KMeansAnalyzer::get_param_types ( ) const
inline virtual

Get Analyzer parameter information in a dictionary.

Each parameter has one record in the dictionary. Each record contains its name, data-type, and description.

Returns
A dictionary containing the parameter info.

Implements EMAN::Analyzer.

Definition at line 265 of file analyzer.h.

266 {
267 TypeDict d;
268 d.put("verbose", EMObject::INT, "Display progress if set, more detail with larger numbers (9 max)");
269 d.put("seedmode",EMObject::INT, "How to generate initial seeds. 0 - random element (default), 1 - max sum, min sum, linear");
270 d.put("ncls", EMObject::INT, "number of desired classes");
271 d.put("maxiter", EMObject::INT, "maximum number of iterations (default=100)");
272 d.put("minchange", EMObject::INT, "Terminate if fewer than minchange members move in an iteration");
273 d.put("mininclass", EMObject::INT, "Minumum number of particles to keep a class as good (not enforced at termination");
274 d.put("slowseed",EMObject::INT, "Instead of seeding all classes at once, it will gradually increase the number of classes by adding new seeds in groups with large standard deviations");
275 d.put("outlierclass",EMObject::INT, "The last class will be reserved for outliers. Any class containing fewer than n particles will be permanently moved to the outlier group. default = disabled");
276 d.put("calcsigmamean",EMObject::INT, "Computes standard deviation of the mean image for each class-average (center), and returns them at the end of the list of centers");
277 return d;
278 }

References EMAN::EMObject::INT, and EMAN::TypeDict::put().

◆ insert_image()

virtual int EMAN::KMeansAnalyzer::insert_image ( EMData * image)
inline virtual

insert an image into the list of input images

Parameters
image
Returns
int 0 for success, <0 for fail

Implements EMAN::Analyzer.

Definition at line 241 of file analyzer.h.

241 {
242 images.push_back(image);
243 return 0;
244 }

References EMAN::Analyzer::images.

◆ NEW()

static Analyzer * EMAN::KMeansAnalyzer::NEW ( )
inline static

Definition at line 258 of file analyzer.h.

259 {
260 return new KMeansAnalyzer();
261 }

References KMeansAnalyzer().

◆ reclassify()

void KMeansAnalyzer::reclassify ( )
protected

Definition at line 429 of file analyzer.cpp.

429 {
430int nptcl=images.size();
431
432//Cmp *c = Factory < Cmp >::get("sqeuclidean");
433for (int i=0; i<nptcl; i++) {
434 if (outlierclass && (int)images[i]->get_attr_default("class_id",0)==nclstot-1) continue; // outliers are forever
435 float best=1.0e38f;
436 int bestn=0;
437 int lim=ncls;
438 if (outlierclass) lim=ncls-1; // particles don't join the outliers based on distance
439 for (int j=0; j<lim; j++) {
440// float d=c->cmp(images[i],centers[j]);
441 float d=qsqcmp(images[i],centers[j]);
442 if (d<best) { best=d; bestn=j; }
443 }
444 int oldn=images[i]->get_attr_default("class_id",0);
445 if (oldn!=bestn) nchanged++;
446 images[i]->set_attr("class_id",bestn);
447 images[i]->set_attr("class_cendist",best); // store this for reseeding
448}
449//delete c;
450}
float qsqcmp(EMData *a, EMData *b)
Definition: analyzer.cpp:388
EMObject get_attr_default(const string &attr_name, const EMObject &em_obj=EMObject()) const
The generic way to get any image header information given a header attribute name.

References centers, get_attr_default(), EMAN::Analyzer::images, nchanged, ncls, nclstot, outlierclass, and qsqcmp().

Referenced by analyze().

◆ reseed()

void KMeansAnalyzer::reseed ( )
protected

Definition at line 329 of file analyzer.cpp.

329 {
330int nptcl=images.size();
331int i,j;
332
333// if no classes need reseeding just return
334for (i=0; i<ncls; i++) {
335 if (!centers[i]) break;
336}
337if (i==ncls) return;
338
339// make a list of all particles which could be centers
340vector<int> goodcen;
341if (outlierclass) {
342 for (int i=0; i<nptcl; i++) { if ((int)images[i]->get_attr("class_id")!=nclstot-1) goodcen.push_back(i); }
343}
344else {
345// printf("c%d\n",outlierclass);
346 for (int i=0; i<nptcl; i++) { if ((int)images[i]->get_attr("is_ok_center")>0) goodcen.push_back(i); }
347}
348
349if (goodcen.size()==0) {
350 printf("Kmeans ran out of valid center particles, disabling outlier mode and finishing. Results not valid.\n");
351 for (int i=0; i<nptcl; i++) goodcen.push_back(i);
352 outlierclass=0;
353 return;
354}
355// throw UnexpectedBehaviorException("Kmeans ran out of valid center particles with the provided parameters");
356
357// pick a random particle for the new seed
358// for (i=0; i<ncls; i++) {
359// if (centers[i]) continue; // center doesn't need reseeding
360// j=Util::get_irand(0,goodcen.size()-1);
361// centers[i]=images[j]->copy(); // Isn't this wrong? Should it be looking in goodcen?
362// centers[i]->set_attr("ptcl_repr",1);
363// printf("reseed %d -> %d\n",i,j);
364// }
365
366// use a valid center with a large distance for the new seed
367for (i=0; i<ncls; i++) {
368 if (centers[i]) continue; // center doesn't need reseeding
369 if (outlierclass) j=Util::get_irand(0,ncls-2); // don't reuse particles identified as outliers
370 else j=Util::get_irand(0,ncls-1); // pick a random class
371 // The worst particle method with outliers often 'eats' all of the particles
372 if (!outlierclass && centers[j] && centers[j]->has_attr("worst_ptcl")) { // try to use the worst particle from that class
373 centers[i]=images[(int)centers[j]->get_attr("worst_ptcl")]->copy();
374 printf("reseed %d -> worst (cls %d)\n",i,j);
375 }
376 else {
377 j=Util::get_irand(0,goodcen.size()-1);
378 centers[i]=images[goodcen[j]]->copy();
379 printf("reseed %d -> %d\n",i,j);
380 }
381 centers[i]->set_attr("ptcl_repr",1);
382}
383
384
385}
EMObject get_attr(const string &attr_name) const
The generic way to get any image header information given a header attribute name.
bool has_attr(const string &key) const
Ask if the header has a particular attribute.

References centers, get_attr(), EMAN::Util::get_irand(), has_attr(), EMAN::Analyzer::images, ncls, nclstot, and outlierclass.

Referenced by analyze(), and update_centers().

◆ resort()

void KMeansAnalyzer::resort ( )
protected

Definition at line 400 of file analyzer.cpp.

400 {
401
402// Cmp *c = Factory < Cmp >::get("sqeuclidean");
403
404 // The first center remains first, we proceed from that starting point
405 // simple shells sort to an out-of-place reference
406 int sortmax=ncls;
407 if (outlierclass && ncls==nclstot) sortmax--; // outlier class must not get resorted!
408
409 for (int i=1; i<sortmax; i++) {
410 float bst=1.0e22;
411 for (int j=i; j<sortmax; j++) {
412// float d=c->cmp(centers[i-1],centers[j]);
413 float d=qsqcmp(centers[i-1],centers[j]);
414 if (d<bst) {
415 bst=d;
416 if (j!=i) {
417 EMData *tmp=centers[j];
418 centers[j]=centers[i];
419 centers[i]=tmp;
420 }
421 }
422 }
423 }
424
425// delete c;
426}

References centers, ncls, nclstot, outlierclass, and qsqcmp().

Referenced by analyze().

◆ set_params()

void KMeansAnalyzer::set_params ( const Dict & new_params)
virtual

Set the Analyzer parameters using a key/value dictionary.

Parameters
new_params: A dictionary containing the new parameters.

Reimplemented from EMAN::Analyzer.

Definition at line 161 of file analyzer.cpp.

162{
163 params = new_params;
164 if (params.has_key("ncls")) ncls = nclstot = params["ncls"];
165 if (params.has_key("maxiter"))maxiter = params["maxiter"];
166 if (params.has_key("minchange"))minchange = params["minchange"];
167 if (params.has_key("mininclass"))mininclass = params["mininclass"];
168 if (params.has_key("slowseed"))slowseed = params["slowseed"];
169 if (params.has_key("verbose"))verbose = params["verbose"];
170 if (params.has_key("calcsigmamean")) calcsigmamean=params["calcsigmamean"];
171 if (params.has_key("outlierclass")) outlierclass=params["outlierclass"];
172}
bool has_key(const string &key) const
Ask the Dictionary if it has a particular key.
Definition: emobject.h:511

References calcsigmamean, EMAN::Dict::has_key(), maxiter, minchange, mininclass, ncls, nclstot, outlierclass, EMAN::Analyzer::params, slowseed, and verbose.

◆ update_centers()

void KMeansAnalyzer::update_centers ( int  sigmas = 0)
protected

Definition at line 255 of file analyzer.cpp.

255 {
256int nptcl=images.size();
257//int repr[ncls];
258vector<int> repr(ncls);
259
260for (int i=0; i<ncls; i++) {
261 centers[i]->to_zero();
262 if (sigmas) centers[i+ncls]->to_zero();
263 repr[i]=0;
264 centers[i]->set_attr("worst_ptcldist",0.0f);
265}
266
267// compute new position for each center
268for (int i=0; i<nptcl; i++) {
269 int cid=images[i]->get_attr("class_id");
270 // outlier mode disables is_ok_center functionality
271 if (outlierclass || (int)images[i]->get_attr("is_ok_center")>0) {
272 centers[cid]->add(*images[i]);
273 if (sigmas) centers[cid+ncls]->addsquare(*images[i]);
274 repr[cid]++;
275 float imdist=images[i]->get_attr("class_cendist");
276 if (imdist>(float)centers[cid]->get_attr("worst_ptcldist")) {
277 centers[cid]->set_attr("worst_ptcldist",imdist);
278 centers[cid]->set_attr("worst_ptcl",i);
279 }
280 }
281}
282
283for (int i=0; i<ncls; i++) {
284 // If this class is too small, outlier class is never reseeded
285 if (repr[i]<mininclass && (outlierclass==0||i<nclstot-1)) {
286 // find all of the particles in the class, and decrement their "is_ok_center" counter.
287 // when it reaches zero the particle will no longer participate in determining the location of a center
288 if (outlierclass) { // outliers are relegated to the outlier class permanently
289 for (int j=0; j<nptcl; j++) {
290 if ((int)images[j]->get_attr("class_id")==i) {
291 if (verbose) printf("outlier: %d\n",j);
292 images[j]->set_attr("class_id",nclstot-1);
293 //nchanged++; // should happen automatically below
294 }
295 }
296 }
297 // if not using outlier class, we use "is_ok_center" concept to reduce influence of outliers
298 else {
299 for (int j=0; j<nptcl; j++) {
300 if ((int)images[j]->get_attr("class_id")==i) images[i]->set_attr("is_ok_center",(int)images[i]->get_attr("is_ok_center")-1);
301 }
302 }
303 // Mark the center for reseeding
304 delete centers[i];
305 centers[i]=0;
306 repr[i]=0;
307 }
308 // finishes off the statistics we started computing above
309 else {
310 centers[i]->mult((float)1.0/(float)(repr[i]));
311 centers[i]->set_attr("ptcl_repr",repr[i]);
312 if (sigmas) {
313 centers[i+ncls]->mult((float)1.0/(float)(repr[i])); // sum of squares over n
314 centers[i+ncls]->subsquare(*centers[i]); // subtract the mean value squared
315 centers[i+ncls]->process("math.sqrt"); // square root
316 centers[i+ncls]->mult((float)1.0/(float)sqrt((float)repr[i])); // divide by sqrt(N) to get std. dev. of mean
317 }
318
319 }
320 if (verbose>1) printf("%d(%d)\t",i,(int)repr[i]);
321}
322
323if (verbose>1) printf("\n");
324
325reseed();
326}
EMData * sqrt() const
return square root of current image

References centers, get_attr(), EMAN::Analyzer::images, mininclass, ncls, nclstot, outlierclass, reseed(), sqrt(), and verbose.

Referenced by analyze().

Member Data Documentation

◆ calcsigmamean

int EMAN::KMeansAnalyzer::calcsigmamean
protected

Definition at line 297 of file analyzer.h.

Referenced by analyze(), and set_params().

◆ centers

vector<EMData *> EMAN::KMeansAnalyzer::centers
protected

Definition at line 288 of file analyzer.h.

Referenced by analyze(), reclassify(), reseed(), resort(), and update_centers().

◆ maxiter

int EMAN::KMeansAnalyzer::maxiter
protected

Definition at line 293 of file analyzer.h.

Referenced by analyze(), and set_params().

◆ minchange

int EMAN::KMeansAnalyzer::minchange
protected

Definition at line 292 of file analyzer.h.

Referenced by analyze(), and set_params().

◆ mininclass

int EMAN::KMeansAnalyzer::mininclass
protected

Definition at line 294 of file analyzer.h.

Referenced by analyze(), set_params(), and update_centers().

◆ NAME

const string EMAN::KMeansAnalyzer::NAME = "kmeans"
static

Definition at line 280 of file analyzer.h.

Referenced by get_name().

◆ nchanged

int EMAN::KMeansAnalyzer::nchanged
protected

Definition at line 295 of file analyzer.h.

Referenced by analyze(), and reclassify().

◆ ncls

int EMAN::KMeansAnalyzer::ncls
protected

Definition at line 289 of file analyzer.h.

Referenced by analyze(), reclassify(), reseed(), resort(), set_params(), and update_centers().

◆ nclstot

int EMAN::KMeansAnalyzer::nclstot
protected

Definition at line 290 of file analyzer.h.

Referenced by analyze(), reclassify(), reseed(), resort(), set_params(), and update_centers().

◆ outlierclass

int EMAN::KMeansAnalyzer::outlierclass
protected

Definition at line 298 of file analyzer.h.

Referenced by analyze(), reclassify(), reseed(), resort(), set_params(), and update_centers().

◆ slowseed

int EMAN::KMeansAnalyzer::slowseed
protected

Definition at line 296 of file analyzer.h.

Referenced by analyze(), and set_params().

◆ verbose

int EMAN::KMeansAnalyzer::verbose
protected

Definition at line 291 of file analyzer.h.

Referenced by analyze(), set_params(), and update_centers().


The documentation for this class was generated from the following files: