EMAN2
speedtest.cpp
Go to the documentation of this file.
00001 /*
00002  * Author: Steven Ludtke, 04/10/2003 (sludtke@bcm.edu)
00003  * Copyright (c) 2000-2006 Baylor College of Medicine
00004  *
00005  * This software is issued under a joint BSD/GNU license. You may use the
00006  * source code in this file under either license. However, note that the
00007  * complete EMAN2 and SPARX software packages have some GPL dependencies,
00008  * so you are responsible for compliance with the licenses of these packages
00009  * if you opt to use BSD licensing. The warranty disclaimer below holds
00010  * in either instance.
00011  *
00012  * This complete copyright notice must be included in any revised version of the
00013  * source code. Additional authorship citations may be added, but existing
00014  * author citations must be preserved.
00015  *
00016  * This program is free software; you can redistribute it and/or modify
00017  * it under the terms of the GNU General Public License as published by
00018  * the Free Software Foundation; either version 2 of the License, or
00019  * (at your option) any later version.
00020  *
00021  * This program is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00024  * GNU General Public License for more details.
00025  *
00026  * You should have received a copy of the GNU General Public License
00027  * along with this program; if not, write to the Free Software
00028  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00029  *
00030  * */
00031 
00032 /* This program runs a set of speed tests on the current machine
00033 
00034  * This program runs a set of speed tests in the current computer. It should
00035  * have at least 1 unloaded processor, and minimal i/o occuring when this
00036  *  is run. It will give a single value which should be generally proportional
00037  * to how fast refine will run on a given computer. It is a 'real world' test,
00038  * in that it simulates a variety of actual operations performed by a
00039  * refinement procedure. Don't compare values given by speed test in
00040  * different versions of EMAN, since the underlying routines may be
00041  * improved with time.
00042  *
00043  * */
00044 
00045 #include <ctime>
00046 
00047 #include "emdata.h"
00048 #include <sstream>
00049 using std::stringstream;
00050 
00051 #include <iostream>
00052 using std::cout;
00053 using std::endl;
00054 
00055 using namespace EMAN;
00056 
00057 #define CPS     (CLOCKS_PER_SEC)
00058 
00059 
00060 int main(int argc, char *argv[])
00061 {
00062         int SIZE = 96;
00063     int NTT = 500;
00064 
00065     int slow = 0;
00066     int low = 0;
00067     int big = 0;
00068     int newali = 0;
00069     int vg = 0;
00070 //#ifdef EMAN2_USING_CUDA
00071 //    int gpu = 0;
00072 //#endif
00073 
00074     if (argc > 1) {
00075                 if (Util::sstrncmp(argv[1], "slow"))
00076                         slow = 1;
00077                 else if (Util::sstrncmp(argv[1], "best"))
00078                         slow = 3;
00079                 else if (Util::sstrncmp(argv[1], "low"))
00080                         low = 1;
00081                 else if (Util::sstrncmp(argv[1], "refine"))
00082                         newali = 1;
00083                 else if (Util::sstrncmp(argv[1], "big"))
00084                         big = 1;
00085                 else if (Util::sstrncmp(argv[1], "valgrind"))
00086                         vg=1;
00087 //#ifdef EMAN2_USING_CUDA
00088 //              else if (Util::sstrncmp(argv[1], "gpu")) {
00089 //                      cout << "GPU mode selected" << endl;
00090 //                      gpu=1;
00091 //              }
00092 //#endif
00093     }
00094     if (argc > 2) {
00095         // assume it's the boxsize
00096         stringstream ss;
00097         ss << argv[2];
00098         ss >> SIZE;
00099         if (SIZE < 0 || SIZE > 1024) {
00100                 cout << "The boxsize " << SIZE << " is invalid. Please specify a box size in the range of (0,1024]" << endl;
00101                 throw;
00102         }
00103     }
00104 
00105     // This is some testing code used to run valgrind tests on
00106     // parts of the core library. It is not for benchmarking
00107     // purposes.
00108     if (vg) {
00109                 EMData *a = new EMData();
00110                 a->set_size(128,128,1);
00111                 a->process_inplace("testimage.scurve");
00112                 EMData *b = a->rot_scale_trans2D(0.0,1.0,1.0,0.0);
00113                 EMData *d = b->align("rotate_translate",a,Dict(),"sqeuclidean");
00114                 Dict r=b->get_attr_dict();
00115                 printf("%p\n",&r);
00116                 printf("nx = %d\n",(int)r["nx"]);
00117                 delete a;
00118                 delete b;
00119                 delete d;
00120                 exit(0);
00121     }
00122 
00123     printf("This could take a few minutes. Please be patient.\n");
00124     if (big) {
00125                 NTT = 100;
00126                 SIZE = 320;
00127     }
00128 
00129     printf("Initializing\n");
00130 
00131     EMData pat;
00132     pat.set_size(SIZE, SIZE, 1);
00133     float *d = pat.get_data();
00134 
00135     for (int i = -SIZE / 2; i < SIZE / 2; i++) {
00136                 for (int j = -SIZE / 2; j < SIZE / 2; j++) {
00137                         d[(i + SIZE / 2) + (j + SIZE / 2) * SIZE] =
00138                                 -3.0f * exp(-Util::square((fabs((float)i) + fabs((float)j))) / 10.0f) +
00139                                 exp(-Util::square((fabs((float)i) + fabs((float)j) / 2.0f)) / 100.0f) *
00140                                 (abs(i) < 2 ? 2.0f : 1.0f);
00141                 }
00142     }
00143     pat.update();
00144     pat.process_inplace("normalize.circlemean");
00145     pat.process_inplace("mask.sharp", Dict("outer_radius", pat.get_xsize()/2));
00146 
00147     EMData *data[5000];
00148 
00149     for (int i = 0; i < NTT; i++) {
00150                 data[i] = pat.copy();
00151                 float *d = data[i]->get_data();
00152 
00153                 for (int j = 0; j < SIZE * SIZE; j++) {
00154                         d[j] += Util::get_gauss_rand(0, 1.0);
00155                 }
00156                 data[i]->update();
00157                 data[i]->process_inplace("normalize.circlemean");
00158                 data[i]->process_inplace("mask.sharp", Dict("outer_radius", data[i]->get_xsize()/2));
00159 
00160 //              if (i < 5) {
00161 //                      data[i]->write_image("speed.hed", i, EMUtil::IMAGE_IMAGIC);
00162 //              }
00163     }
00164 
00165     if (low) {
00166                 printf("Low level tests starting. Please note that compiling with optimization may invalidate certain tests. Also note that overhead is difficult to compensate for, so in most cases it is not dealt with.\n");
00167 
00168                 float t1 = (float) clock();
00169                 for (float fj = 0; fj < 500.0; fj += 1.0) {
00170                         for (int i = 0; i < NTT / 2.0; i += 1)
00171                                 data[i]->dot(data[i + NTT / 2]);
00172                 }
00173 
00174                 float t2 = (float) clock();
00175                 float ti = (t2 - t1) / (float) CPS;
00176                 printf("Baseline 1: %d, %d x %d dot()s in %1.1f sec  %1.1f/sec-> ~%1.2f mflops\n",
00177                            500 * NTT / 2, SIZE, SIZE, ti, 500 * NTT / (2.0 * ti),
00178                            SIZE * SIZE * 4 * 500.0 * NTT / (1000000.0 * ti));
00179 
00180                 t1 = (float) clock();
00181                 for (float fj = 0; fj < 500.0; fj += 1.0) {
00182                         for (float fi = 0; fi < NTT / 2.0; fi += 1.0)
00183                                 data[1]->dot(data[12]);
00184                 }
00185                 t2 = (float) clock();
00186                 ti = (t2 - t1) / (float) CPS;
00187                 printf("Baseline 1a: %d, %d x %d optimized cached dot()s in %1.1f s %1.1f/s-> ~%1.2f mflops\n",
00188                            500 * NTT / 2, SIZE, SIZE, ti, 500 * NTT / (2.0 * ti),
00189                            SIZE * SIZE * 4 * 500.0 * NTT / (1000000.0 * ti));
00190 
00191                 t1 = (float) clock();
00192                 for (int j = 0; j < 500; j++) {
00193                         for (int i = 0; i < NTT / 2; i++) {
00194                                 Dict d;
00195                                 d["keepzero"] = 1;
00196                                 data[i]->cmp("sqeuclidean", data[i + NTT / 2], d);
00197                         }
00198                 }
00199                 t2 = (float) clock();
00200                 ti = (t2 - t1) / (float) CPS;
00201                 printf("Baseline 2: %d, %d x %d lcmp()s in %1.1f sec   %1.1f/sec\n", 500 * NTT / 2, SIZE,
00202                            SIZE, ti, 500 * NTT / (2.0 * ti));
00203 
00204                 t1 = (float) clock();
00205                 for (int j = 0; j < 100; j++) {
00206                         for (int i = 0; i < NTT / 2; i++) {
00207                                 Dict params;
00208                                 data[i]->cmp("phase", data[i + NTT / 2], params);
00209                         }
00210                 }
00211                 t2 = (float) clock();
00212                 ti = (t2 - t1) / (float) CPS;
00213                 printf("Baseline 3: %d, %d x %d pcmp()s in %1.1f sec  %1.1f/sec\n", 100 * NTT / 2, SIZE,
00214                            SIZE, ti, 100 * NTT / (2.0 * ti));
00215 
00216                 t1 = (float) clock();
00217                 for (int j = 0; j < 100; j++) {
00218                         for (int i = 0; i < NTT / 2; i++) {
00219                                 Dict params;
00220                                 data[i]->cmp("frc", data[i + NTT / 2], params);
00221                         }
00222                 }
00223                 t2 = (float) clock();
00224                 ti = (t2 - t1) / (float) CPS;
00225                 printf("Baseline 4: %d, %d x %d fscmp()s in %1.1f sec  %1.1f/sec\n", 100 * NTT / 2, SIZE,
00226                            SIZE, ti, 100 * NTT / (2.0 * ti));
00227 
00228                 t1 = (float) clock();
00229                 for (int j = 0; j < 500; j++) {
00230                         for (int i = 0; i < NTT / 2; i++)
00231                                 data[i]->process_inplace("math.absvalue");
00232                 }
00233                 t2 = (float) clock();
00234                 ti = (t2 - t1) / (float) CPS;
00235                 printf("Baseline 5a: %d, %d x %d fabs in %1.1f sec -> ~%1.2f mfabs/sec\n", 500 * NTT / 2,
00236                            SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00237 
00238                 t1 = (float) clock();
00239                 for (int j = 0; j < 100; j++) {
00240                         for (int i = 0; i < NTT / 2; i++)
00241                                 data[i]->process_inplace("math.sqrt");
00242                 }
00243                 t2 = (float) clock();
00244                 ti = (t2 - t1) / (float) CPS;
00245                 printf("Baseline 5b: %d, %d x %d sqrts in %1.1f sec -> ~%1.2f msqrt/sec\n", 100 * NTT / 2,
00246                            SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00247 
00248                 d = data[0]->get_data();
00249                 t1 = (float) clock();
00250                 for (int j = 0; j < 100; j++) {
00251                         for (int i = 0; i < NTT / 2; i++) {
00252                                 for (int k = 0; k < SIZE * SIZE; k++)
00253                                         (void)sqrt(d[k]);
00254                         }
00255                 }
00256                 t2 = (float) clock();
00257                 ti = (t2 - t1) / (float) CPS;
00258                 printf("Baseline 5c: %d, %d x %d sqrts in %1.1f sec -> ~%1.2f msqrt/sec (cached)\n",
00259                            100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00260                 data[0]->update();
00261 
00262                 d = data[0]->get_data();
00263                 t1 = (float) clock();
00264                 for (int j = 0; j < 100; j++) {
00265                         for (int i = 0; i < NTT / 2; i++) {
00266                                 for (int k = 0; k < SIZE * SIZE; k++)
00267                                         (void)cos(d[k]);
00268                         }
00269                 }
00270                 t2 = (float) clock();
00271                 ti = (t2 - t1) / (float) CPS;
00272                 printf("Baseline 5d: %d, %d x %d cos in %1.1f sec -> ~%1.2f mcos/sec (cached)\n",
00273                            100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00274                 data[0]->update();
00275 
00276                 d = data[0]->get_data();
00277                 t1 = (float) clock();
00278                 for (int j = 0; j < 100; j++) {
00279                         for (int i = 0; i < NTT / 2; i++) {
00280                                 for (int k = 0; k < SIZE * SIZE - 1; k++)
00281                                         (void)hypot(d[k], d[k + 1]);
00282                         }
00283                 }
00284                 t2 = (float) clock();
00285                 ti = (t2 - t1) / (float) CPS;
00286                 printf("Baseline 5e: %d, %d x %d hypot in %1.1f sec -> ~%1.2f mhypot/sec (cached)\n",
00287                            100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 100.0 * NTT / (1000000.0 * ti));
00288                 data[0]->update();
00289 
00290                 d = data[0]->get_data();
00291                 t1 = (float) clock();
00292                 for (int j = 0; j < 1000; j++) {
00293                         for (int i = 0; i < NTT / 2; i++) {
00294                                 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00295                                         float f = d[k] * d[k + 1];
00296                                         f = f + f;
00297                                 }
00298                         }
00299                 }
00300 
00301                 t2 = (float) clock();
00302                 ti = (t2 - t1) / (float) CPS;
00303                 printf("Baseline 5f: %d, %d x %d mult in %1.1f sec -> ~%1.2f mmult/sec (cached)\n",
00304                            100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 1000.0 * NTT / (1000000.0 * ti));
00305                 data[0]->update();
00306 
00307                 d = data[0]->get_data();
00308                 t1 = (float) clock();
00309                 for (int j = 0; j < 500; j++) {
00310                         for (int i = 0; i < NTT / 2; i++) {
00311                                 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00312                                         float a = d[k] / d[k + 1];
00313                                         a = a + a;
00314                                 }
00315                         }
00316                 }
00317                 t2 = (float) clock();
00318                 ti = (t2 - t1) / (float) CPS;
00319                 printf("Baseline 5g: %d, %d x %d div in %1.1f sec -> ~%1.2f mdiv/sec (cached)\n",
00320                            100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00321                 data[0]->update();
00322 
00323                 d = data[0]->get_data();
00324                 t1 = (float) clock();
00325                 for (int j = 0; j < 500; j++) {
00326                         for (int i = 0; i < NTT / 2; i++) {
00327                                 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00328                                         float f = fabs(d[k]);
00329                                         f = f + f;
00330                                 }
00331                         }
00332                 }
00333                 t2 = (float) clock();
00334                 ti = (t2 - t1) / (float) CPS;
00335                 printf("Baseline 5h: %d, %d x %d fabs in %1.1f sec -> ~%1.2f fabs/sec (cached)\n",
00336                            100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00337                 data[0]->update();
00338 
00339                 d = data[0]->get_data();
00340                 t1 = (float) clock();
00341                 for (int j = 0; j < 500; j++) {
00342                         for (int i = 0; i < NTT / 2; i++) {
00343                                 for (int k = 0; k < SIZE * SIZE - 1; k++) {
00344                                         (void)atan2(d[k], d[k + 1]);
00345                                         (void)hypot(d[k], d[k + 1]);
00346                                 }
00347                         }
00348                 }
00349                 t2 = (float) clock();
00350                 ti = (t2 - t1) / (float) CPS;
00351                 printf("Baseline 5i: %d, %d x %d ri2ap in %1.1f sec -> ~%1.2f ri2ap/sec (cached)\n",
00352                            100 * NTT / 2, SIZE, SIZE, ti, SIZE * SIZE * 500.0 * NTT / (1000000.0 * ti));
00353 
00354                 data[0]->update();
00355                 t1 = (float) clock();
00356 
00357                 for (int i = 0; i < NTT * 100; i++) {
00358                         EMData *cp = data[i % NTT]->copy();
00359                         Dict d("n",2);
00360                         cp->process_inplace("math.meanshrink",d);
00361                         if( cp )
00362                         {
00363                                 delete cp;
00364                                 cp = 0;
00365                         }
00366                 }
00367                 t2 = (float) clock();
00368                 ti = (t2 - t1) / (float) CPS;
00369                 printf("Baseline 6:  %1.1f sec %f meanshrink x 2/sec\n", ti, NTT * 100.0 / ti);
00370 
00371                 EMData *d1a = data[0]->copy();
00372                 t1 = (float) clock();
00373 
00374                 for (int i = 0; i < NTT * 1000; i++) {
00375                         d1a->do_fft();
00376                         d1a->update();
00377                 }
00378                 t2 = (float) clock();
00379                 ti = (t2 - t1) / (float) CPS;
00380                 printf("Baseline 7:  %1.1f sec %f ffts/sec\n", ti, NTT * 1000 / ti);
00381 
00382                 d1a = d1a->copy();
00383                 t1 = (float) clock();
00384 
00385                 for (int i = 0; i < NTT * 1000; i++) {
00386                         d1a->translate(-1, -3, 0);
00387                 }
00388                 t2 = (float) clock();
00389                 ti = (t2 - t1) / (float) CPS;
00390                 printf("Baseline 8:  %1.1f sec   %f translates/sec\n", ti, NTT * 1000 / ti);
00391 
00392                 return 0;
00393     }
00394 
00395 
00396     EMData *tmp = 0;
00397     float t1 = (float) clock();
00398     for (int i = 0; i < 3; i++) {
00399 //#ifdef EMAN2_USING_CUDA
00400 //              if (gpu) {
00401 //                      cout << "Using gpu" << endl;
00402 //                      data[i]->set_gpu_rw_current();
00403 //                      data[i]->cuda_lock();
00404 //              }
00405 //#endif
00406                 for (int j = 5; j < NTT; j++) {
00407                         if (slow == 3) {
00408                                 tmp = data[i]->align("rtf_best", data[j],
00409                                                                          Dict("flip", (EMData*)0,"maxshift", SIZE/8));
00410                         }
00411                         else if (slow == 1) {
00412                                 Dict d;
00413                                 d["flip"] = (EMData*)0;
00414                                 d["maxshift"] = SIZE/8;
00415                                 tmp = data[i]->align("rtf_slow", data[j], d);
00416                         }
00417                         else if (newali == 1) {
00418                                 tmp = data[i]->align("rotate_translate_flip", data[j], Dict());
00419                                 EMData* tmp2 = tmp->align("refine", data[j], Dict());
00420                                 delete tmp2;
00421                                 tmp2 = 0;
00422                         }
00423                         else {
00424 //#ifdef EMAN2_USING_CUDA
00425 //                              if (gpu) {
00426 //                                      data[j]->set_gpu_rw_current();
00427 //                              }
00428 //#endif
00429                                 tmp = data[i]->align("rotate_translate_flip", data[j], Dict());
00430                         }
00431                 if( tmp )
00432                 {
00433                                 delete tmp;
00434                                 tmp = 0;
00435                 }
00436                         if (j % 10 == 0) {
00437                                 putchar('.');
00438                                 fflush(stdout);
00439                         }
00440                 }
00441 //#ifdef EMAN2_USING_CUDA
00442 //              if (gpu) {
00443 //                      data[i]->cuda_unlock();
00444 //              }
00445 //#endif
00446                 putchar('\n');
00447     }
00448     float t2 = (float) clock();
00449 
00450     float ti = (t2 - t1) / (float) CPS;
00451     if (slow == 2)
00452                 ti *= 10.0;
00453     if (!big && !slow && !newali) {
00454                 printf("\nFor comparison (note these numbers may change from release to release)\n");
00455                 printf("An AMD Athlon (32 bit) 900Mhz SF --------------------------------  360\n");
00456                 printf("An AMD Athlon XP 2400+ (32 bit) 2.0Ghz SF ----------------------- 1010\n");
00457                 printf("An AMD Athlon XP 2600+ (32 bit) 2.0Ghz SF ----------------------- 1090\n");
00458                 printf("An AMD Athlon 64 3700+ 2.2Ghz SF -------------------------------- 1530\n");
00459                 printf("An AMD Athlon 64 X2 3800+ 2.0Ghz SF ----------------------------- 1578\n");
00460                 printf("An AMD Athlon 64 FX-51 2.2Ghz SF -------------------------------- 1760\n");
00461                 printf("An AMD Opteron 248 2.2Ghz SF ------------------------------------ 1870\n");
00462                 printf("An Intel Core2 T7200 2.0Ghz SF ---------------------------------- 1990\n");
00463                 printf("An Intel Xeon E5335 2.0Ghz SF ----------------------------------- 2010\n");
00464                 printf("An AMD Opteron 280 2.4Ghz SF ------------------------------------ 2130\n");
00465                 printf("An Intel Core2 6700 2.66Ghz SF ---------------------------------- 2600\n");
00466                 printf("An Intel Core2 Duo T9400 2.53Ghz SF ----------------------------- 2730\n");
00467                 printf("An Intel Xeon E5430 2.66Ghz SF ---------------------------------- 2800\n");
00468                 printf("An Intel Xeon X5355 2.66Ghz SF ---------------------------------- 2920\n");
00469                 printf("An Intel Xeon X5450 3.0Ghz SF ----------------------------------- 3220\n");
00470                 printf("An Intel Xeon X5460 3.16Ghz SF ---------------------------------- 3320\n");
00471                 printf("\nYour machines speed factor = %1.1f\n", 25000.0 / ti);
00472                 printf("\n\nThis repesents %1.2f RTFAligns/sec\n",
00473                            3.0 * ((slow == 2 ? NTT / 10 : NTT) - 5) / ti);
00474     }
00475     else if (big && !slow) {
00476                 printf("\nYour machines speed factor = %1.1f\n", 72000.0 / ti);
00477     }
00478     else {
00479                 printf("\nYour machines speed factor on this test = %1.1f\n", 25000.0 / ti);
00480     }
00481     return 0;
00482 }
00483 
00484