00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #ifdef EMAN2_USING_CUDA
00035
00036 #include "emdata.h"
00037 #include "exception.h"
00038 #include <cuda_runtime_api.h>
00039 #include <driver_functions.h>
00040 #include <cuda.h>
00041 #include "cuda/cuda_util.h"
00042 #include "cuda/cuda_processor.h"
00043 #include "cuda/cuda_emfft.h"
00044
00045 using namespace EMAN;
00046
00047 EMData::CudaCache EMData::cuda_cache(100);
00048
00049 float* EMData::get_cuda_data() const {
00050 if (get_size() == 0 ) throw UnexpectedBehaviorException("The size of the data is 0?");
00051 if (cuda_cache_handle==-1 || EMDATA_GPU_NEEDS_UPDATE & flags) {
00052 if (cuda_cache_handle != -1 && gpu_ro_is_current() ) {
00053 cuda_cache.copy_ro_to_rw(cuda_cache_handle);
00054 } else {
00055 if (cuda_cache_handle !=-1 ) {
00056 cuda_cache.clear_item(cuda_cache_handle);
00057 }
00058 cuda_cache_handle = cuda_cache.cache_rw_data(this,rdata,nx,ny,nz);
00059 if (cuda_cache_handle == -1) throw;
00060 }
00061 flags &= ~EMDATA_GPU_NEEDS_UPDATE;
00062 }
00063 return cuda_cache.get_rw_data(cuda_cache_handle);
00064 }
00065
00066 bool EMData::gpu_rw_is_current() const {
00067 if (cuda_cache_handle !=-1 && !(EMDATA_GPU_NEEDS_UPDATE & flags)) return cuda_cache.has_rw_data(cuda_cache_handle);
00068 else return false;
00069 }
00070
00071 bool EMData::cpu_rw_is_current() const {
00072 if (!(EMDATA_CPU_NEEDS_UPDATE & flags) && rdata != 0) return true;
00073 return false;
00074 }
00075
00076 bool EMData::gpu_ro_is_current() const {
00077 if (cuda_cache_handle !=-1 && !(EMDATA_GPU_RO_NEEDS_UPDATE & flags)) return cuda_cache.has_ro_data(cuda_cache_handle);
00078 else return false;
00079 }
00080
00081 void EMData::bind_cuda_texture(const bool interp_mode) const {
00082 check_cuda_array_update();
00083 cuda_cache.lock(cuda_cache_handle);
00084 bind_cuda_array_to_texture(cuda_cache.get_ro_data(cuda_cache_handle),cuda_cache.get_ndim(cuda_cache_handle),interp_mode);
00085 }
00086
00087 void EMData::unbind_cuda_texture() const {
00088 ::unbind_cuda_texture(cuda_cache.get_ndim(cuda_cache_handle));
00089 cuda_cache.unlock(cuda_cache_handle);
00090 }
00091
00092 cudaArray* EMData::get_cuda_array() const {
00093 if (get_size() == 0 ) throw UnexpectedBehaviorException("The size of the data is 0?");
00094 check_cuda_array_update();
00095 return cuda_cache.get_ro_data(cuda_cache_handle);
00096 }
00097
00098 void EMData::check_cuda_array_update() const {
00099 if (cuda_cache_handle==-1 || EMDATA_GPU_RO_NEEDS_UPDATE & flags) {
00100 if (cuda_cache_handle !=- 1 && gpu_rw_is_current() ) {
00101 cuda_cache.copy_rw_to_ro(cuda_cache_handle);
00102 } else {
00103 if (cuda_cache_handle !=-1 ) cuda_cache.clear_item(cuda_cache_handle);
00104 cuda_cache_handle = cuda_cache.cache_ro_data(this,rdata,nx,ny,nz);
00105 if (cuda_cache_handle >=50 ) throw InvalidValueException(cuda_cache_handle,"In get cuda data, the handle is strange");
00106 if (cuda_cache_handle == -1) throw;
00107 }
00108 flags &= ~EMDATA_GPU_RO_NEEDS_UPDATE;
00109 }
00110 }
00111
00112 void EMData::cuda_cache_lost_imminently() const {
00113
00114 get_data();
00115 flags |= EMDATA_GPU_NEEDS_UPDATE| EMDATA_GPU_RO_NEEDS_UPDATE;
00116 cuda_cache_handle = -1;
00117 }
00118 void EMData::cuda_lock() const {
00119 if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("No cuda handle, can't lock");
00120 cuda_cache.lock(cuda_cache_handle);
00121
00122 }
00123 void EMData::cuda_unlock() const {
00124
00125
00126 if (cuda_cache_handle == -1) throw UnexpectedBehaviorException("No cuda handle, can't lock");
00127 cuda_cache.unlock(cuda_cache_handle);
00128 }
00129 EMDataForCuda EMData::get_data_struct_for_cuda() const {
00130 EMDataForCuda tmp = {get_cuda_data(),nx,ny,nz};
00131 return tmp;
00132 }
00133
00134 bool EMData::gpu_operation_preferred() const {
00135 bool cpu = cpu_rw_is_current();
00136 bool gpu = gpu_rw_is_current();
00137 if ( cpu==0 && gpu==0 ) {
00138
00139 return false;
00140
00141
00142
00143
00144
00145
00146 }
00147 if (gpu) return true;
00148 return false;
00149 }
00150
00151 EMData* EMData::calc_ccf_cuda( EMData* image, bool use_texturing,bool center ) const {
00152 EMData* tmp;
00153 if (is_complex()) {
00154
00155 tmp = new EMData(*this);
00156 } else {
00157
00158 tmp = do_fft_cuda();
00159 }
00160
00161 Dict d;
00162 EMData* with = 0;
00163 if (image == this) {
00164 d["with"] = (EMData*) tmp;
00165 } else {
00166 if (!image->is_complex()) {
00167 int wnx = image->get_xsize(); int wny = image->get_ysize(); int wnz = image->get_zsize();
00168 if ( wnx != nx || wny != ny || wnz != nz ) {
00169
00170 Region r;
00171 if (nz > 1) {
00172 r = Region((wnx-nx)/2, (wny-ny)/2, (wnz-nz)/2,nx,ny,nz);
00173 }
00174 else if (ny > 1) {
00175 r = Region((wnx-nx)/2, (wny-ny)/2,nx,ny);
00176 }
00177 else throw UnexpectedBehaviorException("Calc_ccf_cuda doesn't work on 1D images");
00178 EMData* tmp = image->get_clip(r);
00179 with = tmp->do_fft_cuda();
00180 delete tmp;
00181 }else {
00182 with = image->do_fft_cuda();
00183 }
00184 d["with"] = (EMData*) with;
00185 } else {
00186
00187 d["with"] = (EMData*)image;
00188 }
00189 }
00190
00191
00192 EMDataForCuda left = tmp->get_data_struct_for_cuda();
00193 CudaDataLock lock(tmp);
00194 if (use_texturing) {
00195 ((EMData*)d["with"])->bind_cuda_texture(false);
00196 emdata_processor_correlation_texture(&left,center);
00197 ((EMData*)d["with"])->unbind_cuda_texture();
00198 } else {
00199 EMDataForCuda right = ((EMData*)d["with"])->get_data_struct_for_cuda();
00200 CudaDataLock lock2((EMData*)d["with"]);
00201 emdata_processor_correlation(&left,&right,center);
00202 }
00203 tmp->gpu_update();
00204
00205
00206
00207 if (with != 0 && image != this) {
00208 delete with;
00209 with = 0;
00210 }
00211
00212 EMData* soln = tmp->do_ift_cuda(false);
00213 soln->gpu_update();
00214 delete tmp;
00215 tmp = 0;
00216
00217 return soln;
00218 }
00219
00220 EMData *EMData::make_rotational_footprint_cuda( bool unwrap)
00221 {
00222 ENTERFUNC;
00223
00224
00225
00226 float edge_mean = 0;
00227 CudaDataLock(this);
00228 if ( rot_fp != 0 && unwrap == true) {
00229 return new EMData(*rot_fp);
00230 }
00231
00232
00233
00234
00236
00240
00241 int cs = (((nx * 7 / 4) & 0xfffff8) - nx) / 2;
00242
00243 static EMData big_clip;
00244 int big_x = nx+2*cs;
00245 int big_y = ny+2*cs;
00246 int big_z = 1;
00247 if ( nz != 1 ) {
00248 big_z = nz+2*cs;
00249 }
00250
00251
00252 if ( big_clip.get_xsize() != big_x || big_clip.get_ysize() != big_y || big_clip.get_zsize() != big_z ) {
00253 big_clip.set_size_cuda(big_x,big_y,big_z);
00254 big_clip.get_cuda_data();
00255 big_clip.cuda_lock();
00256 }
00257 big_clip.to_value(edge_mean);
00258
00259 if (nz != 1) {
00260 big_clip.insert_clip(this,IntPoint(cs,cs,cs));
00261 } else {
00262 big_clip.insert_clip(this,IntPoint(cs,cs,0));
00263 }
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275 EMData *mc = big_clip.calc_ccf_cuda(&big_clip,false,true);
00276 mc->sub(mc->get_edge_mean());
00277
00278 static EMData sml_clip;
00279 int sml_x = nx * 3 / 2;
00280 int sml_y = ny * 3 / 2;
00281 int sml_z = 1;
00282 if ( nz != 1 ) {
00283 sml_z = nz * 3 / 2;
00284 }
00285
00286 if ( sml_clip.get_xsize() != sml_x || sml_clip.get_ysize() != sml_y || sml_clip.get_zsize() != sml_z ) {
00287 sml_clip.set_size_cuda(sml_x,sml_y,sml_z);
00288 sml_clip.get_cuda_data();
00289 sml_clip.cuda_lock();
00290 }
00291 if (nz != 1) {
00292 sml_clip.insert_clip(mc,IntPoint(-cs+nx/4,-cs+ny/4,-cs+nz/4));
00293 } else {
00294 sml_clip.insert_clip(mc,IntPoint(-cs+nx/4,-cs+ny/4,0));
00295 }
00296
00297 delete mc; mc = 0;
00298 EMData * result = NULL;
00299
00300 if (!unwrap || nz != 1) {
00301
00302 result = new EMData(sml_clip);
00303 }
00304 else {
00305 result = sml_clip.unwrap();
00306 }
00307
00308 result->gpu_update();
00309
00310 if ( unwrap == true)
00311 {
00312
00313
00314
00315
00316
00317
00318
00319 rot_fp = result;
00320 return new EMData(*rot_fp);
00321 }
00322 else return result;
00323 }
00324
00325 EMData* EMData::calc_ccfx_cuda( EMData * const with, int y0, int y1, bool no_sum)
00326 {
00327 ENTERFUNC;
00328
00329 if (!with) {
00330 LOGERR("NULL 'with' image. ");
00331 throw NullPointerException("NULL input image");
00332 }
00333
00334 if (!EMUtil::is_same_size(this, with)) {
00335 LOGERR("images not same size: (%d,%d,%d) != (%d,%d,%d)",
00336 nx, ny, nz,
00337 with->get_xsize(), with->get_ysize(), with->get_zsize());
00338 throw ImageFormatException("images not same size");
00339 }
00340 if (get_ndim() > 2) {
00341 LOGERR("2D images only");
00342 throw ImageDimensionException("2D images only");
00343 }
00344
00345 if (y1 <= y0) {
00346 y1 = ny;
00347 }
00348
00349 if (y0 >= y1) {
00350 y0 = 0;
00351 }
00352
00353 if (y0 < 0) {
00354 y0 = 0;
00355 }
00356
00357 if (y1 > ny) {
00358 y1 = ny;
00359 }
00360
00361 static int nx_device_fft = 0;
00362 static int ny_defice_fft = 0;
00363 static EMData f1;
00364 static EMData f2;
00365 static EMData rslt;
00366
00367 int height = y1-y0;
00368 int width = (nx+2-(nx%2));
00369 if (width != nx_device_fft || height != ny_defice_fft ) {
00370 f1.set_size_cuda(width,height);
00371 f2.set_size_cuda(width,height);
00372 rslt.set_size_cuda(nx,height);
00373 nx_device_fft = width;
00374 ny_defice_fft = height;
00375 }
00376
00377 {
00378 float * cd = get_cuda_data();
00379 CudaDataLock lock(this);
00380 float * f1cd = f1.get_cuda_data();
00381 CudaDataLock lock2(&f1);
00382 cuda_dd_fft_real_to_complex_1d(cd,f1cd,nx,height);
00383 }
00384 {
00385 float * wcd = with->get_cuda_data();
00386 CudaDataLock lock(this);
00387 float * f2cd = f2.get_cuda_data();
00388 CudaDataLock lock2(&f2);
00389 cuda_dd_fft_real_to_complex_1d(wcd,f2cd,nx,height);
00390 }
00391
00392 EMDataForCuda left = f1.get_data_struct_for_cuda();
00393 CudaDataLock lock(&f1);
00394
00395 bool use_texturing = false;
00396 bool center = false;
00397 if (use_texturing) {
00398 f2.bind_cuda_texture(false);
00399 emdata_processor_correlation_texture(&left,center);
00400 f2.unbind_cuda_texture();
00401 } else {
00402 EMDataForCuda right = f2.get_data_struct_for_cuda();
00403 CudaDataLock lock2(&f2);
00404 emdata_processor_correlation(&left,&right,center);
00405 }
00406
00407 {
00408 float* rcd = rslt.get_cuda_data();
00409 CudaDataLock rlock(&rslt);
00410 float * f1cd = f1.get_cuda_data();
00411 CudaDataLock lock2(&f1);
00412 cuda_dd_fft_complex_to_real_1d(f1cd,rcd,nx,height);
00413 }
00414
00415 if (no_sum) {
00416 rslt.gpu_update();
00417 EXITFUNC;
00418 return new EMData(rslt);
00419 }
00420 else {
00421 EXITFUNC;
00422 return rslt.column_sum_cuda();
00423 }
00424
00425 }
00426
00427 EMData* EMData::column_sum_cuda() const {
00428 ENTERFUNC;
00429 if (get_ndim() != 2) throw ImageDimensionException("Column sum cuda has been prgogrammed work exclusively with 2D data.");
00430 EMData *cf = new EMData();
00431 cf->set_size_cuda(nx, 1, 1);
00432 EMDataForCuda left = cf->get_data_struct_for_cuda();
00433 CudaDataLock llock(cf);
00434 bind_cuda_texture(false);
00435 emdata_column_sum(&left,ny);
00436 unbind_cuda_texture();
00437 cf->gpu_update();
00438 EXITFUNC;
00439 return cf;
00440 }
00441
00442 void EMData::set_gpu_rw_data(float* data, const int x, const int y, const int z) {
00443 nx = x; ny = y; nz = z;
00444 nxy = nx*ny;
00445 if (cuda_cache_handle!=-1) {
00446 cuda_cache.replace_gpu_rw(cuda_cache_handle,data);
00447 } else {
00448 cuda_cache_handle = cuda_cache.store_rw_data(this,data);
00449 }
00450 gpu_update();
00451 }
00452
00453 void EMData::free_cuda_memory() const {
00454
00455 if (cuda_cache_handle!=-1) {
00456 cuda_cache.clear_item(cuda_cache_handle);
00457 cuda_cache_handle = -1;
00458 }
00459 }
00460
00462 void EMData::copy_gpu_rw_to_cpu() {
00463 get_data();
00464 }
00465
00466 void EMData::copy_cpu_to_gpu_rw() {
00467 get_cuda_data();
00468 }
00469
00470 void EMData::copy_cpu_to_gpu_ro() {
00471 get_cuda_array();
00472 }
00473
00474 void EMData::copy_gpu_rw_to_gpu_ro() {
00475 cuda_cache.copy_rw_to_ro(cuda_cache_handle);
00476 }
00477
00478 void EMData::copy_gpu_ro_to_gpu_rw() const {
00479 cuda_cache.copy_ro_to_rw(cuda_cache_handle);
00480 }
00481
00482 void EMData::copy_gpu_ro_to_cpu() const {
00483 cuda_cache.copy_ro_to_cpu(cuda_cache_handle,rdata);
00484 }
00485
00486
00487 EMData::CudaCache::CudaCache(const int size) : cache_size(size), current_insert_idx(0), mem_allocated(0), locked(size,0)
00488 {
00489 device_init();
00490 rw_cache = new float *[cache_size];
00491 caller_cache = new const EMData*[cache_size];
00492 ro_cache = new cudaArray *[cache_size];
00493
00494 for(int i = 0; i < cache_size; ++ i ) {
00495 rw_cache[i] = 0;
00496 caller_cache[i] = 0;
00497 ro_cache[i] = 0;
00498 }
00499 }
00500
00501 EMData::CudaCache::~CudaCache()
00502 {
00503 for (int i = 0; i < cache_size; i++) {
00504 clear_item(i);
00505 }
00506
00507 if( rw_cache )
00508 {
00509 delete[]rw_cache;
00510 rw_cache = 0;
00511 }
00512
00513
00514 if( caller_cache )
00515 {
00516 delete[]caller_cache;
00517 caller_cache = 0;
00518 }
00519
00520 cleanup_cuda_fft_dd_plan_cache();
00521 }
00522
00523 void EMData::CudaCache::lock(const int idx) {
00524 if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
00525 locked[idx] += 1;
00526
00527 }
00528 void EMData::CudaCache::unlock(const int idx) {
00529 if (idx < 0 || idx >= cache_size) throw InvalidValueException(idx,"The idx is beyond the cache size");
00530 if (locked[idx] == 0) {
00531
00532 return;
00533
00534
00535 }
00536 locked[idx] -=1;
00537 }
00538
00539 int EMData::CudaCache::cache_rw_data(const EMData* const emdata, const float* const data,const int nx, const int ny, const int nz)
00540 {
00541 ensure_slot_space();
00542
00543 float* cuda_rw_data = alloc_rw_data(nx,ny,nz);
00544
00545 if (data != 0 ) {
00546 size_t num_bytes = nx*ny*nz*sizeof(float);
00547 cudaError_t error = cudaMemcpy(cuda_rw_data,data,num_bytes,cudaMemcpyHostToDevice);
00548 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaMemcpy (host to device) error:" + string(cudaGetErrorString(error)));
00549 }
00550
00551 return blind_store_rw_data(emdata,cuda_rw_data);
00552 }
00553
00554 int EMData::CudaCache::blind_store_rw_data(const EMData* const emdata, float* cuda_rw_data)
00555 {
00556
00557 rw_cache[current_insert_idx] = cuda_rw_data;
00558 caller_cache[current_insert_idx] = emdata;
00559 ro_cache[current_insert_idx] = 0;
00560
00561 int ret = current_insert_idx;
00562 current_insert_idx += 1;
00563 current_insert_idx %= cache_size;
00564 if ( current_insert_idx > cache_size ) throw;
00565
00566 return ret;
00567 }
00568
00569 int EMData::CudaCache::store_rw_data(const EMData* const emdata, float* cuda_rw_data)
00570 {
00571 ensure_slot_space();
00572
00573 int nx = emdata->get_xsize();
00574 int ny = emdata->get_ysize();
00575 int nz = emdata->get_zsize();
00576 size_t num_bytes = nx*ny*nz*sizeof(float);
00577 mem_allocated += num_bytes;
00578
00579 return blind_store_rw_data(emdata, cuda_rw_data);
00580 }
00581
00582 void EMData::CudaCache::debug_print() const {
00583 cout << "Cuda device cache debug. Total mem allocated: " << static_cast<float>(mem_allocated)/1000000.0 << "MB" << endl;
00584 for(int i = 0; i < cache_size; ++i) {
00585 int handle = -1;
00586 int nx = 0;
00587 int ny = 0;
00588 int nz = 0;
00589 if (caller_cache[i] != 0) {
00590 handle = caller_cache[i]->cuda_cache_handle;
00591 nx = caller_cache[i]->get_xsize();
00592 ny = caller_cache[i]->get_ysize();
00593 nz = caller_cache[i]->get_zsize();
00594 }
00595 cout << i << ": " << handle << " " << caller_cache[i] << " dims: " << nx << " " << ny << " " << nz << " locked: " << locked[i] << " rw " << rw_cache[i] << " ro " << ro_cache[i] << endl;
00596
00597 }
00598 }
00599
00600 void EMData::CudaCache::replace_gpu_rw(const int idx,float* cuda_rw_data)
00601 {
00602
00603 if ( rw_cache[idx] != 0) {
00604 mem_allocated -= get_emdata_bytes(idx);
00605 cudaError_t error = cudaFree(rw_cache[idx]);
00606 if ( error != cudaSuccess)
00607 throw UnexpectedBehaviorException( "CudaFree error : " + string(cudaGetErrorString(error)));
00608 }
00609 rw_cache[idx] = 0;
00610
00611 const EMData* d = caller_cache[idx];
00612 int nx = d->get_xsize();
00613 int ny = d->get_ysize();
00614 int nz = d->get_zsize();
00615 size_t num_bytes = nx*ny*nz*sizeof(float);
00616 mem_allocated += num_bytes;
00617
00618 rw_cache[idx] = cuda_rw_data;
00619 }
00620
00621 void EMData::CudaCache::ensure_slot_space() {
00622
00623 int checked_entries = 0;
00624 while ( checked_entries < cache_size) {
00625 const EMData* previous = caller_cache[current_insert_idx];
00626 if (previous != 0 ) {
00627 if ( locked[current_insert_idx] == 0 ) {
00628
00629 previous->cuda_cache_lost_imminently();
00630
00631 clear_item(current_insert_idx);
00632 break;
00633 } else {
00634
00635 current_insert_idx++;
00636 current_insert_idx %= cache_size;
00637
00638 checked_entries++;
00639 }
00640 } else break;
00641 }
00642
00643 if (checked_entries == cache_size) {
00644 throw UnexpectedBehaviorException("All of the data objects in the cuda cache are locked! There is no space.");
00645 }
00646 }
00647
00648 float* EMData::CudaCache::alloc_rw_data(const int nx, const int ny, const int nz) {
00649 float* cuda_rw_data;
00650 size_t num_bytes = nx*ny*nz*sizeof(float);
00651
00652 cudaError_t error = cudaMalloc((void**)&cuda_rw_data,num_bytes);
00653 if ( error != cudaSuccess) {
00654 debug_print();
00655 throw BadAllocException( "cudaMalloc error :" + string(cudaGetErrorString(error)));
00656 }
00657
00658
00659
00660
00661
00662
00663
00664
00665 mem_allocated += num_bytes;
00666
00667 return cuda_rw_data;
00668
00669 }
00670
00671 int EMData::CudaCache::cache_ro_data(const EMData* const emdata, const float* const data,const int nx, const int ny, const int nz) {
00672 ensure_slot_space();
00673
00674 cudaArray *array = get_cuda_array_host(data,nx,ny,nz);
00675 if (array != 0) {
00676 mem_allocated += nx*ny*nz*sizeof(float);
00677
00678 rw_cache[current_insert_idx] = 0;
00679 caller_cache[current_insert_idx] = emdata;
00680 ro_cache[current_insert_idx] = array;
00681
00682 int ret = current_insert_idx;
00683 current_insert_idx += 1;
00684 current_insert_idx %= cache_size;
00685
00686 return ret;
00687 }
00688 else {
00689 throw BadAllocException("The allocation of the CUDA array failed");
00690 }
00691 }
00692
00693
00694 void EMData::CudaCache::copy_rw_to_ro(const int idx) {
00695
00696 if (rw_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update RO CUDA data: RW data is null.");
00697
00698 if (ro_cache[idx] != 0) {
00699 cudaError_t error = cudaFreeArray(ro_cache[idx]);
00700 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray error " + string(cudaGetErrorString(error)));
00701 ro_cache[idx] = 0;
00702 }
00703
00704 const EMData* d = caller_cache[idx];
00705 int nx = d->get_xsize();
00706 int ny = d->get_ysize();
00707 int nz = d->get_zsize();
00708
00709 cudaArray *array = get_cuda_array_device(rw_cache[idx],nx,ny,nz);
00710 if (array == 0) throw BadAllocException("The allocation of the CUDA array failed");
00711 ro_cache[idx] = array;
00712 }
00713
00714 void EMData::CudaCache::copy_ro_to_rw(const int idx) {
00715
00716 if (ro_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update RW CUDA data: RO data is null.");
00717
00718 if (rw_cache[idx] != 0) {
00719 cudaError_t error = cudaFree(rw_cache[idx]);
00720 if ( error != cudaSuccess)
00721 throw UnexpectedBehaviorException( "CudaFree error " + string(cudaGetErrorString(error)));
00722 rw_cache[idx] = 0;
00723 }
00724
00725 const EMData* d = caller_cache[idx];
00726 int nx = d->get_xsize();
00727 int ny = d->get_ysize();
00728 int nz = d->get_zsize();
00729 size_t num_bytes = nx*ny*nz*sizeof(float);
00730
00731 float* cuda_rw_data = alloc_rw_data(nx,ny,nz);
00732
00733 if (nz > 1) {
00734 cudaExtent extent;
00735 extent.width = nx;
00736 extent.height = ny;
00737 extent.depth = nz;
00738 cudaMemcpy3DParms copyParams = {0};
00739 copyParams.srcArray = ro_cache[idx];
00740 copyParams.dstPtr = make_cudaPitchedPtr((void*)cuda_rw_data, extent.width*sizeof(float), extent.width, extent.height);
00741 copyParams.extent = extent;
00742 copyParams.kind = cudaMemcpyDeviceToDevice;
00743 cudaError_t error = cudaMemcpy3D(©Params);
00744 if ( error != cudaSuccess)
00745 throw UnexpectedBehaviorException( "Copying device array to device pointer - CudaMemcpy3D error : " + string(cudaGetErrorString(error)));
00746
00747 } else if ( ny > 1 ) {
00748 cudaError_t error = cudaMemcpyFromArray(cuda_rw_data,ro_cache[idx],0,0,num_bytes,cudaMemcpyDeviceToDevice);
00749 if ( error != cudaSuccess)
00750 throw UnexpectedBehaviorException( "Copying device array to device pointer - cudaMemcpyFromArray error : " + string(cudaGetErrorString(error)));
00751 } else throw UnexpectedBehaviorException("Cuda infrastructure has not been designed to work on 1D data");
00752
00753 rw_cache[idx] = cuda_rw_data;
00754 }
00755
00756
00757 void EMData::CudaCache::copy_ro_to_cpu(const int idx,float* data) {
00758 if (ro_cache[idx] == 0) throw UnexpectedBehaviorException("Can not update RW CUDA data: RO data is null.");
00759 if (data == 0) throw NullPointerException("The cpu data pointer is NULL in copy_ro_to_cpu");
00760
00761 const EMData* d = caller_cache[idx];
00762 int nx = d->get_xsize();
00763 int ny = d->get_ysize();
00764 int nz = d->get_zsize();
00765 size_t num_bytes = nx*ny*nz*sizeof(float);
00766
00767 if (nz > 1) {
00768 cudaExtent extent;
00769 extent.width = nx;
00770 extent.height = ny;
00771 extent.depth = nz;
00772 cudaMemcpy3DParms copyParams = {0};
00773 copyParams.srcArray = ro_cache[idx];
00774 copyParams.dstPtr = make_cudaPitchedPtr((void*)data, extent.width*sizeof(float), extent.width, extent.height);
00775 copyParams.extent = extent;
00776 copyParams.kind = cudaMemcpyDeviceToHost;
00777 cudaError_t error = cudaMemcpy3D(©Params);
00778 if ( error != cudaSuccess)
00779 throw UnexpectedBehaviorException( "Copying device array to device pointer - CudaMemcpy3D error : " + string(cudaGetErrorString(error)));
00780
00781 } else if ( ny > 1 ) {
00782 cudaError_t error = cudaMemcpyFromArray(data,ro_cache[idx],0,0,num_bytes,cudaMemcpyDeviceToHost);
00783 if ( error != cudaSuccess)
00784 throw UnexpectedBehaviorException( "Copying device array to device pointer - cudaMemcpyFromArray error : " + string(cudaGetErrorString(error)));
00785 } else throw UnexpectedBehaviorException("Cuda infrastructure has not been designed to work on 1D data");
00786
00787 }
00788 void EMData::CudaCache::clear_item(const int idx) {
00789
00790 if ( rw_cache[idx] != 0) {
00791 mem_allocated -= get_emdata_bytes(idx);
00792 cudaError_t error = cudaFree(rw_cache[idx]);
00793 if ( error != cudaSuccess)
00794 throw UnexpectedBehaviorException( "CudaFree error : " + string(cudaGetErrorString(error)));
00795 }
00796 rw_cache[idx] = 0;
00797
00798 if ( ro_cache[idx] != 0) {
00799 mem_allocated -= get_emdata_bytes(idx);
00800 cudaError_t error = cudaFreeArray(ro_cache[idx]);
00801 if ( error != cudaSuccess) throw UnexpectedBehaviorException( "CudaFreeArray error : " + string(cudaGetErrorString(error)));
00802
00803 }
00804 ro_cache[idx] = 0;
00805
00806 caller_cache[idx] = 0;
00807
00808 locked[idx] = 0;
00809 }
00810
00811
00812 EMData::CudaDataLock::CudaDataLock(const EMData* const emdata) : data_cuda_handle(-1)
00813 {
00814 emdata->set_gpu_rw_current();
00815 data_cuda_handle = emdata->cuda_cache_handle;
00816 EMData::cuda_cache.lock(data_cuda_handle);
00817 }
00818
00819 EMData::CudaDataLock::~CudaDataLock() {
00820 EMData::cuda_cache.unlock(data_cuda_handle);
00821 }
00822
00823
00824 #endif //EMAN2_USING_CUDA