58#ifdef EMAN2_USING_CUDA
62#include <cuda_runtime_api.h>
63#include <driver_functions.h>
// Static bookkeeping shared by all EMData objects that hold CUDA device memory.

// Ends of the doubly linked list (nextlistitem / prevlistitem) of EMData
// objects tracked on the device. addtolist() links new objects in at the
// firstinlist end; freeup_devicemem() evicts starting from lastinlist.
// Both are null while the list is empty.
71const EMData* EMData::firstinlist = 0;
72const EMData* EMData::lastinlist = 0;
// Running total of num_bytes currently accounted for on the device:
// increased when device copies are made, decreased in rw_free() / ro_free().
73int EMData::memused = 0;
// Safety margin subtracted from the free device memory reported by
// cudaMemGetInfo() before it is compared with a requested allocation size
// in freeup_devicemem(). The floating-point literal is converted to int.
74int EMData::fudgemem = 1.024E8;
// Device number in use; -1 until cuda_initialize() assigns it.
// cuda_cleanup() only builds the lock-file name when this is >= 0.
75int EMData::cudadevicenum = -1;
// Global CUDA on/off flag, off by default.
// NOTE(review): presumably toggled by switchoncuda() / switchoffcuda() — confirm.
76bool EMData::usecuda = 0;
// True when the NOCUDAINIT environment variable is set to a value that
// atoi() parses as non-zero; cuda_initialize() returns 0 early in that case.
// An unset variable yields false.
77bool EMData::nocudainit = (getenv(
"NOCUDAINIT") == NULL) ? 0 :
bool(atoi(getenv(
"NOCUDAINIT")));
79bool EMData::copy_to_cuda()
const
84 cudaError_t error = cudaMemcpy(cudarwdata,
rdata,num_bytes,cudaMemcpyHostToDevice);
85 if ( error != cudaSuccess) {
95bool EMData::copy_to_cudaro()
const
100 memused += num_bytes;
108bool EMData::rw_alloc()
const
110 if(cudarwdata){
return true;}
111 num_bytes =
nxyz*
sizeof(float);
112 if(!freeup_devicemem(num_bytes)){
return false;}
113 cudaError_t error = cudaMalloc((
void**)&cudarwdata,num_bytes);
114 if ( error != cudaSuccess){
return false;}
123bool EMData::ro_alloc()
const
125 if(cudarodata){
return true;}
126 num_bytes =
nxyz*
sizeof(float);
127 if(!freeup_devicemem(num_bytes)){
return false;}
140void EMData::bindcudaarrayA(
const bool intp_mode)
const
151void EMData::unbindcudaarryA()
const
162void EMData::bindcudaarrayB(
const bool intp_mode)
const
173void EMData::unbindcudaarryB()
const
184bool EMData::copy_from_device(
const bool rocpy)
186 if(cudarwdata && !rocpy){
187 if(
rdata == 0){
rdata = (
float*)malloc(num_bytes);}
188 cudaError_t error = cudaMemcpy(
rdata,cudarwdata,num_bytes,cudaMemcpyDeviceToHost);
189 if ( error != cudaSuccess)
throw UnexpectedBehaviorException(
"CudaMemcpy (device to host) failed:" +
string(cudaGetErrorString(error)));
191 if(cudarodata) ro_free();
192 }
else if (cudarodata && rocpy) {
193 if(
rdata == 0){
rdata = (
float*)malloc(num_bytes);}
199 cudaMemcpy3DParms copyParams = {0};
200 copyParams.srcArray = cudarodata;
201 copyParams.dstPtr = make_cudaPitchedPtr((
void*)
rdata, extent.width*
sizeof(
float), extent.width, extent.height);
202 copyParams.extent = extent;
203 copyParams.kind = cudaMemcpyDeviceToHost;
204 cudaError_t error = cudaMemcpy3D(&copyParams);
205 if ( error != cudaSuccess)
throw UnexpectedBehaviorException(
"RO CudaMemcpy (device to host) failed:" +
string(cudaGetErrorString(error)));
207 cudaError_t error = cudaMemcpyFromArray(
rdata,cudarodata,0,0,num_bytes,cudaMemcpyDeviceToHost);
208 if ( error != cudaSuccess)
throw UnexpectedBehaviorException(
"RO CudaMemcpy (device to host) failed:" +
string(cudaGetErrorString(error)));
211 if(cudarwdata) rw_free();
220bool EMData::copy_rw_to_ro()
const
223 if(cudarwdata == 0){
return false;}
226 if(!freeup_devicemem(num_bytes)){
return false;}
229 memused += num_bytes;
241void EMData::runcuda(
float * results)
const
250 cudarwdata = results;
254void EMData::rw_free()
const
256 cudaError_t error = cudaFree(cudarwdata);
257 if ( error != cudaSuccess){
258 cout <<
rdata <<
" " << cudarwdata << endl;
262 memused -= num_bytes;
263 if(!cudarodata){removefromlist();}
267void EMData::ro_free()
const
269 cudaError_t error = cudaFreeArray(cudarodata);
272 memused -= num_bytes;
273 if(!cudarwdata){removefromlist();}
277bool EMData::isrodataongpu()
const
279 if(cudarodata != 0 && !roneedsupdate){
280 if(cudarodata !=0) elementaccessed();
284 if(copy_rw_to_ro()){;
295bool EMData::freeup_devicemem(
const int& num_bytes)
const
297 size_t freemem=0, totalmem=0;
298 cudaMemGetInfo(&freemem, &totalmem);
300 if ((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
304 while(lastinlist != 0){
305 if(lastinlist->cudarwdata){
307 const_cast<EMData*
>(lastinlist)->copy_from_device();
309 if(lastinlist->cudarodata){
310 const_cast<EMData*
>(lastinlist)->copy_from_device(1);
312 cudaMemGetInfo(&freemem, &totalmem);
313 if((ptrdiff_t(freemem) - ptrdiff_t(fudgemem)) > ptrdiff_t(num_bytes)){
return true;}
320void EMData::setdirtybit()
const
325void EMData::addtolist()
const
328 if(firstinlist == 0){
335 firstinlist->nextlistitem =
this;
336 prevlistitem = firstinlist;
343void EMData::elementaccessed()
const
346 if(firstinlist ==
this){
return;}
351void EMData::removefromlist()
const
355 if(firstinlist == lastinlist){
362 if(nextlistitem !=0){
363 nextlistitem->prevlistitem = prevlistitem;
365 firstinlist = prevlistitem;
367 if(prevlistitem !=0){
368 prevlistitem->nextlistitem = nextlistitem;
370 lastinlist = nextlistitem;
377void EMData::switchoncuda()
382void EMData::switchoffcuda()
387void EMData::cuda_cleanup()
392 if(lastinlist->cudarwdata) lastinlist->rw_free();
393 if(lastinlist && lastinlist->cudarodata) lastinlist->ro_free();
398 if(EMData::cudadevicenum >= 0)
401 sprintf(filename,
"%s%d",
cudalockfile,EMData::cudadevicenum);
407bool EMData::cuda_initialize()
409 if(EMData::nocudainit)
return 0;
414 EMData::cudadevicenum = device;
423const char* EMData::getcudalock()
EMData stores an image's data and defines core image processing routines.
float * rdata
image real data
void do_cuda_fft_cache_destroy()
bool copy_to_array(const float *data, cudaArray *array, const int nx, const int ny, const int n, const cudaMemcpyKind memkindz)
cudaArray * get_cuda_array(const int nx, const int ny, const int nz)
int device_init()
Initialize the cuda device. Can be called any number of times, but the actual initialization occurs onl...
void unbind_cuda_textureA(const int ndims)
void bind_cuda_array_to_textureB(const cudaArray *const array, const int ndims, const bool interp_mode)
const char *const cudalockfile
void unbind_cuda_textureB(const int ndims)
void bind_cuda_array_to_textureA(const cudaArray *const array, const int ndims, const bool interp_mode)
#define UnexpectedBehaviorException(desc)