Attachment 'emen2client.py'


   1 #!/usr/bin/env python
   2 # $Id: emen2client.py,v 1.106 2010/11/19 09:08:26 irees Exp $
   3 
   4 import time
   5 import os
   6 import sys
   7 import subprocess
   8 import optparse
   9 import getpass
  10 import glob
  11 import collections
  12 import operator
  13 import fnmatch
  14 import struct
  15 import copy
  16 import shutil
  17 import urllib
  18 import urllib2
  19 import urlparse
  20 import httplib
  21 import xmlrpclib
  22 import gzip
  23 import zlib
  24 import math
  25 import StringIO
  26 import socket
  27 import tempfile
  28 import re
  29 
  30 try:
  31 	import distutils.version
  32 except ImportError:
  33 	pass
  34 
  35 __version__ = "$Revision: 1.106 $".split(":")[1][:-1].strip()
  36 USER_AGENT = "emen2client/%s"%__version__
  37 
  38 try:
  39 	import json
  40 except ImportError:
  41 	try:
  42 		import simplejson as json
  43 	except ImportError:
  44 		print """Note: No JSON module found. JSON support is not required, but is recommended. Please upgrade to Python 2.6 or install simplejson ("sudo easy_install simplejson")."""
  45 		json = None
  46 
  47 try:
  48 	import EMAN2
  49 	import hashlib
  50 except ImportError:
  51 	# print "EMAN2 support not available"
  52 	EMAN2 = None
  53 
  54 
  55 try:
  56 	import xml.etree.ElementTree as ET
  57 except ImportError:
  58 	ET = None
  59 
  60 try:
  61 	import emen2.clients.jsonrpc.proxy
  62 except ImportError:
  63 	pass
  64 
  65 
  66 
  67 
  68 def check_output(args, **kwds):
  69 	kwds.setdefault("stdout", subprocess.PIPE)
  70 	kwds.setdefault("stderr", subprocess.STDOUT)
  71 	p = subprocess.Popen(args, **kwds)
  72 	return p.communicate()[0]
  73 
  74 # sign() is called by StackHandler.extheader_flags to decode the SerialEM dose field.
  75 def sign(x):
  76 	return -1 if x < 0 else 1
  77 ##################################
  78 # Compression schemes
  79 ##################################
  80 
  81 # Compression is handled on the fly now during upload / download with a gzip pipe
  82 
  83 # Based on http://code.activestate.com/recipes/551784/
  84 class GzipPipe(StringIO.StringIO):
  85 	"""This class implements a compression pipe suitable for asynchronous
  86 	processing."""
  87 
  88 	# Size of each chunk read from the source file
  89 	CHUNK_SIZE = 1024*1024
  90 	COMPRESSLEVEL = 3
  91 
  92 	def __init__(self, filename=None) :
  93 		"""Streaming compression using gzip.GzipFile
  94 
  95 		@param filename source file
  96 
  97 		"""
  98 
  99 		self.filename = filename
 100 		self.source = open(self.filename, "rb")
 101 		self.source_eof = False
 102 		self.buffer = ""
 103 		self.name = self.filename + ".gz"
 104 
 105 		self.pos = 0
 106 
 107 		StringIO.StringIO.__init__(self)
 108 		#super(GzipPipe, self).__init__(self)
 109 		self.zipfile = gzip.GzipFile(filename=str(os.path.basename(filename)), mode='wb', compresslevel=self.COMPRESSLEVEL, fileobj=self)
 110 
 111 
 112 	def write(self, data):
 113 		"""Writes data to internal buffer. Do not call from outside."""
 114 		self.buffer += data
 115 
 116 
 117 	def read(self, size=-1) :
 118 		"""Calling read() on a zip pipe will suck data from the source stream.
 119 		@param	size Maximum size to read - Read whole compressed file if not specified.
 120 		@return Compressed data
 121 		"""
 122 
 123 		# Feed the zipped buffer by writing source data to the zip stream
 124 		while ((len(self.buffer) < size) or (size == -1)) and not self.source_eof:
 125 
 126 			# Get a chunk of source data
 127 			chunk = self.source.read(GzipPipe.CHUNK_SIZE)
 128 			self.pos += len(chunk)
 129 
 130 			# Feed the source zip file (that fills the compressed buffer)
 131 			self.zipfile.write(chunk)
 132 
 133 			# End of source file ?
 134 			if len(chunk) < GzipPipe.CHUNK_SIZE:
 135 				self.source_eof = True
 136 				self.zipfile.flush()
 137 				self.zipfile.close()
 138 				self.source.close()
 139 				break
 140 
 141 
 142 		# We have enough data in the buffer (or source file is EOF): Give it to the output
 143 		if size == 0:
 144 			result = ""
 145 		elif size >= 1:
 146 			result = self.buffer[0:size]
 147 			self.buffer = self.buffer[size:]
 148 		else: # size < 0 : all requested
 149 			result = self.buffer
 150 			self.buffer = ""
 151 
 152 		return result
 153 
 154 
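# Usage sketch (hedged; "example.mrc" stands in for any local file). Reading
# from the pipe pulls raw bytes from disk and hands back gzip-compressed data,
# so the whole file never has to sit in memory:
#
#	pipe = GzipPipe(filename="example.mrc")
#	out = open(pipe.name, "wb")		# pipe.name is "example.mrc.gz"
#	chunk = pipe.read(1024*1024)
#	while chunk:
#		out.write(chunk)
#		chunk = pipe.read(1024*1024)
#	out.close()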
 155 
 156 ##################################
 157 # Exceptions
 158 ##################################
 159 
 160 class OverwriteError(Exception):
 161 	"""File exists, and flags are set to prevent overwriting"""
 162 
 163 
 164 
 165 ##################################
 166 # Options
 167 ##################################
 168 
 169 class DBRPCOptions(optparse.OptionParser):
 170 	"""Default options to be used by EMEN2 clients"""
 171 
 172 	def __init__(self, *args, **kwargs):
 173 		#super(DBRPCOptions, self).__init__(self, *args, **kwargs)
 174 		optparse.OptionParser.__init__(self, *args, **kwargs)
 175 
 176 		defaulthost = os.getenv("EMEN2URI", "http://ncmidb.bcm.edu")
 177 		self.add_option("--username","-U", type="string", help="Username")
 178 		self.add_option("--password","-P", type="string", help="Password (Note: specifying passwords in shell commands is not secure)")
 179 
 180 		self.add_option("--json", "-J", action="store_true", default=False, help="Enable JSON-RPC (Experimental)")
 181 
 182 		self.add_option("--host","-H",type="string",help="Host endpoint URI; default is %s"%defaulthost, default=defaulthost)
 183 		self.add_option("--ctxid","-C",type="string",help="Valid Context ID")
 184 
 185 	#def load_config(self):
 186 	#	"""Load from config file"""
 187 	#	pass
 188 
 189 
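# Standalone usage sketch (the username and host below are placeholders; the
# host default comes from the EMEN2URI environment variable if set):
#
#	parser = DBRPCOptions()
#	options, args = parser.parse_args(["-U", "demo", "-H", "http://ncmidb.bcm.edu"])
#	print options.username, options.host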
 190 
 191 ##################################
 192 # Main exported DB interface classes
 193 ##################################
 194 
 195 
 196 	
 197 class DBJSONRPCProxy(object):
 198 	_ctxid = property(lambda s: s.proxy.get_ctxid())
 199 	def __init__(self, host=None, username=None, password=None, ctxid=None):
 200 		self.proxy = emen2.clients.jsonrpc.proxy.Emen2Proxy(host)
 201 		if username:
 202 			self.proxy._login(username, password)
 203 		else:
 204 			self.proxy.set_ctxid(ctxid)
 205 		self._host = host
 206 	def __getattr__(self, name):
 207 		return getattr(self.proxy, name)
 208 	def _login(self, username=None, password=None):
 209 		self.proxy._login(username, password)
 210 	def _setContext(self, ctxid):
 211 		self.proxy.set_ctxid(ctxid)
 212 	def _getContext(self):
 213 		return self.proxy.get_ctxid()
 214 
 215 
 216 
 217 
 218 class DBXMLRPCProxy(xmlrpclib.ServerProxy):
 219 	"""Provide a more feature-rich DB XMLRPC interface than the bare python xmlrpclib"""
 220 
 221 	def __init__(self, host=None, username=None, password=None, ctxid=None):
 222 		#super(DBXMLRPCProxy, self).__init__(host, allow_none=True)
 223 		xmlrpclib.ServerProxy.__init__(self, host, allow_none=True)
 224 
 225 		self.__username = username
 226 		self.__password = password
 227 
 228 		self._ServerProxy__allow_none = True
 229 		self._host = host
 230 		self._handler = self._ServerProxy__handler
 231 
 232 		self._setContext(ctxid)
 233 
 234 
 235 	def __wrap_request(self, method, params):
 236 		try:
 237 			ret = self._ServerProxy__request(method, params)
 238 		except xmlrpclib.Error, err:
 239 			#raise xmlrpclib.Fault(0, "Error: %s"%(err.headers.get('X-Error')))
 240 			raise ValueError, str(err.headers.get('X-Error'))
 241 
 242 		except Exception, e:
 243 			raise
 244 
 245 		return ret
 246 
 247 
 248 	def __getattr__(self, name):
 249 		# print "-> ", name
 250 		return xmlrpclib._Method(self.__wrap_request, name)
 251 
 252 
 253 	def _login(self, username=None, password=None):
 254 		ctxid = self.__wrap_request("login", (username or self.__username, password or self.__password))
 255 		self.__password = None
 256 		return self._setContext(ctxid)
 257 
 258 
 259 	def _setContext(self, ctxid):
 260 		# ian: todo: make handler a dynamically generated property? maybe not..
 261 		self._ctxid = ctxid
 262 		if self._ctxid is not None:
 263 			self._ServerProxy__handler = "%s?ctxid=%s"%(self._handler, self._ctxid)
 264 		return self._ctxid
 265 
 266 
 267 	def _getContext(self):
 268 		return self._ctxid
 269 
 270 
 271 
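# Connection sketch (the credentials are placeholders, not a real account).
# Any server-side method name is proxied through __getattr__, so after login
# the object behaves like a local API:
#
#	db = DBXMLRPCProxy(host="http://ncmidb.bcm.edu")
#	db._login(username="demo", password="secret")
#	rec = db.getrecord(0)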
 272 
 273 ##################################
 274 # File Transport
 275 ##################################
 276 
 277 class Transport(object):
 278 	def __init__(self, db=None, bdo=None, bdoid=None, param="file_binary", recid=None, newrecord=None, compress=True, filename=None, filedata=None, report=None, pos=None):
 279 		"""Needs a BDO or BDO ID for download, or a filename plus either a recid or a newrecord for upload
 280 
 281 		@keyparam db DBProxy
 282 		@keyparam bdo BDO object
 283 		@keyparam bdoid BDO Id
 284 		@keyparam filename Filename to upload
 285 		@keyparam filedata Upload a string buffer instead of file reference. In this case, filename will be used for the name of the file.
 286 		@keyparam report Upload/download status report hook/callback
 287 		@keyparam compress Compress file
 288 		"""
 289 
 290 		self.db = db
 291 		self.bdo = bdo
 292 		self.bdoid = bdoid
 293 		self.filename = filename
 294 		self.filedata = filedata
 295 		self.newrecord = newrecord
 296 		self.param = param
 297 		self.recid = recid
 298 		self.compress = compress
 299 		self.pos = pos
 300 		self._report = report
 301 		self.recmap = {}
 302 
 303 
 304 	def _retryhandler(self, method, *args, **kwargs):
 305 		"""Automatic retry wrapper
 306 		@param method self._upload or self._download
 307 		"""
 308 
 309 		count = 0
 310 		ret = None
 311 		while True:
 312 			count += 1
 313 			try:
 314 				ret = method(*args, **kwargs)
 315 				break
 316 
 317 			except KeyboardInterrupt, e:
 318 				print "Keyboard Interrupt"
 319 				raise
 320 
 321 			# socket.error is recoverable, others are not recoverable!
 322 			except socket.error, e:
 323 				self.report("Network error, trying again in 10 seconds (%s attempts): %s"%(count, e), i=1)
 324 				time.sleep(10)
 325 
 326 			except Exception, e:
 327 				self.report("Unrecoverable error, aborting: %s"%(e), i=1)
 328 				raise
 329 
 330 		return ret
 331 
 332 
 333 	def report(self, msg=None, progress=None, pos=None, i=0):
 334 		try:
 335 			self._report(msg=msg, progress=progress, f=self.filename or self.bdo.get("name"), pos=pos, i=i+1)
 336 		except:
 337 			pass
 338 
 339 
 340 	def sidecar_read(self, filename=None):
 341 		filename = filename or self.filename
 342 		try:
 343 			return json.load(open(filename+".json", "r"))
 344 		except:
 345 			pass
 346 
 347 
 348 	def sidecar_write(self, filename=None):
 349 		filename = filename or self.filename
 350 		try:
 351 			json.dump(self.bdo, open(filename+".json", "w"), indent=True)
 352 			self.report("Wrote sidecar: %s.json"%filename, i=1)
 353 		except:
 354 			pass
 355 
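	# The sidecar is simply the BDO mapping serialized to JSON beside the
	# original file (e.g. "scan001.mrc" gets "scan001.mrc.json"; the name here
	# is illustrative). Its "recid" key is how UploadTransport.action()
	# recognizes, and skips, files that were already uploaded.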
 356 
 357 	def setdb(self, db):
 358 		self.db = db
 359 
 360 
 361 	def setreport(self, report):
 362 		self._report = report
 363 
 364 
 365 	def setrecmap(self, recmap):
 366 		self.recmap = recmap
 367 		self.recid = recmap.get(self.recid, self.recid)
 368 		if self.newrecord:
 369 			self.newrecord["children"] = [recmap.get(i,i) for i in self.newrecord.get("children",[])]
 370 			self.newrecord["parents"] = [recmap.get(i,i) for i in self.newrecord.get("parents",[])]
 371 
 372 
 373 	def action(self, *args, **kwargs):
 374 		pass
 375 
 376 
 377 
 378 class DownloadTransport(Transport):
 379 
 380 	def action(self, overwrite=False, rename=False, sidecar=False, *args, **kwargs):
 381 		"""Download BDO to local disk
 382 
 383 		@keyparam overwrite Overwrite an existing file
 384 		@keyparam rename If file exists, rename file to avoid overwriting
 385 
 386 		"""
 387 
 388 		self.report(pos=self.pos)
 389 
 390 		try:
 391 			# Check that we can download the file, and then get it
 392 			self._download_checkfile(overwrite=overwrite, rename=rename)
 393 			self.report("Downloading %s -> %s..."%(self.bdo.get("filename"), self.filename), i=1)
 394 
 395 			ret = self._retryhandler(self._download)
 396 
 397 			if sidecar:
 398 				self.sidecar_write()
 399 
 400 		except OverwriteError, e:
 401 			# Or report that we can't download it
 402 			self.report("Skipping file: %s"%e, i=1)
 403 			return self.filename
 404 
 405 		return self.filename
 406 
 407 
 408 
 409 
 410 	def _download_checkfile(self, overwrite=False, rename=False):
 411 		"""Check whether we can write the file, and whether to use compression. See download() for params."""
 412 
 413 		filename = os.path.basename(self.bdo.get("filename")) or self.bdo.get("name")
 414 
 415 		fsplit = os.path.splitext(filename)
 416 		if self.compress and fsplit[-1] == ".gz":
 417 			compress = True
 418 			filename = fsplit[0]
 419 		else:
 420 			compress = False
 421 
 422 		# this may raise OverwriteError Exception, which should be caught by caller
 423 		if os.path.exists(filename) and not overwrite:
 424 			if rename:
 425 				filename = "duplicate.%s:%s"%(self.bdo.get("recid"), filename)
 426 			else:
 427 				raise OverwriteError, "File exists: %s"%filename
 428 
 429 		self.filename = filename
 430 		self.compress = compress
 431 
 432 
 433 
 434 	def _download(self):
 435 		"""Actual download."""
 436 
 437 		# Setup connection
 438 		ctxid = urllib.urlencode({"ctxid":self.db._ctxid})
 439 
 440 		# ian: changed .netloc to [1] because it caused failure on python 2.4
 441 		host = urlparse.urlparse(self.db._host)[1] # .netloc
 442 		path = "/download/%s?%s"%(self.bdo.get("name"), ctxid)
 443 
 444 		# Connect
 445 		http = httplib.HTTPConnection(host)
 446 		http.request("GET", path)
 447 		resp = http.getresponse()
 448 		# Content-Length tells us how many bytes to expect
 449 		clength = float(resp.getheader('content-length'))
 450 
 451 		# Download and pipe through gzip
 452 		stdout = None
 453 		if self.compress:
 454 			stdout = open(self.filename, "wb")
 455 			gz = subprocess.Popen(["gzip","-d"], stdout=stdout, stdin=subprocess.PIPE)
 456 			fio = gz.stdin
 457 		else:
 458 			fio = open(self.filename,"wb")
 459 
 460 		chunksize = 1024*1024
 461 		outlen = 0
 462 		while outlen < clength:
 463 			chunk = resp.read(chunksize)
 464 			if not chunk:
 465 				break # connection closed early; avoid looping forever
 466 			outlen += len(chunk)
 467 			fio.write(chunk)
 468 			self.report(progress=(outlen/clength))
 469 		fio.close()
 470 		if stdout: stdout.close()
 471 		http.close()
 472 
 473 		return self.filename
 474 
 475 
 476 
 477 
 478 class UploadTransport(Transport):
 479 	"""Transfer files from/to DB"""
 480 
 481 	########################
 482 	# Upload methods
 483 	########################
 484 
 485 	def action(self):
 486 		"""Handler provides records and files to put in DB"""
 487 
 488 		self.report(pos=self.pos)
 489 
 490 		s = self.sidecar_read()
 491 		if s != None:
 492 			self.report("File already exists in database. Check record %s."%s.get("recid"), i=1)
 493 			self.bdo = s
 494 			self.recid = s.get('recid')
 495 			return
 496 
 497 		ret = self._retryhandler(self._upload)
 498 		self.sidecar_write()
 499 		return ret
 500 
 501 
 502 
 503 	def _upload(self, mimetype=None):
 504 		"""Implements Transfer-Encoding:Chunked over a PUT request, with optional gzip pipe streaming. See upload() for arguments."""
 505 
 506 		# Upload a string buffer as a file
 507 		if self.filedata:
 508 			filesize = len(self.filedata)
 509 			fio = StringIO.StringIO(self.filedata)
 510 			filename = self.filename
 511 
 512 
 513 		# Or upload a file from disk
 514 		else:
 515 			# Get file size for progress bar
 516 			filesize = float(os.stat(self.filename).st_size)
 517 
 518 			# Are we compressing this file?
 519 			if self.compress:
 520 				fio = GzipPipe(filename=self.filename)
 521 			else:
 522 				fio = open(self.filename, "rb")
 523 
 524 			filename = os.path.basename(fio.name)
 525 
 526 
 527 		if filesize == 0:
 528 			self.report("Warning: Uploading empty file!")
 529 			filesize = 1
 530 
 531 
 532 		# Prepare connection info
 533 		# recid = recid or self.newrecord.get('recid')
 534 		qs = {
 535 			"param": self.param,
 536 			"ctxid": self.db._ctxid,
 537 			"filename": filename
 538 		}
 539 
 540 
 541 		# If rec is set, with json support, use one-step newrecord/putbinary
 542 		if self.newrecord:
 543 			tmprecid = self.newrecord.get("recid")
 544 			if json:
 545 				qs["newrecord"] = json.dumps(self.newrecord)
 546 				self.recid = None
 547 			else:
 548 				self.report("Warning: JSON support is strongly recommended for upload; using workaround..")
 549 				newrec = self.db.putrecord(self.newrecord)
 550 				self.recid = newrec.get("recid")
 551 
 552 
 553 		if self.newrecord is None and self.recid is None:
 554 			raise ValueError, "No new record or target recid specified for upload"
 555 
 556 		# ._put
 557 		qs = urllib.urlencode(qs)
 558 		host = self.db._host.split("://")[-1]
 559 		path = "/upload/%s?%s"%(self.recid or "", qs)
 560 
 561 		# Set headers
 562 		headers = {"User-Agent":USER_AGENT}
 563 		headers["Transfer-Encoding"] = "chunked"
 564 		if self.compress:
 565 			mimetype = "application/x-gzip"
 566 		if mimetype:
 567 			headers["Content-Type"] = mimetype
 568 
 569 
 570 		# Open connection
 571 		http = httplib.HTTPConnection(host)
 572 		http.request("PUT", path, headers=headers)
 573 
 574 		t = time.time()
 575 
 576 		# Upload in chunks
 577 		chunksize = 1024*1024
 578 
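		# Wire format: each chunk goes out as "<hex length>\r\n<data>\r\n"; the
		# zero-length read when fio is exhausted produces the terminating
		# "0\r\n\r\n" required by HTTP chunked encoding (RFC 2616, sec. 3.6.1).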
 579 		while True:
 580 			try:
 581 				chunk = fio.read(chunksize)
 582 				cs = len(chunk)
 583 				http.send("%X\r\n"%(cs))
 584 				http.send(chunk)
 585 				http.send("\r\n")
 586 				self.report(progress=(fio.tell()/filesize))
 587 
 588 			except socket.error:
 589 				if fio: fio.close()
 590 				raise
 591 
 592 			if not chunk:
 593 				if fio: fio.close()
 594 				break
 595 
 596 		# Responses..
 597 		resp = http.getresponse()
 598 		status, reason, response = resp.status, resp.reason, resp.read()
 599 		http.close()
 600 
 601 		if status not in range(200, 300):
 602 			raise httplib.HTTPException, "Bad response code: %s: %s"%(status, reason)
 603 
 604 		kbsec = (filesize / (time.time() - t))/1024
 605 
 606 		if json:
 607 			response = json.loads(response)
 608 			self.bdo = response
 609 			self.recid = self.bdo.get("recid")
 610 			self.report("Done. Uploaded %s to record %s @ %0.2f kb/sec"%(self.filename, self.bdo.get("recid"), kbsec), i=1)
 611 			if self.newrecord:
 612 				self.recmap[tmprecid] = self.bdo.get("recid")
 613 
 614 		else:
 615 			self.report("Done. Uploaded %s @ %0.2f kb/sec"%(self.filename, kbsec), i=1)
 616 
 617 		return response
 618 
 619 
 620 
 621 
 622 class NewRecordTransport(Transport):
 623 	"""This class is similar to UploadTransport, but doesn't upload a file, just creates a record"""
 624 
 625 	def action(self, *args, **kwargs):
 626 		tmprecid = self.newrecord.get('recid')
 627 		newrec = self.db.putrecord(self.newrecord)
 628 		self.recmap[tmprecid] = newrec.get('recid')
 629 
 630 		# The return isn't a BDO, but it will do..
 631 		self.recid = newrec.get('recid')
 632 		self.bdo = {'recid': self.recid}
 633 
 634 
 635 
 636 
 637 ##################################
 638 # Base FileHandler
 639 ##################################
 640 
 641 class FileHandler(object):
 642 
 643 	request_params = []
 644 
 645 	def __init__(self, db=None, recid=None, filename=None, filedata=None, rectype=None, applyparams=None, pos=None, options=None, **kwargs):
 646 		"""Provide special support for different filetypes
 647 
 648 		@keyparam db DBProxy
 649 		@keyparam recid Record ID for download or upload
 650 		@keyparam filename Filename for upload
 651 		@keyparam rectype Record type for default-setting record creation
 652 		@keyparam applyparams Parameters to overlay on new records (see: CCDHandler)
 653 		@keyparam pos Display this FileHandler's position in the overall queue, tuple (position, total)
 654 		@keyparam options Options. See UploadController and DownloadController OptionParsers for details.
 655 		"""
 656 
 657 		self.db = db
 658 
 659 		# target recid for upload or download
 660 		self.recid = recid
 661 
 662 		# filename for upload..
 663 		self.filename = filename
 664 		self.filedata = filedata
 665 		self.bdos = []
 666 
 667 		# new records to apply to uploads
 668 		self.rectype = rectype
 669 		self.applyparams = applyparams or {}
 670 
 671 		# default and passed options
 672 		self.options = {
 673 			"recurse": -1,
 674 			"overwrite": False,
 675 			"rename": False,
 676 			"compress": True,
 677 		}
 678 
 679 		if options:
 680 			self.options.update(options)
 681 
 682 		if kwargs:
 683 			self.options.update(kwargs)
 684 
 685 		# internal attributes
 686 		self.tmpfiles = []
 687 		self.checked = False
 688 		self.quiet = False
 689 		self.pos = pos
 690 		self.progress = 0.0
 691 		self.listeners = []
 692 
 693 		self.init()
 694 
 695 
 696 	def init(self):
 697 		pass
 698 
 699 
 700 
 701 	######################
 702 	# Upload
 703 	######################
 704 
 705 	def upload(self):
 706 		"""Start the upload process for this handler"""
 707 
 708 		ret = collections.defaultdict(list)
 709 		recmap = {}
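		# recmap translates the temporary negative recids used in newrecord
		# definitions (-1, -100, ...) into the real recids assigned by the
		# server, so later items in the batch can reference records created
		# earlier (see Transport.setrecmap).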
 710 
 711 		self.report(pos=self.pos, f=self.filename)
 712 		self.report("Preparing for upload", i=1)
 713 
 714 		self.prepare_upload()
 715 		files = self.get_upload_items()
 716 
 717 		self.report("Checking and committing records", i=1)
 718 
 719 
 720 		# Check records, call newrecord to properly init, validate, set permissions, etc.
 721 		# If a particular record doesn't validate, will throw exception
 722 		for f in files:
 723 			if f.newrecord:
 724 				f.newrecord = self._newrecordinit(newrecord=f.newrecord)
 725 								
 726 
 727 		# Upload other files/newrecords
 728 		filecount = len(files)
 729 		for count, f in enumerate(files):
 730 
 731 			f.setdb(self.db)
 732 			f.setreport(self.report)
 733 			f.setrecmap(recmap)
 734 
 735 			target_recid = f.recid
 736 
 737 			if filecount > 1:
 738 				f.pos = (count, filecount)
 739 
 740 			f.action()
 741 			recmap[target_recid] = f.recid
 742 			ret[f.filename] = f.bdo
 743 
 744 		self.cleanup()
 745 		return ret
 746 
 747 
 748 	def get_upload_items(self):
 749 		"""Returns records and files to upload"""
 750 		f = UploadTransport(recid=self.recid, filename=self.filename)
 751 		return [f]
 752 
 753 
 754 	def prepare_upload(self):
 755 		"""Do any required prep work before files can be uploaded"""
 756 		self.check_db()
 757 
 758 
 759 	def _newrecordinit(self, newrecord):
 760 		"""Initialize and validate a new record"""
 761 
 762 		tmprec = self.db.newrecord(newrecord.get('rectype'), self.recid)
 763 		tmprec.update(newrecord)
 764 		self.db.validaterecord(tmprec)
 765 		return tmprec
 766 
 767 
 768 
 769 	######################
 770 	# Download
 771 	######################
 772 
 773 	def download(self):
 774 		"""Start the download process for this handler"""
 775 
 776 		overwrite = self.options.get("overwrite")
 777 		rename = self.options.get("rename")
 778 		compress = self.options.get("compress")
 779 		sidecar = self.options.get("sidecar")
 780 
 781 		self.report(pos=self.pos, f=self.recid)
 782 		self.report("Checking for items to download", i=1)
 783 
 784 		recs, bdos = self.get_download_items()
 785 
 786 		bdolen = len(bdos)
 787 
 788 		if bdos:
 789 			self.report("Found %s items in %s records"%(bdolen, len(recs)), i=1)
 790 		else:
 791 			self.report("Found no downloadable items in %s records"%len(recs), i=1)
 792 			return
 793 
 794 		ret = {}
 795 
 796 		for count, bdo in enumerate(bdos):
 797 			u = DownloadTransport(db=self.db, bdo=bdo, report=self.report, compress=self.options.get("compress"), pos=(count, bdolen))
 798 			u.action(overwrite=overwrite, rename=rename, sidecar=sidecar)
 799 			ret[u.filename] = u.bdo
 800 
 801 		self.download_postprocess(recs, ret)
 802 
 803 		return ret
 804 
 805 
 806 
 807 	def download_postprocess(self, recs, ret):
 808 		pass
 809 
 810 
 811 
 812 	def get_download_items(self):
 813 		"""Find child records of self.recid and get their BDOs"""
 814 
 815 		#recs = self.db.getchildren(self.recid, "record", self.options.get("recurse"), None, True, True)
 816 		recs = self.db.getchildren(self.recid, self.options.get("recurse"))
 817 		recs.append(self.recid)
 818 		bdos = self.db.getbinary(recs)
 819 		return recs, bdos
 820 
 821 
 822 
 823 	########################
 824 	# Status updates
 825 	########################
 826 
 827 	def addlistener(self, callback):
 828 		self.listeners.append(callback)
 829 
 830 
 831 	def report(self, msg=None, progress=None, f=None, pos=None, i=0):
 832 
 833 		for listener in self.listeners:
 834 			listener(msg=msg, progress=progress, f=f)
 835 
 836 		if self.quiet:
 837 			return
 838 
 839 		# Normally suppress this...
 840 		if progress != None and msg == None:
 841 			return
 842 
 843 		if not msg:
 844 			if pos:
 845 				print "\n%s%s of %s: %s"%("\t"*i, pos[0]+1, pos[1], f)
 846 			else:
 847 				print "\n%s%s"%("\t"*i, f)
 848 
 849 		else:
 850 			print "%s%s"%("\t"*i, msg)
 851 
 852 
 853 	########################
 854 	# Utility methods
 855 	########################
 856 
 857 
 858 	def _shortsleep(self):
 859 		time.sleep(10)
 860 
 861 
 862 	def check_db(self):
 863 		pass
 864 
 865 
 866 	########################
 867 	# Overrideable methods for upload control
 868 	########################
 869 
 870 	def cleanup(self):
 871 		for i in self.tmpfiles:
 872 			try:
 873 				self.report("Cleanup temp file: %s"%i, i=1)
 874 				os.unlink(i)
 875 			except Exception, e:
 876 				self.report("Warning: Could not remove %s: %s"%(i, e), i=1)
 877 
 878 
 879 
 880 
 881 
 882 
 883 #################################
 884 # Filetype-specific handlers
 885 ##################################
 886 
 887 
 888 class NewRecordFileHandler(FileHandler):
 889 	def get_upload_items(self):
 890 		newrecord = {}
 891 		newrecord["recid"] = -1
 892 		newrecord["rectype"] = self.rectype
 893 		newrecord["parents"] = [self.recid]
 894 		return [UploadTransport(newrecord=newrecord, filename=self.filename, param="file_binary_image")]
 895 
 896 
 897 
 898 class VolumeHandler(NewRecordFileHandler):
 899 	def get_upload_items(self):
 900 		newrecord = {}
 901 		newrecord["recid"] = -1
 902 		newrecord["title_structure"] = self.filename
 903 		newrecord["rectype"] = self.rectype
 904 		newrecord["parents"] = [self.recid]
 905 		return [UploadTransport(newrecord=newrecord, filename=self.filename, param="file_binary_image")]
 906 
 907 
 908 
 909 class BoxHandler(FileHandler):
 910 	def get_upload_items(self):
 911 		newrecord = {}
 912 		newrecord["recid"] = -1
 913 		newrecord["rectype"] = "box"
 914 		newrecord["parents"] = [self.recid]
 915 		newrecord.update(self.applyparams)
 916 		return [UploadTransport(newrecord=newrecord, filename=self.filename, filedata=self.filedata, compress=False, param="box")]
 917 
 918 
 919 
 920 class ParticleHandler(FileHandler):
 921 	def get_upload_items(self):
 922 		newrecord = {}
 923 		newrecord["recid"] = -1
 924 		newrecord["rectype"] = "particles"
 925 		newrecord["parents"] = [self.recid]
 926 		return [UploadTransport(newrecord=newrecord, filename=self.filename, compress=False, param="ptcl")]
 927 
 928 
 929 
 930 class GridImagingFileHandler(FileHandler):
 931 	def check_db(self):
 932 		gi = self.db.getrecord(self.recid)
 933 
 934 		if gi.get("rectype") != "grid_imaging":
 935 			raise Exception, "This action may only be used with grid_imaging sessions!"
 936 
 937 		microscopy = self.db.getparents(self.recid, 1, ["microscopy"])
 938 		if not microscopy:
 939 			self.report("\tWARNING! No microscopy record present for grid_imaging session!")
 940 			#raise Exception, "No microscopy record present for grid_imaging session!"
 941 
 942 
 943 
 944 class MicrographHandler(GridImagingFileHandler):
 945 	"""Creates a placeholder for a scan to be uploaded in the future"""
 946 
 947 	def get_upload_items(self):
 948 		newrecord = {}
 949 		newrecord["recid"] = -1
 950 		newrecord["rectype"] = "micrograph"
 951 		newrecord["id_micrograph"] = os.path.basename(self.filename)
 952 		newrecord.update(self.applyparams)
 953 		newrecord["parents"] = [self.recid]
 954 		return [NewRecordTransport(newrecord=newrecord, filename=self.filename)]
 955 	
 956 
 957 
 958 class JADASHandler(GridImagingFileHandler):
 959 	"""Uploads a .tif produced by JADAS, mapping and attaching its associated XML metadata files"""
 960 
 961 
 962 	def get_upload_items(self):
 963 		# This is run for a .tif file produced by JADAS. Find the associated .xml files, load them, map as many
 964 		# parameters as possible, and attach the raw xml file.
 965 		jadas_params, foundfiles = self.load_jadas_xml()
 966 		newrecord = {}
 967 		newrecord["recid"] = -100
 968 		newrecord["rectype"] = "ccd_jadas"
 969 		newrecord["id_micrograph"] = os.path.basename(self.filename)
 970 		newrecord.update(self.applyparams)
 971 		newrecord.update(jadas_params)
 972 		newrecord["parents"] = [self.recid]
 973 		
 974 		files = [UploadTransport(recid=-100, filename=self.filename, param='file_binary_image')]
 975 		files[0].newrecord = newrecord
 976 		
 977 		for i in foundfiles:
 978 			files.append(UploadTransport(recid=-100, filename=i, compress=False))
 979 				
 980 		return files
 981 
 982 
 983 	def load_jadas_xml(self):
 984 		# find related XML files, according to JADAS naming conventions
 985 		# take off the .tif, because the xml could be either file.tif_metadata.xml or file_metadata.xml
 986 		foundfiles = []
 987 		ret = {}		
 988 		for xmlfile in glob.glob('%s_*.xml'%self.filename) + glob.glob('%s_*.xml'%self.filename.replace('.tif','')):
 989 			print "Attempting to load ", xmlfile
 990 			try:
 991 				e = ET.parse(xmlfile)
 992 				root = e.getroot()
 993 				# There should be a loader for each root tag type, e.g. TemParameter -> map_jadas_TemParameter
 994 				loader = getattr(self, 'map_jadas_%s'%root.tag, None)
 995 				if loader:
 996 					ret.update(loader(root))
 997 					foundfiles.append(xmlfile)
 998 			except Exception, e:
 999 				print "Could not load %s: %s"%(xmlfile, e)
1000 
1001 		return ret, foundfiles
1002 
1003 
1004 
1005 	def map_jadas_TemParameter(self, root):
1006 		"""One of these long, ugly, metadata-mapping methods"""
1007 		ret = {}
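		# Shape of the XML being walked, inferred from the find() paths below
		# (attribute values are placeholders):
		#	<TemParameter>
		#		<Defocus><defocus absDac="..."/><intendedDefocus valInNm="..."/></Defocus>
		#		<Eos><eos spot="..." alpha="..."/></Eos>
		#		<Lens><lens cl1Dac="..."/></Lens>
		#		...
		#	</TemParameter>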
1008 		# Defocus
1009 		ret['defocus_absdac'] = root.find('Defocus/defocus').get('absDac')
1010 		ret['defocus_realphysval'] = root.find('Defocus/defocus').get('relPhisVal')
1011 		ret['intendeddefocus_valinnm'] = root.find('Defocus/intendedDefocus').get('valInNm')
1012 		d = root.find('Defocus/intendedDefocus').get('valInNm')
1013 		if d != None:
1014 			d = float(d) / 1000.0
1015 			ret['ctf_defocus_set'] = d
1016 
1017 		# Eos
1018 		ret['eos_brightdarkmode'] = root.find('Eos/eos').get('brightDarkMode')
1019 		ret['eos_darklevel'] = root.find('Eos/eos').get('darkLevel')
1020 		ret['eos_stiglevel'] = root.find('Eos/eos').get('stigLevel')
1021 		ret['eos_temasidmode'] = root.find('Eos/eos').get('temAsidMode')
1022 		ret['eos_htlevel'] = root.find('Eos/eos').get('htLevel')
1023 		ret['eos_imagingmode'] = root.find('Eos/eos').get('imagingMode')
1024 		ret['eos_magcamindex'] = root.find('Eos/eos').get('magCamIndex')
1025 		ret['eos_spectrummode'] = root.find('Eos/eos').get('spectrumMode')
1026 		ret['eos_illuminationmode'] = root.find('Eos/eos').get('illuminationMode')
1027 		ret['eos_spot'] = root.find('Eos/eos').get('spot')
1028 		ret['eos_alpha'] = root.find('Eos/eos').get('alpha')
1029 
1030 		# Lens
1031 		ret['lens_cl1dac'] = root.find('Lens/lens').get('cl1Dac')
1032 		ret['lens_cl2dac'] = root.find('Lens/lens').get('cl2Dac')
1033 		ret['lens_cl3dac'] = root.find('Lens/lens').get('cl3Dac')
1034 		ret['lens_cmdac'] = root.find('Lens/lens').get('cmDac')
1035 		ret['lens_il1dac'] = root.find('Lens/lens').get('il1Dac')
1036 		ret['lens_il2dac'] = root.find('Lens/lens').get('il2Dac')
1037 		ret['lens_il3dac'] = root.find('Lens/lens').get('il3Dac')
1038 		ret['lens_il4dac'] = root.find('Lens/lens').get('il4Dac')
1039 		ret['lens_pl1dac'] = root.find('Lens/lens').get('pl1Dac')
1040 		ret['lens_pl2dac'] = root.find('Lens/lens').get('pl2Dac')
1041 		ret['lens_pl3dac'] = root.find('Lens/lens').get('pl3Dac')
1042 		
1043 		# Def
1044 		ret['def_gunshiftx'] = root.find('Def/def').get('gunShiftX')
1045 		ret['def_gunshifty'] = root.find('Def/def').get('gunShiftY')
1046 		ret['def_guntiltx'] = root.find('Def/def').get('gunTiltX')
1047 		ret['def_guntilty'] = root.find('Def/def').get('gunTiltY')
1048 		ret['def_beamshiftx'] = root.find('Def/def').get('beamShiftX')
1049 		ret['def_beamshifty'] = root.find('Def/def').get('beamShiftY')
1050 		ret['def_beamtiltx'] = root.find('Def/def').get('beamTiltX')
1051 		ret['def_beamtilty'] = root.find('Def/def').get('beamTiltY')			
1052 		ret['def_clstigx'] = root.find('Def/def').get('clStigX')
1053 		ret['def_clstigy'] = root.find('Def/def').get('clStigY')
1054 		ret['def_olstigx'] = root.find('Def/def').get('olStigX')
1055 		ret['def_olstigy'] = root.find('Def/def').get('olStigY')
1056 		ret['def_ilstigx'] = root.find('Def/def').get('ilStigX')
1057 		ret['def_ilstigy'] = root.find('Def/def').get('ilStigY')
1058 		ret['def_imageshiftx'] = root.find('Def/def').get('imageShiftX')
1059 		ret['def_imageshifty'] = root.find('Def/def').get('imageShiftY')
1060 		ret['def_plax'] = root.find('Def/def').get('plaX')
1061 		ret['def_play'] = root.find('Def/def').get('plaY')
1062 		
1063 		# HT
1064 		ret['ht_ht'] = root.find('HT/ht').get('ht')
1065 		ret['ht_energyshift'] = root.find('HT/ht').get('energyShift')
1066 		
1067 		# MDS
1068 		ret['mds_mdsmode'] = root.find('MDS/mds').get('mdsMode')
1069 		ret['mds_blankingdef'] = root.find('MDS/mds').get('blankingDef')
1070 		ret['mds_defx'] = root.find('MDS/mds').get('defX')
1071 		ret['mds_defy'] = root.find('MDS/mds').get('defY')
1072 		ret['mds_blankingtype'] = root.find('MDS/mds').get('blankingType')
1073 		ret['mds_blankingtime'] = root.find('MDS/mds').get('blankingTime')
1074 		ret['mds_shutterdelay'] = root.find('MDS/mds').get('shutterDelay')
1075 		
1076 		# Photo
1077 		ret['photo_exposuremode'] = root.find('PHOTO/photo').get('exposureMode')
1078 		ret['photo_manualexptime'] = root.find('PHOTO/photo').get('manualExpTime')
1079 		ret['photo_filmtext'] = root.find('PHOTO/photo').get('filmText')
1080 		ret['photo_filmnumber'] = root.find('PHOTO/photo').get('filmNumber')
1081 		
1082 		# GonioPos
1083 		ret['goniopos_x'] = root.find('GonioPos/gonioPos').get('x')
1084 		ret['goniopos_y'] = root.find('GonioPos/gonioPos').get('y')
1085 		ret['goniopos_z'] = root.find('GonioPos/gonioPos').get('z')
1086 		ret['goniopos_tiltx'] = root.find('GonioPos/gonioPos').get('tiltX')
1087 		ret['goniopos_rotortilty'] = root.find('GonioPos/gonioPos').get('rotOrTiltY')
1088 
1089 		return ret
1090 		
1091 		
1092 		
1093 	def map_jadas_DigitalCameraParameter(self, root):
1094 		attrmap = {
1095 			'CameraName': 'ccd_id',
1096 			'AreaTop': 'digicamprm_areatop',
1097 			'AreaBottom': 'digicamprm_areabottom',
1098 			'AreaLeft': 'digicamprm_arealeft',
1099 			'AreaRight': 'digicamprm_arearight',
1100 			'Exposure': 'time_exposure_tem',
1101 			'Binning': 'binning',
1102 			'PreIrradiation': 'digicamcond_preirradiation',
1103 			'BlankingTime': 'digicamcond_blankingtime',
1104 			'BlankBeam': 'digicamcond_blankbeam',
1105 			'CloseScreen': 'digicamcond_closescreen',
1106 			'DataFormat': 'digicamcond_dataformat'		
1107 		}
1108 
1109 		ret = {}
1110 		for i in root.findall('*/tagCamPrm'):
1111 			param = attrmap.get(i.get('tagAttrName'))
1112 			value = i.get('tagAttrVal')
1113 			if param != None and value != None:
1114 				ret[param] = value		
1115 
1116 		return ret
1117 		
1118 		
1119 		
1120 	def map_jadas_IntensityBasedHoleSelection(self, root):
1121 		ret = {}
1122 		return ret
1123 		
1124 			
1125 
1126 
1127 class CCDHandler(GridImagingFileHandler):
1128 
1129 	request_params = [
1130 		'tem_magnification_set',
1131 		'ctf_defocus_set',
1132 		'angstroms_per_pixel',
1133 		'time_exposure_tem',
1134 		'tem_dose_rate',
1135 		'current_screen',
1136 		'ice_type',
1137 		'assess_ice_thick',
1138 		'assess_ice_comments',
1139 		'assess_image_quality',
1140 		'ccd_id',
1141 		'status_energy_filter',
1142 		'binning'
1143 		]
1144 
1145 
1146 	def get_upload_items(self):
1147 		newrecord = {}
1148 		newrecord["recid"] = -1
1149 		newrecord["rectype"] = "ccd"
1150 		newrecord["id_ccd_frame"] = os.path.basename(self.filename)
1151 		newrecord.update(self.applyparams)
1152 		newrecord["parents"] = [self.recid]
1153 		# read metadata
1154 		return [UploadTransport(newrecord=newrecord, filename=self.filename, param="file_binary_image")]
1155 
1156 
1157 
1158 
1159 class ScanHandler(GridImagingFileHandler):
1160 
1161 	request_params = [
1162 		'scanner_film',
1163 		'scanner_cartridge',
1164 		'scan_average',
1165 		'nikon_gain',
1166 		'scan_step',
1167 		'angstroms_per_pixel'
1168 		]
1169 
1170 
1171 	def get_upload_items(self):
1172 		print "\tChecking for existing micrograph..."
1173 		idmap = collections.defaultdict(set)
1174 		mc = self.db.getchildren(self.recid, 1, "micrograph")
1175 		mc = self.db.getrecord(mc)
1176 		for rec in mc:
1177 			i = rec.get('id_micrograph', '').strip().lower()
1178 			idmap[i].add(rec.get('recid'))
1179 
1180 
1181 
1182 		# Ugly.
1183 		match = self.filename.split(".")[0].strip().lower()
1184 		matches = idmap[match]
1185 
1186 		if len(idmap[match]) == 0:
1187 			print "\tCould not find micrograph for %s -- creating new micrograph."%match
1188 
1189 			mrec = {}
1190 			mrec["recid"] = -1
1191 			mrec["rectype"] = "micrograph"
1192 			mrec["parents"] = [self.recid]
1193 			mrec["id_micrograph"] = match
1194 
1195 			newrecord = {}
1196 			newrecord["recid"] = -2
1197 			newrecord["rectype"] = self.rectype
1198 			newrecord["parents"] = [-1]
1199 			newrecord.update(self.applyparams)
1200 
1201 			m = NewRecordTransport(newrecord=mrec)
1202 			s = UploadTransport(newrecord=newrecord, filename=self.filename, param="file_binary_image")
1203 
1204 			sidecar = s.sidecar_read()
1205 			if sidecar:
1206 				print "\tThis scan already appears to be uploaded! Check record ID %s"%sidecar.get('recid')
1207 				return []
1208 
1209 			return [m, s]
1210 
1211 
1212 		elif len(idmap[match]) == 1:
1213 			matches = matches.pop()
1214 			print "\tFound match for %s: %s"%(match, matches)
1215 			newrecord = {}
1216 			newrecord["recid"] = -1
1217 			newrecord["rectype"] = self.rectype
1218 			newrecord["parents"] = [matches]
1219 			newrecord.update(self.applyparams)
1220 
1221 			return [UploadTransport(newrecord=newrecord, filename=self.filename, param="file_binary_image")]
1222 
1223 
1224 		elif len(idmap[match]) > 1:
1225 			print "\tAmbiguous matches for %s: %s -- skipping"%(match, matches)
1226 			return []
1227 
1228 
1229 
1230 
1231 
1232 class StackHandler(GridImagingFileHandler):
1233 
1234 	# See notes at bottom of file
1235 	header_labels = [
1236 		# X,Y,Z size
1237 		['stack_size_nx', 'i', 0],
1238 		['stack_size_ny', 'i', 0],
1239 		['stack_size_nz', 'i', 0],
1240 		## Mode: mode, 0 = unsigned byte, 1 = short int, 2 = float, 3 = short*2, 4 = float*2
1241 		['stack_data_mode', 'i', 0],
1242 		## Starting point of sub image: nxstart, nystart, nzstart
1243 		['stack_start_nx', 'i', 0],
1244 		['stack_start_ny', 'i', 0],
1245 		['stack_start_nz', 'i', 0],
1246 		## Grid Size: mx, my, mz
1247 		['stack_size_mx', 'i', 0],
1248 		['stack_size_my', 'i', 0],
1249 		['stack_size_mz', 'i', 0],
1250 		## Cell size: xlen, ylen, zlen... pixel spacing = xlen/mx
1251 		['stack_size_xlen', 'i', 0],
1252 		['stack_size_ylen', 'i', 0],
1253 		['stack_size_zlen', 'i', 0],
1254 		## Cell angles: alpha, beta, gamma
1255 		['stack_angle_alpha', 'f', 0.0],
1256 		['stack_angle_beta', 'f', 0.0],
1257 		['stack_angle_gamma', 'f', 0.0],
1258 		## Map column. Ignored by IMOD: mapc, mapr, maps. (1=x, 2=y, 3=z)
1259 		['stack_map_mapc', 'i', 0],
1260 		['stack_map_mapr', 'i', 0],
1261 		['stack_map_maps', 'i', 0],
1262 		## Minimum pixel value, amin
1263 		['stack_pixel_min', 'f', 0.0],
1264 		## Maximum pixel value, amax
1265 		['stack_pixel_max', 'f', 0.0],
1266 		## Mean pixel value, amean
1267 		['stack_pixel_mean', 'f', 0.0],
1268 		## Image type, ispg
1269 		['stack_data_ispg', 'h', 0],
1270 		## Space group number, nsymbt
1271 		['stack_data_nsymbt', 'h', 0],
1272 		## Number of bytes in extended header, next
1273 		['stack_data_extheadersize', 'i', 0],
1274 		## Creator ID, creatid
1275 		['stack_data_creatid', 'h', 0],
1276 		# 30 bytes of unused data: See explanation above
1277 		['stack_unused', '30s', ''],
1278 		## Number of bytes per section (SerialEM format), nint
1279 		['stack_data_bytespersection', 'h', 0],
1280 		## Flags for which types of short data (SerialEM format), nreal. (See above documentation.)
1281 		['stack_data_extheaderflags', 'h', 0],
1282 		# 28 bytes of unused data: See explanation above
1283 		['stack_unused2', '28s', ''],
1284 		['stack_data_idtype', 'h', 0],
1285 		['stack_data_lens', 'h', 0],
1286 		['stack_data_nd1', 'h', 0],
1287 		['stack_data_nd2', 'h', 0],
1288 		['stack_data_vd1', 'h', 0],
1289 		['stack_data_vd2', 'h', 0],
1290 		['stack_data_tiltangles_orig', 'fff', [0.0, 0.0, 0.0]],
1291 		['stack_data_tiltangles_current', 'fff', [0.0, 0.0, 0.0]],
1292 		['stack_data_xorg', 'f', 0.0],
1293 		['stack_data_yorg', 'f', 0.0],
1294 		['stack_data_zorg', 'f', 0.0],
1295 		['stack_data_cmap', '4s', ''],
1296 		# big/little endian flag
1297 		['stack_data_stamp', '4s', ''],
1298 		['stack_data_rms', 'f', 0.0],
1299 		['stack_data_nlabl', 'i', 0],
1300 		# spliced separately: see below
1301 		['stack_data_labels', '80s'*10, '']
1302 	]
1303 
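	# _handle_header() walks this table with the struct module. As a hedged
	# one-off illustration, unpacking just the three dimension fields of a
	# header would look like this ("stack.st" is a placeholder filename):
	#
	#	nx, ny, nz = struct.unpack("iii", open("stack.st", "rb").read(12))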
1304 
1305 	extheader_flags = {
1306 		1: {
1307 			'pack': 'h',
1308 			'load': lambda x:[x[0] / 100.0],
1309 			'dump': lambda x:[x[0] * 100],
1310 			'dest': ['specimen_tilt'],
1311 			},
1312 
1313 		2: {
1314 			'pack': 'hhh',
1315 			'load': lambda x:[x],
1316 			'dump': lambda x:[x],
1317 			'dest': ['stack_data_montage']
1318 			},
1319 
1320 		4: {
1321 			'pack': 'hh',
1322 			'load': lambda x:[x[0] / 25.0 , x[1] / 25.0],
1323 			'dump': lambda x:[x[0] * 25   , x[1] * 25],
1324 			'dest': ['stack_stagepos_x', 'stack_stagepos_y']
1325 			},
1326 
1327 		8: {
1328 			'pack': 'h',
1329 			'load': lambda x:[x[0] / 10.0],
1330 			'dump': lambda x:[x[0] * 10],
1331 			'dest': ['tem_magnification_set']
1332 			},
1333 
1334 		16: {
1335 			'pack': 'h',
1336 			'load': lambda x:[x[0] / 25000.0],
1337 			'dump': lambda x:[x[0] * 25000],
1338 			'dest': ['stack_intensity']
1339 			},
1340 
1341 		32: {
1342 			'pack': 'hh',
1343 			'load': lambda x:[sign(x[0])*(math.fabs(x[0])*256+(math.fabs(x[1])%256))*2**(sign(x[1])*(int(math.fabs(x[1]))/256))],
1344 			'dump': lambda x:[0,0],
1345 			'dest': ['stack_dose']
1346 			}
1347 	}
1348 
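	# extheader_flags is keyed by the bits of the SerialEM "nreal" flag word.
	# For example, flags = 9 (bits 1|8) means each per-section record holds one
	# short decoded as specimen_tilt (value/100.0) followed by one short decoded
	# as tem_magnification_set (value/10.0). _extheader_getkeys() walks the bits
	# in ascending order, matching the packing order assumed here.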
1349 
1350 
1351 	def download_postprocess(self, recids, ret):
1352 
1353 		rec = self.db.getrecord(self.recid)
1354 		print "Rebuilding .st file. It is generally a good idea to run this download in a new directory."
1355 
1356 		workfile = rec.get('stack_filename')
1357 		if os.access(workfile, os.F_OK):
1358 			raise ValueError, "File %s already exists -- not going to overwrite. Exiting."%workfile
1359 
1360 		f = open(workfile, "wb")
1361 
1362 		print "Making header"
1363 		a = self._make_header(rec)
1364 		f.write(a)
1365 
1366 		print "Making extheader"
1367 		slices = self.db.getrecord(sorted(self.db.getchildren(self.recid, 1, "stackimage"), reverse=True))
1368 
1369 
1370 		hl = rec.get('stack_data_extheaderflags')
1371 		hs = rec.get('stack_data_extheadersize')
1372 		b = self._make_extheader(slices, stack_data_extheaderflags=hl)
1373 		padsize = hs - len(b)
1374 		f.write(b)
1375 		print "\t...size was %s, padding by %s to %s / %s"%(len(b), padsize, len(b)+padsize, hs)
1376 		f.write("\0"*padsize)
1377 
1378 		print "Adding images to stack"
1379 
1380 		reti = {}
1381 		for k,v in ret.items():
1382 			reti[v.get("recid")] = k
1383 
1384 		for rec in slices:
1385 			fn = reti.get(rec.get("recid"))
1386 			print fn
1387 			fnopen = open(fn, "rb")
1388 			fnopen.seek(1024)
1389 			data = fnopen.read()
1390 			f.write(data)
1391 			print "\t...wrote %s bytes"%len(data)
1392 			fnopen.close()
1393 			os.unlink(fn)
1394 
1395 		f.close()
1396 
1397 
1398 
1399 	def init(self):
1400 		pass
1401 
1402 
1403 	def prepare_upload(self):
1404 		try:
1405 			self.header = self.get_header()
1406 			self.slices = self.get_extheader()
1407 			self.update_maxangles()
1408 			self.header['stack_filename'] = self.filename
1409 		except Exception, inst:
1410 			raise ValueError, "This does not appear to be a SerialEM stack: %s"%inst
1411 
1412 
1413 		# Extract headers for backup, unstack file and compress, add to tmpfiles to be removed after upload
1414 		self.check_db()
1415 
1416 		self.tmpdir = tempfile.mkdtemp()
1417 
1418 		self.raw_headers = self.backup_headers()
1419 		self.tmpfiles.extend(self.raw_headers)
1420 
1421 		self.unstacked_files = self.unstack()
1422 		self.tmpfiles.extend(self.unstacked_files)
1423 
1424 
1425 	def get_upload_items(self):
1426 		"""Get basic records and files to put in database. These will be filled out more fully with db.newrecord, etc."""
1427 
1428 		stackrec = copy.copy(self.header)
1429 		stackrec["recid"] = -100
1430 		stackrec["parents"] = [self.recid]
1431 
1432 		files = [UploadTransport(recid=-100, filename=i, compress=False) for i in self.raw_headers]
1433 		files[0].newrecord = stackrec
1434 
1435 		for offset, (s, s_filename) in enumerate(zip(self.slices, self.unstacked_files)):
1436 			s["recid"] = -(offset+101)
1437 			s["parents"] = [-100]
1438 			files.append(UploadTransport(newrecord=s, filename=s_filename, param="file_binary_image"))
1439 
1440 		return files
1441 
1442 
1443 	########
1444 	# Stack specific methods, mostly header related
1445 
1446 	def update_maxangles(self):
1447 		# Only stacks whose extended header carries tilt angles have this key
1448 		if self.slices and self.slices[0].has_key("specimen_tilt"):
1449 			tilts = [i['specimen_tilt'] for i in self.slices]
1450 			self.header["stack_maxangle"] = max(tilts)
1451 			self.header["stack_minangle"] = min(tilts)
1452 
1453 
1454 	def get_header(self):
1455 		"""Read an IMOD/SerialEM/MRC stack header"""
1456 		f = open(self.filename,"rb")
1457 		h = f.read(1024)
1458 		f.close()
1459 
1460 		return self._handle_header(h)
1461 
1462 
1463 	def get_extheader(self):
1464 		if not self.header:
1465 			self.header = self.get_header()
1466 
1467 		if not self.header["stack_data_extheadersize"]:
1468 			return []
1469 
1470 		f = open(self.filename, "rb")
1471 		h = f.read(1024)
1472 		eh = f.read(self.header["stack_data_extheadersize"])
1473 		f.close()
1474 
1475 		return self._handle_extheader(eh)
1476 
1477 
1478 	def backup_headers(self):
1479 		fin = open(self.filename, 'rb')
1480 		raw_header = fin.read(1024)
1481 		raw_extheader = fin.read(self.header["stack_data_extheadersize"])
1482 		fin.close()
1483 
1484 		ret = []
1485 
1486 		header_outfile = "%s/%s.header"%(self.tmpdir, os.path.basename(self.filename))
1487 
1488 		fout = open(header_outfile, 'wb')
1489 		fout.write(raw_header)
1490 		fout.close()
1491 		ret.append(header_outfile)
1492 
1493 		if raw_extheader:
1494 			extheader_outfile = "%s/%s.extheader"%(self.tmpdir, os.path.basename(self.filename))
1495 			fout = open(extheader_outfile, 'wb')
1496 			fout.write(raw_extheader)
1497 			fout.close()
1498 			ret.append(extheader_outfile)
1499 
1500 		return ret
1501 
1502 
1503 	def unstack(self, slices=None):
1504 		"""Unstack slices into new files.
1505 		@keyparam slices Single slice or list of slices to unstack; default is all.
1506 		@return Filenames of unstacked files
1507 		"""
1508 
1509 		raw_header = self._make_unstacked_header()
1510 		header_offset, size = self._get_offset_and_size()
1511 		fstack = open(self.filename, "rb")
1512 
1513 		ret = []
1514 
1515 		if slices and not hasattr(slices, "__iter__"):
1516 			slices = [slices]
1517 		if not slices:
1518 			slices = range(len(self.slices))
1519 
1520 		for n in slices:
1521 			read_offset = header_offset + (size*n)
1522 
1523 			fout_name = "%s/%s.%s.mrc"%(self.tmpdir, os.path.basename(self.filename), n)
1524 
1525 			self.report("Unstacking frame %s into %s"%(n, fout_name), i=1)
1526 
1527 			# check_filenames = [fout_name]
1528 			# check_filenames.append(fout_name+".gz")
1529 			# if not filter(None, map(os.path.exists, check_filenames)):
1530 
1531 			#ret.append(fout_name)
1532 			#continue
1533 
1534 			fout = open(fout_name, "wb")
1535 			fout.write(raw_header)
1536 			fstack.seek(read_offset)
1537 
1538 			blen = 1024*1024*8
1539 			for buffer_size in [blen]*(size/blen)+[size%blen]:
1540 				copy_buffer = fstack.read(buffer_size)
1541 				fout.write(copy_buffer)
1542 
1543 			fout.close()
1544 
1545 			ret.append(fout_name)
1546 
1547 		fstack.close()
1548 		return ret
1549 
1550 
1551 	def stack(self, outfile=None):
1552 		pass
1553 
1554 
1555 	def _get_offset_and_size(self):
1556 		# 4    int     mode;      Types of pixel in image.  Values used by IMOD:
1557 		# 	 0 = unsigned bytes,
1558 		# 	 1 = signed short integers (16 bits),
1559 		# 	 2 = float,
1560 		# 	 3 = short * 2, (used for complex data)
1561 		# 	 4 = float * 2, (used for complex data)
1562 		#                          6 = unsigned 16-bit integers (non-standard)
1563 		# 	16 = unsigned char * 3 (for rgb data, non-standard)
1564 
1565 		header_offset = 1024 + self.header["stack_data_extheadersize"]
1566 		depth = 2
1567 		dmode = self.header['stack_data_mode']
1568 		if dmode in [0]:
1569 			depth = 1
1570 		elif dmode in [2, 3]:
1571 			depth = 4
1572 		elif dmode in [4]:
1573 			depth = 8
1574 		elif dmode in [16]:
1575 			depth = 3
1576 
1577 		slicesize = self.header["stack_size_nx"] * self.header["stack_size_ny"] * depth
1578 
1579 		return header_offset, slicesize
1580 
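	# Worked example: a 4096x4096 stack in mode 1 (16-bit) has a slice size of
	# 4096*4096*2 = 33554432 bytes, so frame n starts at byte offset
	# 1024 + stack_data_extheadersize + n*33554432 (see unstack() below).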
1581 
1582 	def _handle_header(self, h):
1583 		"""Extract data from header string (1024 bytes) and process"""
1584 
1585 		d={}
1586 		offset = 0
1587 
1588 		for dest, format, default in self.header_labels:
1589 			size = struct.calcsize(format)
1590 			value = struct.unpack(format, h[offset:offset+size])
1591 			if len(value) == 1: d[dest] = value[0]
1592 			else: d[dest] = value
1593 			offset += size
1594 
1595 		# Process data labels
1596 		n = d["stack_data_nlabl"]
1597 		d["stack_data_labels"] = [i.strip() for i in d["stack_data_labels"][:n]]+[""]*(10-n)
1598 
1599 		# Include rectype
1600 		d["rectype"] = "stack"
1601 		d["recid"] = -1
1602 
1603 		# This guy gives us trouble
1604 		d['stack_data_stamp'] = d['stack_data_stamp'].partition('\x00')[0]
1605 
1606 		# Delete unused items
1607 		try:
1608 			del d["stack_unused"]
1609 			del d["stack_unused2"]
1610 		except:
1611 			pass
1612 
1613 		return d
1614 
1615 
1616 	def _make_unstacked_header(self):
1617 
1618 		h = copy.deepcopy(self.header)
1619 		for i in ['stack_size_nz', 'stack_size_zlen']:
1620 			h[i] = 1
1621 		for i in ['stack_data_extheadersize', 'stack_data_bytespersection', 'stack_data_extheaderflags']:
1622 			h[i] = 0
1623 
1624 		rawheader = self._make_header(h)
1625 		return rawheader
1626 
1627 
1628 	def _make_header(self, header):
1629 		rawheader = []
1630 
1631 		header["stack_unused"] = ""
1632 		header["stack_unused2"] = ""
1633 
1634 		for dest, format, default in self.header_labels:
1635 			value = header.get(dest, default)
1636 			size = struct.calcsize(format)
1637 			if hasattr(value,"__iter__"):
1638 				value = struct.pack(format, *value)
1639 			else:
1640 				value = struct.pack(format, value)
1641 			# print dest, format, header.get(dest), value
1642 			rawheader.append(value)
1643 
1644 		return "".join(rawheader)
1645 
1646 
1647 	def _make_extheader(self, slices, stack_data_extheaderflags=0):
1648 		eh = []
1649 
1650 		# ian: todo: calculate keys based on info in DB instead of extheaderflags
1651 		keys = self._extheader_getkeys(stack_data_extheaderflags)
1652 
1653 		for s in slices:
1654 			for key in keys:
1655 				_k = self.extheader_flags.get(key)
1656 
1657 				value = [s[i] for i in _k['dest']]
1658 				value = _k['dump'](value)
1659 				if hasattr(value, "__iter__"):
1660 					value = struct.pack(_k['pack'], *value)
1661 				else:
1662 					value = struct.pack(_k['pack'], value)
1663 
1664 				# print key, _k, [s[i] for i in _k['dest']]
1665 				eh.append(value)
1666 
1667 		return "".join(eh)
1668 
1669 
1670 	def _handle_extheader(self, eh):
1671 		"""Process extended header"""
1672 
1673 		d = self.header
1674 		ed = []
1675 		keys = self._extheader_getkeys(d["stack_data_extheaderflags"])
1676 
1677 		offset = 0
1678 		for i in range(0, d["stack_size_nz"]):
1679 			sslice = {}
1680 			for key in keys:
1681 				_k = self.extheader_flags.get(key)
1682 				size = struct.calcsize(_k['pack'])
1683 				# print "Consuming %s bytes (%s:%s) for %s"%(size, i+offset, i+offset+size, _k['dest'])
1684 				value = struct.unpack(_k['pack'], eh[offset: offset+size])
1685 				value = _k['load'](value)
1686 
1687 				for _i in zip(_k['dest'], value):
1688 					sslice[_i[0]] = _i[1]
1689 
1690 				offset += size
1691 
1692 			sslice["rectype"] = "stackimage"
1693 
1694 			ed.append(sslice)
1695 
1696 		return ed
1697 
1698 
1699 	def _extheader_getkeys(self, flags):
1700 		keys = []
1701 		for i in sorted(self.extheader_flags.keys()):
1702 			if flags & i:
1703 				keys.append(i)
1704 		return keys
1705 
1706 
1707 
1708 
1709 
1710 ##################################
1711 # Application controllers
1712 ##################################
1713 
1714 
1715 class EMEN2ClientController(object):
1716 	def __init__(self, args=None, options=None):
1717 		"""EMEN2 Client. DBRPCProxy available as 'db' attr after login."""
1718 		self.options = options
1719 		self.args = None
1720 		self.parser = None
1721 
1722 		self.db = None
1723 
1724 		self.setparser()
1725 		if args != None:
1726 			self.parse_args(args)
1727 
1728 
1729 	def setparser(self):
1730 		self.parser = DBRPCOptions()
1731 		self.setparser_add_option()
1732 
1733 
1734 	def setparser_add_option(self):
1735 		pass
1736 
1737 
1738 	def parse_args(self, inargs):
1739 		self.options, self.args = self.parser.parse_args(inargs)
1740 		self.check_args()
1741 
1742 
1743 	def check_args(self):
1744 		pass
1745 
1746 
1747 	def run(self):
1748 		pass
1749 
1750 
1751 	def login(self):
1752 		self.options.username = self.options.username or raw_input("Username: ")
1753 		self.options.password = self.options.password or getpass.getpass("Password: ")
1754 
1755 		if self.options.json:
1756 			self.db = DBJSONRPCProxy(host=self.options.host)
1757 		else:
1758 			self.db = DBXMLRPCProxy(host=self.options.host)
1759 		self.checkversion()
1760 		try:
1761 			self.ctxid = self.db._login(username=self.options.username, password=self.options.password)
1762 		except Exception, e:
1763 			print "Unable to login: %s"%(e)
1764 			sys.exit(1)
1765 
1766 
1767 	def checkversion(self):
1768 
1769 		v = self.db.checkversion("emen2client")
1770 
1771 		if distutils.version.StrictVersion(__version__) < distutils.version.StrictVersion(v):
1772 			print """
1773 Note: emen2client version %s is available; installed version is %s.
1774 
1775 EMAN2 now includes emen2client.py, so you may download a new nightly build of EMAN2,
1776 	http://ncmi.bcm.edu/ncmi/
1777 
1778 ...or visit the EMEN2 wiki and download emen2client.py directly:
1779 	http://blake.grid.bcm.edu/emanwiki/EMEN2/emen2client.py
1780 
1781 """%(v, __version__)
1782 
1783 		else:
1784 			print "emen2client version %s is up to date"%(__version__)
1785 
1786 
1787 
1788 
1789 
1790 
1791 class UploadController(EMEN2ClientController):
1792 	def setparser_add_option(self):
1793 		self.parser.add_option("--noninteractive","-q", action="store_true", help="Do not prompt for parameter values")
1794 		self.parser.add_option("--force", "-f", action="store_true", help="Force re-upload even if a sidecar is found")
1795 		self.parser.add_option("--metafile", action="store_true", dest="metafile", help="Attempt to read JAMES/JADAS metadata files (default)", default=True)
1796 		self.parser.add_option("--no-metafile", action="store_false", dest="metafile", help="Ignore metadata files")
1797 
1798 		usage = """%prog upload [options] <record type> <recid> <files to upload>
1799 
1800 Record type can be any valid database protocol.
1801 Some record types have special, application-specific handlers, e.g.:
1802 
1803 	ccd			CCD Frames
1804 	scan		Scanned micrographs
1805 	stack		Tomograms
1806 
1807 Other values, e.g. "volume", will create child records of that type, with 1 file per child record.
1808 
1809 Alternatively, you can use "none" for record type and the files will be attached directly to the specified record ID.
1810 
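Example (record ID and filenames are placeholders): %prog upload ccd 100 frame*.tif
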
1811 		"""
1812 
1813 		self.parser.set_usage(usage)
1814 
1815 
1816 	def upload(self):
1817 		if self.action not in map_rectype_handler:
1818 			try:
1819 				rd = self.db.getrecorddef(self.action)
1820 			except:
1821 				print "Invalid record type: %s"%self.action
1822 				return
1823 
1824 
1825 		print ""
1826 		print "%s files to upload:"%len(self.filenames)
1827 		for i in self.filenames:
1828 			print "\t",i
1829 
1830 		handlerclass = gethandler(rectype=self.action) # or NewRecordFileHandler
1831 		applyparams = self._get_param_values(handlerclass.request_params)
1832 
1833 		# There isn't a dict(options)...
1834 		options = {}
1835 		for k,v in self.options.__dict__.items():
1836 			options[k]=v
1837 
1838 
1839 		dbts = []
1840 		filecount = len(self.filenames)
1841 		for count, filename in enumerate(self.filenames):
1842 			dbt = handlerclass(
1843 				db=self.db,
1844 				rectype=self.action,
1845 				filename=filename,
1846 				recid=self.recid,
1847 				applyparams=applyparams,
1848 				pos=(count,filecount),
1849 				options=options
1850 			)
1851 			dbt.upload()
1852 
1853 
1854 
1855 	def check_args(self):
1856 		self.action = self.args[0]
1857 
1858 		try:
1859 			self.recid = int(self.args[1])
1860 		except:
1861 			raise Exception, "Record ID required as argument after upload action"
1862 
1863 
1864 		self.filenames = reduce(operator.concat, map(glob.glob, self.args[2:]), [])	# [] initializer: avoid TypeError when no file arguments are given
1865 
1866 		if not self.filenames:
1867 			raise Exception, "No files specified"
1868 
1869 
1870 	def run(self):
1871 		self.login()
1872 		self.upload()
1873 
1874 
1875 	#########################
1876 	# Ugly console input code
1877 	#########################
1878 
1879 	def _get_param_values(self, params):
1880 
1881 		ret = {}
1882 		if self.options.noninteractive:
1883 			return ret
1884 
1885 		pds = self.db.getparamdef(params)
1886 		pds = dict([[i.get('name'), i] for i in pds])
1887 
1888 
1889 		for param in params:
1890 			pd = pds.get(param)
1891 			if pd is not None: ret[param] = self._get_param_value_console(pd)	# skip params with no paramdef
1892 		return ret
1893 
1894 
1895 
1896 	def _get_param_value_console(self, param):
1897 		name = param.get('name')
1898 
1899 		print "\n----- %s -----"%name
1900 		print "\tDescription: %s"%param.get('desc_long')
1901 		if param.get('defaultunits'):
1902 			print "\tUnits: %s"%param.get('defaultunits')
1903 
1904 
1905 		punits = param.get('defaultunits') or ''
1906 		lines = None
1907 		selections = []
1908 
1909 		try:
1910 			lines = [i[0] for i in self.db.findvalue(name, '', True, False, 5)]
1911 		except Exception, inst:
1912 			print inst
1913 			pass
1914 
1915 		count = 0
1916 		if param.get('choices'):
1917 			print "\n\tSuggested values:"
1918 			for choice in param.get('choices', []):
1919 				print "\t\t%s) %s"%(count, choice)
1920 				selections.append(choice)
1921 				count += 1
1922 
1923 		if param.get('vartype') != "choice" and lines:
1924 			if param.get('choices'):
1925 				print "\n\tOther common values:"
1926 			else:
1927 				print "\n\tCommon values:"
1928 
1929 			for line in lines:
1930 				print "\t\t%s) %s"%(count, line)
1931 				selections.append(line)
1932 				count += 1
1933 
1934 		print "\n\t\t%s) None or N/A"%count
1935 		selections.append('')
1936 		count += 1
1937 
1938 		if param.get('vartype') != "choice":
1939 			print "\t\t%s) Enter a different value not listed above"%count
1940 
1941 
1942 		while True:
1943 			inp = raw_input("\n\tSelection (0-%s): "%count)
1944 			try:
1945 				inp = int(inp)
1946 
1947 				if inp == count:
1948 					inp = raw_input("\tValue: ")
1949 				else:
1950 					inp = selections[inp]
1951 			except:
1952 				continue
1953 
1954 			break
1955 
1956 		return inp
1957 
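	# A hypothetical console exchange for a parameter with units and choices
	# (parameter name and values are illustrative only):
	#
	#   ----- tem_magnification -----
	#       Description: Instrument magnification
	#       Units: X
	#
	#       Suggested values:
	#           0) 50000
	#           1) 60000
	#
	#           2) None or N/A
	#           3) Enter a different value not listed above
	#
	#       Selection (0-3): 1
	#
	# Selecting 1 returns "60000"; selecting 3 prompts for a free-form value.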
1958 
1959 
1960 
1961 
1962 class DownloadController(EMEN2ClientController):
1963 
1964 	def setparser_add_option(self):
1965 		self.parser.add_option("--recurse", type="int",help="Recursion level",default=3)
1966 		self.parser.add_option("--overwrite", "-o", action="store_true", help="Overwrite existing files (default is to skip)", default=False)
1967 		self.parser.add_option("--rename", "-r", action="store_true", help="If a file already exists, save with format 'duplicate.recid:original_filename.dm3'", default=False)
1968 		self.parser.add_option("--sidecar", "-s", action="store_true", help="Include sidecar file with EMEN2 metadata in JSON format")
1969 		self.parser.add_option("--gzip", action="store_true", dest="compress", help="Decompress gzip'd files. Requires gzip in path (default)", default=True)
1970 		self.parser.add_option("--no-gzip", action="store_false", dest="compress", help="Do not decompress gzip'd files.")
1971 
1972 		usage = """%prog download [options] <recid> [<recid> ...]"""
1973 
1974 		self.parser.set_usage(usage)
1975 
1976 		# self.parser.add_option("--convert","-c",action="store_true",help="Convert files to mrc",default=0)
1977 		# self.parser.add_option("--invert","-i",action="store_true",help="Invert contrast",default=0)
1978 
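	# For example, a hypothetical session (record IDs illustrative) fetching the
	# files attached to records 1000 and 1001, skipping any that already exist
	# locally unless --overwrite or --rename is given:
	#
	#   emen2client.py download 1000 1001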
1979 
1980 	def check_args(self):
1981 
1982 		if self.options.sidecar and not json:
1983 			print "Warning: sidecar support requires JSON"
1984 			self.options.sidecar = False
1985 
1986 		if len(self.args) < 1:
1987 			raise optparse.OptionValueError, "Record ID required"
1988 
1989 		self.recids = [int(i) for i in self.args]
1990 		self.patterns = []
1991 
1992 		#try:
1993 		#	self.recid = int(self.args[0])
1994 		#	self.patterns = self.args[1:] or []
1995 		#except Exception, inst:
1996 		#	raise  optparse.OptionError, "Record ID Required"
1997 
1998 
1999 	def run(self):
2000 		self.check_args()
2001 		self.login()
2002 
2003 		options = {}
2004 		for k,v in self.options.__dict__.items():
2005 			options[k] = v
2006 
2007 		for recid in self.recids:
2008 			rec = self.db.getrecord(recid)
2009 			handler = gethandler(rectype=rec.get('rectype'))
2010 			dbt = handler(db=self.db, recid=recid, options=options, pos=(0,1))
2011 			dbt.download()
2012 
2013 
2014 		return
2015 
2016 
2017 
2018 
2019 
2020 
2021 
2022 class SyncController(EMEN2ClientController):
2023 
2024 	def setparser_add_option(self):
2025 
2026 		self.parser.add_option("--check", action="store_true", help="Do not upload anything; just check file mappings")
2027 		self.parser.add_option("--ctf", action="store_true", help="Upload CTF Parameters")
2028 		self.parser.add_option("--boxes", action="store_true", help="Upload Boxes")
2029 		self.parser.add_option("--eman1", action="store_true", help="Look in EMAN1-style files instead of EMAN2 project database")
2030 		self.parser.add_option("--ptcls", action="store_true", help="Upload Particle Sets")
2031 		self.parser.add_option("--confirm", action="store_true", help="Request confirmation of mappings before proceeding")
2032 		self.parser.add_option("--clip_filename", type="string", help="Remove a substring from source filenames to aid matching, e.g. _filt_ptcls")
2033 		self.parser.add_option("--match", type="string", help="Restrict particle sets to this substring, e.g. for quick testing")
2034 		#self.parser.add_option("--check_boxsize", type="int", help="Check if a micrograph has been shrunk; if box_size < check_boxsize, zoom by box_size / check_boxsize")
2035 		self.parser.add_option("--shrink_factor", type="float", help="Specify a shrink factor (e.g. 0.25 if the boxed micrograph was reduced by a factor of 4)", default=1.0)
2036 
2037 
2038 		usage = """%prog sync [options] <project record ID>
2039 
2040 This program will upload CTF parameters, boxes, and particle sets into an EMEN2 database. Because filenames are not always globally unique, you must specify a base project that will be searched for files.
2041 
2042 If run with "--check", it will only test whether the correct file mappings can be made between the local EMAN2 project and the remote database. Nothing will be written. This is a good way to verify the mappings before you attempt to commit any changes.
2043 
2044 		"""
2045 
2046 		self.parser.set_usage(usage)
2047 
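	# A hypothetical workflow for project record 500 (ID illustrative): first a
	# dry run to verify the local-to-remote file mappings, then the actual
	# upload of CTF parameters and boxes with confirmation:
	#
	#   emen2client.py sync --check 500
	#   emen2client.py sync --ctf --boxes --confirm 500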
2048 
2049 	def check_args(self):
2050 		try:
2051 			self.project = int(self.args[0])
2052 		except:
2053 			raise Exception, "Project record ID required"
2054 
2055 		if EMAN2 == None:
2056 			raise Exception, "EMAN2 support is required"
2057 
2058 		if self.options.clip_filename:
2059 			# print "Debug: Clipping '%s' from filenames"%self.options.clip_filename
2060 			test = "filename_ok_filt.mrc"
2061 			test_clip = test.replace(self.options.clip_filename or '', "")
2062 			# print "\t%s -> %s"%(test, test_clip)
2063 
2064 
2065 
2066 	def run(self):
2067 
2068 		self.sources = set()
2069 		self.source_bdo = {}
2070 		self.source_ctf = {}
2071 		self.source_boxes = {}
2072 		self.source_box_size = {}
2073 		self.source_quality = {}
2074 
2075 		self.tmpdirs = []
2076 		self.projrecs = []
2077 
2078 		# Check arguments and auth
2079 		self.check_args()
2080 		self.login()
2081 
2082 		# Actually read EMAN1 or EMAN2 CTF/boxes
2083 		if self.options.eman1:
2084 			self.readEMAN1()
2085 		else:
2086 			self.readEMAN2()
2087 
2088 		# Check project and get remote records
2089 		self.getremoteinfo()
2090 
2091 		# Make mappings
2092 		self.findbinary()
2093 
2094 		# Upload
2095 		if self.options.ctf:
2096 			self.uploadctf()
2097 
2098 		if self.options.boxes:
2099 			self.uploadboxes()
2100 
2101 		if self.options.ptcls:
2102 			self.uploadptcls()
2103 
2104 		self.cleanup()
2105 
2106 
2107 	def cleanup(self):
2108 		if not self.tmpdirs:
2109 			return
2110 
2111 		print "You will want to remove the following tmp directories. They are not automatically removed to prevent accidental file loss in the event of a bug or other error."
2112 		for k in self.tmpdirs:
2113 			print k
2114 
2115 
2116 
2117 	def getremoteinfo(self):
2118 		# Since filenames are not guaranteed unique, restrict to a project...
2119 		print "\nChecking project %s on %s"%(self.project, self.db._host)
2120 
2121 		self.projrecs = self.db.getchildren(self.project, -1, ["ccd","scan"])
2122 		print "\tFound %s ccds/scans in remote project"%len(self.projrecs)
2123 
2124 		self.bdos = self.db.getbinary(self.projrecs)
2125 		print "\tFound %s binaries"%len(self.bdos)
2126 
2127 
2128 
2129 	def readEMAN1(self):
2130 		if self.options.ctf:
2131 			self.readEMAN1ctf()
2132 		if self.options.boxes:
2133 			self.readEMAN1boxes()
2134 
2135 
2136 	def readEMAN1ctf(self):
2137 		try:
2138 			f = open("ctfparm.txt", "r")
2139 			r = [i.strip() for i in f.readlines()]
2140 			f.close()
2141 		except:
2142 			print "No ctfparm.txt present; could not load EMAN1 CTF parameters"
2143 			return
2144 
2145 		indexes = {
2146 			"ctf_defocus_measured": 0, # multiply by -1
2147 			"ctf_bfactor": 3,
2148 			"tem_voltage": 10,
2149 			"aberration_spherical": 11,
2150 			"angstroms_per_pixel": 12,
2151 			"ctf_astig_angle": 2,
2152 			"ctf_astig_defocus_diff": 1,
2153 			"ctf_ampcont": 5, # multiply by 100
2154 		}
2155 
2156 		for i in r:
2157 			try:
2158 				source, params = i.split("\t")
2159 				params = params.split(",")
2160 
2161 				# It seems that if there are 14 params, the astigmatism params are inserted at positions 1 and 2, shifting the later fields by 2
2162 				shift = 0
2163 				if len(params) == 14:
2164 					shift = 2
2165 
2166 				ctf = EMAN2.EMAN2Ctf()
2167 				ctf.defocus = float(params[0]) * -1
2168 				ctf.bfactor = float(params[1+shift])
2169 				ctf.ampcont = float(params[3+shift]) * 100
2170 
2171 				# print "defocus %s, bfactor %s, ampcont %s"%(ctf.defocus, ctf.bfactor, ctf.ampcont)
2172 				self.source_ctf[source] = ctf
2173 			except:
2174 				print "Unable to parse CTF parameters, skipping"
2175 
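	# A sketch of the ctfparm.txt layout handled above, with illustrative values:
	# each line is "<source>\t<comma-separated params>", e.g.
	#
	#   mcg001.mrc	1.71,200.0,0.0,0.25
	#
	# parses to ctf.defocus = -1.71 (negated on import), ctf.bfactor = 200.0
	# and, since the line does not have 14 fields (no inserted astigmatism
	# values, so shift = 0), ctf.ampcont = 0.25 * 100 = 25.0.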
2176 
2177 	def readEMAN1boxes(self):
2178 		boxes = glob.glob("*.box")
2179 
2180 		if not boxes:
2181 			print "No EMAN1 .box files found"
2182 			return
2183 
2184 		print "Found EMAN1 boxes: %s"%boxes
2185 
2186 		for box in boxes:
2187 			try:
2188 				source = os.path.splitext(box)[0]
2189 				box_size, coords = self.readEMAN1box(box)
2190 			except:
2191 				print "Could not load data from box %s, skipping"%box
2192 				continue
2193 
2194 			self.source_box_size[source] = box_size
2195 			self.source_boxes[source] = coords
2196 
2197 
2198 	def readEMAN1box(self, box):
2199 		f = open(box, "r")
2200 		r = [i.split() for i in f.readlines()]
2201 		f.close()
2202 
2203 		coords = []
2204 		for b in r:
2205 			box_size = int(b[2])
2206 			xc = (int(b[0]) + (box_size/2)) / self.options.shrink_factor
2207 			yc = (int(b[1]) + (box_size/2)) / self.options.shrink_factor
2208 			coords.append([int(xc),int(yc)])
2209 
2210 		box_size = int(box_size / self.options.shrink_factor)	# EMAN1 boxes share one size; the last parsed value is used
2211 
2212 		return box_size, coords
2213 
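	# Worked example of the box math above, with shrink_factor 1.0: an EMAN1
	# .box line "100 200 64 64" (lower-left corner x, y, then box dimensions)
	# yields a particle center of (100 + 64/2, 200 + 64/2) = (132, 232) and a
	# box size of 64.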
2214 
2215 	def readEMAN2(self):
2216 
2217 		###############
2218 		# Find all the raw images, particle sets, and CTF parameters in the local DB
2219 		###############
2220 
2221 		print "\nOpening EMAN2 local project"
2222 
2223 		projdb = EMAN2.db_open_dict("bdb:project")
2224 		ptclsets = projdb.get("global.spr_ptcls_dict", {})
2225 		e2ctfparms = EMAN2.db_open_dict("bdb:e2ctf.parms")
2226 		total = len(ptclsets)
2227 
2228 		# Read the EMAN2 managed particle sets
2229 		for count, (k,v) in enumerate(ptclsets.items()):
2230 			ref = v.get('Phase flipped') or v.get('Original Data')
2231 
2232 			if not ref:
2233 				print "No particles found for %s, skipping"%k
2234 				continue
2235 
2236 			print "%s of %s: Getting info for particle set %s from %s"%(count+1, total, k, ref)
2237 
2238 			d = EMAN2.db_open_dict(ref)
2239 
2240 			coords = []
2241 			maxrec = d['maxrec']
2242 			if maxrec is None:
2243 				print "No particles in %s, skipping"%(k)
2244 				d.close()
2245 				continue
2246 
2247 
2248 			# Get info from first particle in stack
2249 			ptcl = d[0]
2250 
2251 			try: ctf = ptcl.get_attr("ctf")
2252 			except: ctf = None
2253 
2254 			try: box_size = int(ptcl.get_attr("nx") / self.options.shrink_factor)
2255 			except: box_size = None
2256 
2257 			# Mangle source name if necessary
2258 			try:
2259 				source = os.path.basename(ptcl.get_attr('ptcl_source_image'))
2260 			except:
2261 				source = k
2262 				source = source.split("_ptcl")[0]
2263 				print "\tUnable to get source image for %s; using %s for the filename search"%(k, source)
2264 
2265 
2266 			# Try to read boxes from particle headers
2267 			if self.options.boxes:
2268 				for i in range(maxrec+1):
2269 					dptcl = d[i]
2270 					try:
2271 						x, y = dptcl.get_attr('ptcl_source_coord')
2272 						x /= self.options.shrink_factor
2273 						y /= self.options.shrink_factor
2274 						coords.append([int(x), int(y)])
2275 					except:
2276 						coords.append(None)
2277 
2278 
2279 				if None in coords:
2280 					print "\tSome particles for %s did not specify coordinates"%k
2281 					coords = []
2282 					# self.options.boxes = False
2283 
2284 				print "Got box_size %s and coords %s"%(box_size, coords)
2285 
2286 
2287 
2288 			# Get alternative CTF and quality from e2ctfit settings
2289 			ctf2, quality = self.readEMAN2e2ctfparms(e2ctfparms, k)
2290 			if not ctf and ctf2:
2291 				print "\tUsing CTF parameters from bdb:e2ctf.parms#%s"%k
2292 				ctf = ctf2
2293 
2294 			if ctf:
2295 				self.source_ctf[source] = ctf
2296 				print "Got CTF: defocus %s, B-factor %s"%(ctf.defocus, ctf.bfactor)
2297 			else:
2298 				print "\tNo CTF for %s"%k
2299 
2300 			if box_size and coords:
2301 				self.source_box_size[source] = box_size
2302 				self.source_boxes[source] = coords
2303 
2304 			if quality:
2305 				self.source_quality[source] = quality
2306 
2307 			d.close()
2308 
2309 
2310 		# If we can't find any EMAN2 managed particle sets, at least check e2ctf.parms for any CTF params
2311 		if not ptclsets:
2312 			print "No EMAN2 managed particle sets found; checking e2ctf.parms for CTF instead"
2313 			# self.options.boxes = False
2314 
2315 			for k,v in e2ctfparms.items():
2316 				ctf, quality = self.readEMAN2e2ctfparms(e2ctfparms, k)
2317 				source = k.split("_ptcl")[0]
2318 
2319 				if ctf:
2320 					self.source_ctf[source] = ctf
2321 				if quality:
2322 					self.source_quality[source] = quality
2323 
2324 		print "\n%s files in local project with CTF parameters: "%len(self.source_ctf), self.source_ctf.keys()
2325 
2326 		projdb.close()
2327 		e2ctfparms.close()
2328 
2329 
2330 
2331 	def readEMAN2e2ctfparms(self, e2ctfparms, item):
2332 		ctf2 = None
2333 		quality = None
2334 
2335 		ctf_str = e2ctfparms.get(item)
2336 		if ctf_str:
2337 			try:
2338 				ctf2 = EMAN2.EMAN2Ctf()
2339 				ctf2.from_string(ctf_str[0])
2340 				quality = ctf_str[-1]
2341 			except:
2342 				print "\tProblem reading CTF from bdb:e2ctf.parms for %s"%item
2343 
2344 		return ctf2, quality
2345 
2346 
2347 	def __filematch(self, i, j):
2348 		return i in j
2349 
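	# Matching in findbinary() below is substring-based. For example
	# (hypothetical names): a local source "mcg001_filt_ptcls" run with
	# --clip_filename=_filt_ptcls is reduced to the query "mcg001", which
	# matches the remote filename "mcg001.dm3"; more than one match is
	# reported as ambiguous.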
2350 
2351 	def findbinary(self):
2352 
2353 		###############
2354 		# For each file in the local project, search for a matching BDO and record ID in the database
2355 		###############
2356 
2357 		self.sources = set(self.source_ctf.keys() + self.source_boxes.keys())
2358 		total = len(self.sources)
2359 
2360 		bdosbyfilename = dict([[i["filename"], i] for i in self.bdos])
2361 		filenames = bdosbyfilename.keys()
2362 
2363 		ambiguous = {}
2364 		nomatches = {}
2365 
2366 		# ian: new style: I fixed getbinary, so just do one big BDO lookup instead of many db.findbinary
2367 
2368 		print "Remote filenames:"
2369 		print filenames
2370 
2371 		for count, source in enumerate(self.sources):
2372 			print "\n%s of %s: Looking up file %s in project %s"%(count+1, total, source, self.project)
2373 
2374 			q = source.replace(self.options.clip_filename or '', "")
2375 			gzipstrip = ".gz" in source
2376 
2377 			# ian: replace this with functools.partial
2378 			# matches = map(bdosbyfilename.get, filter(lambda x:q in x.split("."), filenames))
2379 			matches = []
2380 			for i in filenames:
2381 				i2 = i
2382 				if gzipstrip:
2383 					i2 = i.replace('.gz','')
2384 				#if q in i2:
2385 				if self.__filematch(q, i2):
2386 					matches.append(bdosbyfilename.get(i))
2387 
2388 
2389 			if len(matches) > 1:
2390 				print "\tAmbiguous match for %s: %s"%(source, [match["filename"] for match in matches])
2391 				ambiguous[source] = matches
2392 				continue
2393 
2394 
2395 			if len(matches) == 0:
2396 				print "\tNo matches for %s"%source
2397 				nomatches[source] = []
2398 				continue
2399 
2400 
2401 			match = matches[0]
2402 			print "\tFound %s in record %s, matching filename is %s"%(match["name"], match["recid"], match["filename"])
2403 
2404 			self.source_bdo[source] = match
2405 
2406 
2407 		print "\n\nSuccessful Local-Remote Mappings:"
2408 		for k,v in self.source_bdo.items():
2409 			print "\t%s -> %s (recid %s)"%(k, v["filename"], v["recid"])
2410 
2411 
2412 		if ambiguous:
2413 			print "\n\nAmbiguous matches:"
2414 			for k,v in ambiguous.items():
2415 				print "\t%s -> %s"%(k, v)
2416 		if nomatches:
2417 			print "\n\nNo matches:"
2418 			for k in nomatches:
2419 				print "\t%s"%k
2420 
2421 
2422 		if self.options.confirm:
2423 			answer = raw_input("\nContinue with these mappings Y/N? ")
2424 			if answer.lower() not in ("y", "yes"):
2425 				print "Mappings rejected; SyncController exiting"
2426 				# ian: do this in a better way, but not a harsh sys.exit
2427 				self.options.ctf = False
2428 				self.options.boxes = False
2429 
2430 
2431 	def uploadctf(self):
2432 
2433 		###############
2434 		# Now that we have found all source images, CTF parameters, and database references, prepare to save in DB.
2435 		###############
2436 
2437 		recs = [i["recid"] for i in self.source_bdo.values()]
2438 		recs = self.db.getrecord(recs)
2439 		recs = dict([(i["recid"],i) for i in recs])
2440 		putrecs = []
2441 
2442 		print "\nGot %s records for CTF parameter upload"%len(recs)
2443 
2444 		for source, match in self.source_bdo.items():
2445 
2446 			ctf = self.source_ctf.get(source)
2447 			quality = self.source_quality.get(source)
2448 
2449 			if not ctf:
2450 				print "No CTF found for %s"%source
2451 				continue
2452 
2453 			rec = recs[match["recid"]]
2454 
2455 			if rec.get("ctf_defocus_measured") == ctf.defocus and rec.get("ctf_bfactor") == ctf.bfactor and rec.get("ctf_ampcont") == ctf.ampcont and rec.get("assess_image_quality") == quality:
2456 				print "%s already has current CTF parameters"%source
2457 				continue
2458 
2459 			rec["ctf_defocus_measured"] = ctf.defocus
2460 			rec["ctf_bfactor"] = ctf.bfactor
2461 			rec["ctf_ampcont"] = ctf.ampcont
2462 
2463 			if quality != None:
2464 				rec["assess_image_quality"] = quality
2465 
2466 			putrecs.append(rec)
2467 
2468 		###############
2469 		# Store / upload
2470 		###############
2471 
2472 		print "\nCommitting %s updated records with changed CTF..."%(len(putrecs))
2473 
2474 		self.db.putrecord(putrecs)
2475 
2476 
2477 
2478 	def uploadboxes(self):
2479 
2480 		print "\nPreparing to upload boxes..."
2481 		newboxes = []
2482 
2483 		for source, boxes in self.source_boxes.items():
2484 
2485 			bdo = self.source_bdo.get(source)
2486 			box_size = self.source_box_size[source]
2487 
2488 			if not bdo:
2489 				print "\tNo bdo for source %s..."%source
2490 				continue
2491 
2492 			recid = bdo.get("recid")
2493 
2494 			# Check remote site for existing boxes
2495 			remoteboxes = self.db.getchildren(recid, -1, ["box"])
2496 
2497 			if len(remoteboxes) == 1:
2498 				print "\tUpdating existing box record"
2499 				newbox = self.db.getrecord(remoteboxes.pop())
2500 
2501 			else:
2502 				if len(remoteboxes) > 1:
2503 					print "\tNote: more than one box record already specified!"
2504 
2505 				print "\tCreating new box record"
2506 				newbox = self.db.newrecord("box", recid)
2507 
2508 			print "\t%s / %s has %s boxes with box size %s"%(source, recid, len(boxes), box_size)
2509 
2510 			newbox["box_coords"] = boxes
2511 			newbox["box_size"] = box_size
2512 
2513 			newboxes.append(newbox)
2514 
2515 			# print "-----"
2516 			# print newbox
2517 			# print "-----"
2518 
2519 		print "\tCommitting %s box records"%(len(newboxes))
2520 		newrecs = self.db.putrecord(newboxes)
2521 		# print "\t... %s"%", ".join([i.get('recid') for i in newrecs])
2522 
2523 
2524 
2525 	def uploadptcls(self):
2526 
2527 		print "\nUploading particles..."
2528 
2529 		print "Not implemented yet"
2530 
2531 		for source in self.source_ctf.keys():
2532 			pass
2533 
2534 
2535 
2536 
2537 
2538 
2539 ##################################
2540 # Mappings
2541 ##################################
2542 
2543 map_filename_rectype = {
2544 	".st":"stack",
2545 	".dm3":"ccd",
2546 	".tif":"jadas"
2547 }
2548 
2549 map_rectype_handler = {
2550 	"ccd": CCDHandler,
2551 	"stack": StackHandler,
2552 	"scan": ScanHandler,
2553 	"volume": VolumeHandler,
2554 	"micrograph": MicrographHandler,
2555 	"jadas": JADASHandler,
2556 	"none": FileHandler
2557 }
2558 
2559 
2560 def gethandler(filename=None, rectype=None):
2561 	if not rectype:
2562 		ext = os.path.splitext(filename)[-1]
2563 		rectype = map_filename_rectype.get(ext)
2564 	return map_rectype_handler.get(rectype, FileHandler)
2565 
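# For example, given the mappings above: gethandler(filename="frame.dm3")
# returns CCDHandler, gethandler(rectype="stack") returns StackHandler, and an
# unmapped extension or rectype (e.g. gethandler(filename="unknown.xyz")) falls
# back to FileHandler.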
2566 
2567 
2568 
2569 ##################################
2570 # Exported utility methods
2571 ##################################
2572 
2573 def opendb(host='http://ncmidb.bcm.edu', username=None, password=None, json=False):
2574 	username = username or raw_input("Username: ")
2575 	password = password or getpass.getpass("Password: ")
2576 	if json:
2577 		db = DBJSONRPCProxy(host=host)
2578 	else:
2579 		db = DBXMLRPCProxy(host=host)
2580 	db._login(username=username, password=password)
2581 	return db
2582 
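# A minimal usage sketch (hypothetical credentials and record ID); the returned
# proxy exposes the server's database methods, as used throughout the
# controllers above:
#
#   db = opendb(host="http://ncmidb.bcm.edu", username="demo", password="demo")
#   rec = db.getrecord(1000)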
2583 
2584 def sign(a):
2585 	if a>0: return 1
2586 	return -1
2587 
2588 
2589 
2590 
2591 ##################################
2592 # Main()
2593 ##################################
2594 
2595 
2596 def print_help():
2597 	print """%s <action>
2598 
2599 Actions available: upload, download, sync
2600 
2601 For detailed help: %s <action> --help
2602 
2603 	"""%(sys.argv[0],sys.argv[0])
2604 
2605 
2606 
2607 
2608 def main():
2609 	parser = DBRPCOptions()
2610 
2611 	controllers = {
2612 		"download":DownloadController,
2613 		"upload":UploadController,
2614 		"sync":SyncController
2615 	}
2616 
2617 	try:
2618 		action = sys.argv[1]
2619 	except:
2620 		action = "help"
2621 
2622 	if action in ["-h","--help","help"] or action not in controllers:
2623 		return print_help()
2624 
2625 	try:
2626 		if len(sys.argv) == 2:
2627 			sys.argv.append("-h")
2628 		controller = controllers.get(action)(args=sys.argv[2:])
2629 
2630 	except Exception, inst:
2631 		print "Error: %s"%inst
2632 		sys.exit(1)
2633 
2634 	controller.run()
2635 
2636 
2637 
2638 if __name__ == "__main__":
2639 	main()
