Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package DB2Data :: Module DB2PDB
[hide private]
[frames] | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.DB2Data.DB2PDB

   1  #!/usr/bin/env python 
   2  ############################## 
   3  # DB2PDB.py                  # 
   4  # Import data from database  # 
   5  ############################## 
   6   
   7  ###################################################### 
   8  # Copyright (c) 2007-2008 Andrzej Bak                # 
   9  # ARC Seibersdorf & University of Silesia            # 
  10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
  11  # License: GNU General Public License, version: 3    # 
  12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
  13  # Version: 1, 06.01.2010                             # 
  14  ###################################################### 
  15   
  16  try: 
  17      import sys 
  18      import os 
  19      from openeye.oechem import * 
  20      from MoSTBioDat.DataBase.ImportData.Data2DB.TaBuilder import TaBuilder 
  21      from MoSTBioDat.DataBase.ImportData.Data2DB.InserTables import all 
  22      from MoSTBioDat.DataBase.ImportData.Data2DB.InsertMacMolTables import InsertMacMolTables 
  23      from MoSTBioDat.DataBase.Connect.MoSTBioDatErrors import Error 
  24      from MoSTBioDat.DataBase.ImportData.Data2DB.PDBFile import Molecule, Atom, createPDBFile 
  25  except ImportError,e: 
  26      print 'Error: %s' %e 
  27      sys.exit(1) 
  28  ########## DB2PDB class ############################## 
29 -class DB2PDB(TaBuilder,InsertMacMolTables):
30 """ 31 Import data from MacroMolecule DataBase into PDB file 32 INPUT: 33 id - int, compound Id in Entry table, default False 34 pdbcode - str, compound PDB code, default False 35 dirpath - str, directory path 36 pdbfile - str, pdb file path 37 modelid - int, model id list or integer, default empty list 38 tags - boolean, get tags, default False 39 lowercasetablenames, boolean, MySQL lower case table setting, default True 40 host - string, host to connect 41 user - string, user to connect as 42 passwd - string, password to use 43 db - string, database to use 44 port - integer, TCP/IP port to connect 45 log - boolean, logging flag 46 ligandb - string, ligand database name 47 repflag - boolean, print report, default False 48 rmHetmol - boolean, remove heteromolecule, default False 49 unix_socket - string, location of unix_socket to use 50 conv - conversion dictionary, see MySQLdb.converters 51 connect_timeout - number of seconds to wait before the connection attempt fails. 52 compress - if set, compression is enabled 53 named_pipe - if set, a named pipe is used to connect (Windows only) 54 init_command - command which is run once the connection is created 55 read_default_file - file from which default client values are read 56 read_default_group - configuration group to use from the default file 57 cursorclass - class object, used to create cursors (keyword only) 58 use_unicode - if True, text-like columns are returned as unicode objects 59 using the connection's character set. Otherwise, text-like 60 columns are returned as strings. columns are returned as 61 normal strings. Unicode objects will always be encoded to 62 the connection's character set regardless of this setting. 63 charset - if supplied, the connection character set will be changed 64 to this character set (MySQL-4.1 and newer). This implies 65 use_unicode=True. 66 sql_mode - if supplied, the session SQL mode will be changed to this 67 setting (MySQL-4.1 and newer). 
For more details and legal 68 values, see the MySQL documentation. 69 client_flag - integer, flags to use or 0 70 (see MySQL docs or constants/CLIENTS.py) 71 ssl - dictionary or mapping, contains SSL connection parameters; 72 see the MySQL documentation for more details 73 (mysql_ssl_set()). If this is set, and the client does not 74 support SSL, NotSupportedError will be raised. 75 local_infile - integer, non-zero enables LOAD LOCAL INFILE; zero disables 76 format - string format for log handler 77 filter - filter object from logger object 78 datefmt - data/time format 79 path - directory path to log file 80 filename - log filename, default log 81 filemode - mode to open log file, default='a' 82 level - set root logger level to specified level 83 logfilelevel- set level to log file 84 cache - create cache for query, default=True 85 scheme2file - Boolean - save database scheme to shelve file 86 87 OUTPUT: 88 pdb file 89 """
def __init__(self, dirpath='', pdbfile='', host='localhost', db='macmol', user=None, passwd=None,
             port=3306, ligandb=' ', log=False, **kwargs):
    """
    Connect to the database, export the selected entry to a PDB file and close the connection.

    INPUT:
     dirpath - str, directory path handed to createPDBFile
     pdbfile - str, pdb file path handed to createPDBFile
     host, db, user, passwd, port, log - connection settings forwarded to TaBuilder.__init__
     ligandb - str, ligand database name; None or an all-whitespace string means
               that no ligand database is used
     kwargs  - further options, forwarded to TaBuilder.__init__ and merged into
               self.kwargs. Options that get a default here:
               repflag (False), id (False), pdbcode (False), modelid ([]),
               logdebug (False), lowercasetablenames (True), rmHetmol (True),
               tags (False)
    OUTPUT:
     pdb file, written by createPDBFile when db2mol returns a non-empty molecule dictionary
    """
    TaBuilder.__init__(self, host, db, user, passwd, port, log, **kwargs)
    InsertMacMolTables.__init__(self)

    # None has no isspace(); treat it exactly like the blank default.
    self.ligandb = ligandb
    if ligandb is None or ligandb.isspace():
        self.ligandb = None

    # NOTE(review): self.kwargs is presumably created by TaBuilder.__init__ - confirm.
    self.kwargs.setdefault('repflag', False)
    try:
        # str() of a non-ASCII unicode object raises UnicodeEncodeError,
        # which is a ValueError subclass.
        self.kwargs['pdbfile'] = str(pdbfile)
    except ValueError as e:
        print('Error: %s' % e)
        sys.exit(1)
    self.kwargs['dirpath'] = dirpath
    self.kwargs.update(kwargs)

    # Fill in every option that the caller did not supply.
    for option, default in (('id', False),
                            ('pdbcode', False),
                            ('modelid', []),
                            ('logdebug', False),
                            ('lowercasetablenames', True),
                            ('rmHetmol', True),
                            ('tags', False)):
        self.kwargs.setdefault(option, default)

    macmoldict, headerdict = self.db2mol(
        tabcolvaldict={'Id': self.kwargs['id'], 'PDBCode': self.kwargs['pdbcode']},
        modelid=self.kwargs['modelid'],
        logdebug=self.kwargs['logdebug'],
        lowercasetablenames=self.kwargs['lowercasetablenames'])
    if macmoldict:
        # hetat is the inverse of rmHetmol: hetero atoms are passed on only when they are kept.
        createPDBFile(macromolecule=macmoldict,
                      hetat=(not self.kwargs['rmHetmol']),
                      headerdict=headerdict,
                      dirpath=self.kwargs['dirpath'],
                      pdbfile=self.kwargs['pdbfile'])
    self.closeDB()
128
def db2mol(self, logdebug=False, tabcolvaldict=None, lowercasetablenames=True, **kwarg):
    """
    Read one macromolecule entry from the database and assemble its dictionaries.

    INPUT:
     logdebug - boolean, debug logging
     tabcolvaldict - dict, table column/value dictionary; the keys 'Id' and
                     'PDBCode' select the entry, at least one of them must be set
     lowercasetablenames - boolean, MySQL lower case table names setting
     modelid - (keyword) model id or list of model ids; when empty all models
               found for the entry are used
    OUTPUT:
     (macmoldict, headerdict) - molecule dictionary and header tag dictionary
     (header tags are collected only when self.kwargs['tags'] is set);
     (None, None) when any step fails.
     The process exits with status 1 when the connection is already closed.
    """
    if tabcolvaldict is None:
        tabcolvaldict = {}
    print('Importing %s table description.' % self.kwargs['db'])
    ElemTabIDict = {}
    revtags = {}
    tags = {}
    macdbname = self.kwargs['db']  # macromolecule database name
    if self.status == 'Disconnected':
        print('Error: Please reconnect to database! Connection has been closed by previous function.')
        if logdebug:
            self.log.error('Please reconnect to database. Connection has been closed by previous function.')
        sys.exit(1)
    self.macmoltabdesc = self.genTables()  # generate table description

    # NOTE(review): PropDef/ElemTab are only read when a ligand database is
    # configured; this scope was inferred from the control flow - confirm.
    if self.ligandb:
        try:
            self.kwargs['db'] = self.ligandb  # set db to ligand database
            print('Importing %s table description.' % self.kwargs['db'])
            self.tabdesc = self.genTables()

            if lowercasetablenames:
                name = 'propdef'
            else:
                name = 'PropDef'
            propdefdict = {name: [
                ['AtSeqNum', 'pdb atom sequence number'],
                ['AtPDBSymbol', 'pdb atom symbol'],
                ['AtOccup', 'pdb atom occupancy'],
                ['AtBFactor', 'pdb B factor'],
                ['Charge', 'atom charge'],
                ['AtAltLoc', 'atom alternate location'],
                ['AtInsCode', 'code for insertion of residues'],
                ['EntryIdFk', 'macromolecule database Entry table foreign key'],
            ]}
            propdeflist = propdefdict[name]
            ### import to Ligand.PropDef table ###
            PropDefIDict = self.PropDef(lowercasetablenames=lowercasetablenames, **propdefdict)
            if not PropDefIDict:
                print('Error: Incorrect import to PropDef table')
                if logdebug:
                    self.log.error('Incorrect import to PropDef table')
                return None, None
            ### get ElemTab table ###
            ElemTabIDict = self.getElemTab(lowercasetablenames=lowercasetablenames)
            tablename = 'ElemTab'
            if not ElemTabIDict:
                print('Data in %s not available!' % tablename)
                self.log.error('Data in %s not available', tablename)
                return None, None
            ### get PropDef table ###
            propdefdict = self.getPropDef(lowercasetablenames=lowercasetablenames)
            tablename = 'PropDef'
            if not propdefdict:
                print('Data in %s not available!' % tablename)
                self.log.error('Data in %s not available', tablename)
                return None, None
            wanted = [tag[0] for tag in propdeflist]
            tags, _refused = self.checkPropId(wanted, propdefdict)
            revtags = dict((tagval, tagkey) for tagkey, tagval in tags.items())
        except Error as e:
            print('Error: %s' % e)
            return None, None
        finally:
            # Always switch back to the macromolecule database, also on the
            # early-return and error paths.
            self.kwargs['db'] = macdbname

    print('Importing data, please wait ...')
    modelid = kwarg.get('modelid', [])
    if modelid:
        modelid = self.checkId(modelid)  # check specified model Id
        if not modelid:
            return None, None

    entryid = tabcolvaldict.get('Id', None)
    pdbcode = tabcolvaldict.get('PDBCode', None)
    # Validate the PDB code only when one was given: an entry may be selected
    # by Id alone, in which case PDBCode is False/None.
    if pdbcode and not isinstance(pdbcode, str):
        print('Error: Invalid PDB code specification! Must be string!')
        self.log.error('Invalid PDB code specification! Must be string')
        return None, None
    if not (entryid or pdbcode):
        print('Error: Id and PDBCode not specified!')
        self.log.error('Id and PDBCode not specified')
        return None, None
    if not entryid:  # only the PDB code was specified
        pdbcode = pdbcode.ljust(10, ' ')  # the code is padded to length 10 for the lookup
        ### get Entry ID ###
        entryid = self.getEntry(logdebug=logdebug, tabcolvaldict={'PDBCode': pdbcode},
                                lowercasetablenames=lowercasetablenames, conflag=True)
    entryid = self.checkId(entryid)  # check Entry Id
    if not entryid:
        print('Error: Incorrect Entry Id specification! Must be integer, long or list of integers!')
        self.log.error('Incorrect Entry Id specification')
        return None, None

    # Keyword arguments shared by almost every table getter below.
    common = dict(logdebug=logdebug, tabcolvaldict=tabcolvaldict,
                  lowercasetablenames=lowercasetablenames)

    ### get Model ID ###
    ModelIDict = self.getModel(ID=entryid, **common)
    if not ModelIDict:
        print('Error: Incorrect ID for Model table.')
        self.log.error('Incorrect ID for Model table')
        return None, None
    if not modelid:  # no model specified: take all available models
        modelid = list(ModelIDict.keys())
    ### check if specified models are in database ###
    corrmodel, refmodel = self.checkIDB(modelid, list(ModelIDict.keys()))  # correct / refused models
    if refmodel:
        print('Warning: Specified %s models not available in database!' % refmodel)
        self.log.warning('Specified %s models not available in database', refmodel)
    if not corrmodel:
        print('Error: Specified %s models not available in database!' % refmodel)
        self.log.error('Specified %s models not available in database', refmodel)
        return None, None
    # reverse Model dictionary {model_id: model_specification}
    revModelIDict = dict((ModelIDict[model], model) for model in corrmodel)

    ### get Chain ID ###
    ChainIDict = self.getChain(ID=entryid, **common)
    if not ChainIDict:
        print('Error: Incorrect ID for Chain table')
        self.log.error('Incorrect ID for Chain table')
        return None, None
    revChainIDict = dict((int(chainval[0]), chainkey) for chainkey, chainval in ChainIDict.items())

    ### get Alternate Location ID ###
    AltLocIDict = self.getAltLoc(ID=entryid, **common)
    if AltLocIDict:
        revAltLocIDict = dict((altval[0], altkey) for altkey, altval in AltLocIDict.items()
                              if altkey.isalpha())
    else:
        revAltLocIDict = {}

    ### get Molecule ID ###
    MoleculeIDict, ProtStatIDict = self.getMolecule(ID=entryid, IDict=revChainIDict, **common)
    if not MoleculeIDict:
        print('Error: Incorrect ID for Molecule table')
        self.log.error('Incorrect ID for Molecule table')
        return None, None

    if self.kwargs['rmHetmol'] and ProtStatIDict:
        # Rewrite each atom list in place as {atomsid: [atom_position, atom_number]}.
        # NOTE(review): nothing in this method reads the result; the in-place
        # rewrite is kept because ProtStatIDict comes from getMolecule.
        for protstatval in ProtStatIDict.values():
            protstatval[1] = dict((atom[0], [pos + 1, atom[1]])
                                  for pos, atom in enumerate(protstatval[1]))

    ResidueIDict = {}  # residue dictionary
    HetMolIDict = {}   # heteromolecule dictionary
    for MoleculeKey, MoleculeVal in MoleculeIDict.items():
        moltype = MoleculeVal.get('MolType', None)
        chemcompidfk = MoleculeVal.get('ChemCompIdFk', None)
        if moltype == 'P':  # protein
            residueId = self.getResidue(ID=[MoleculeKey], IDict=revModelIDict, **common)
            if not residueId:
                print('Error: Incorrect ID for Residue table')
                self.log.error('Incorrect ID for Residue table')
                return None, None
            ResidueIDict.update(residueId)
            continue
        # not a protein: heteromolecules need a ligand database, must not be
        # filtered out by rmHetmol and need a ChemComp foreign key
        if not self.ligandb or self.kwargs['rmHetmol'] or chemcompidfk is None:
            continue
        hetmolId = self.getHetMol(ID=[MoleculeKey], IDict=revModelIDict, **common)
        if not hetmolId:
            print('Error: Incorrect ID for HetMol table')
            self.log.error('Incorrect ID for HetMol table')
            return None, None
        molcode = MoleculeVal.get('MolCode', None)  # molecule 3 letter code
        for hetmolval in hetmolId.values():
            hetmolval.update({'MolCode': molcode})
        HetMolIDict.update(hetmolId)

    macmoldict = {}  # macromolecule dictionary
    ### iterate Residue dictionary ###
    for ResidueID, resdict in ResidueIDict.items():
        ResAtomsIDict = self.getResAtoms(ID=[ResidueID], **common)
        if not ResAtomsIDict:
            print('Error: Incorrect ID for ResAtoms table')
            self.log.error('Incorrect ID for ResAtoms table')
            return None, None
        ResAtomStatIDict = self.getResAtomStat(ID=[ResidueID], **common)
        if not ResAtomStatIDict:
            print('Error: Incorrect ID for ResAtomStat table')
            self.log.error('Incorrect ID for ResAtomStat table')
            return None, None
        resmacid, macobj = self.createmacmoldict(
            resdict=resdict, chaindict=revChainIDict, modeldict=revModelIDict,
            resatomsdict=ResAtomsIDict, resatomstatdict=ResAtomStatIDict,
            altlocdict=revAltLocIDict, elemtabdict=ElemTabIDict, hetflag=False)
        if not macobj:
            return None, None
        macmoldict[resmacid] = macobj

    def _tagdict(propname):
        # {property_id: property_name} for one property name, {} when unknown
        propid = revtags.get(propname, None)
        if propid and propid in tags:
            return {propid: tags[propid]}
        return {}

    atseqnum = _tagdict('AtSeqNum')
    atpdbsymbol = _tagdict('AtPDBSymbol')
    atoccup = _tagdict('AtOccup')
    atbfactor = _tagdict('AtBFactor')
    ataltloc = _tagdict('AtAltLoc')
    atinscode = _tagdict('AtInsCode')

    def _propvalue(tablename, tagdict, atomstatkey, default):
        # value of one atom property; default unless exactly one row is found
        found = self.getProponIdFk(
            tablename=tablename, tabcol=['Id', 'PropVal', 'PropDefIdFk', 'AtomStatIdFk'],
            tabcolvaldict=tagdict, logdebug=logdebug,
            lowercasetablenames=lowercasetablenames, idflag=True, ID=[atomstatkey])
        if found and len(found) == 1:
            return list(found.values())[0][0]
        return default

    def _ligandatoms(protstatid, confstatid):
        # AtomStat rows of one heteromolecule joined with Atoms and the atom
        # properties; None when a lookup fails
        tablename = 'Atoms'
        if lowercasetablenames:
            tablename = tablename.lower()
        atomsid = self.getAtomsonIdFk(logdebug=logdebug, tabcolvaldict={'ProtStatIdFk': [protstatid]},
                                      lowercasetablenames=lowercasetablenames)
        if not atomsid:
            print('There is no specified input in %s table!' % tablename)
            self.log.info('There is no specified input in %s', tablename)
            return None
        tablename = 'AtomStat'
        if lowercasetablenames:
            tablename = tablename.lower()
        atomstatid = self.getAtomStatonIdFk(logdebug=logdebug, tabcolvaldict={'ConfStatIdFk': [confstatid]},
                                            lowercasetablenames=lowercasetablenames)
        if not atomstatid:
            print('There is no specified input in %s table!' % tablename)
            self.log.error('There is no specified input in %s', tablename)
            return None

        # {atomsid: [atom_position, atom_number]}
        atomsidict = dict((atom[0], [pos + 1, atom[1]]) for pos, atom in enumerate(atomsid))
        for coords in atomstatid.values():
            if len(coords) != len(atomsidict):
                print('Error: Different length for coordinate lists!')
                self.log.error('Different length for coordinate list')
                return None
            for coord in coords:
                ### join AtomStat with Atoms table ###
                if coord[0] not in atomsidict:
                    print('Error: Specified Atoms foreign key not available!')
                    self.log.error('Specified Atoms foreign key not available')
                    return None
                position, number = atomsidict[coord[0]]
                coord[0] = position
                coord.insert(4, number)
                atomstatkey = coord[5]  # key used for all property lookups below
                # Append order matters: serial number, pdb symbol, alternate
                # location, insertion code, occupancy, B factor.
                coord.append(_propvalue('PropAtomStatInt', atseqnum, atomstatkey, 0))
                coord.append(_propvalue('PropAtomStatText', atpdbsymbol, atomstatkey, None))
                coord.append(_propvalue('PropAtomStatText', ataltloc, atomstatkey, ' '))
                coord.append(_propvalue('PropAtomStatText', atinscode, atomstatkey, ' '))
                coord.append(_propvalue('PropAtomStatFloat', atoccup, atomstatkey, 1.0))
                coord.append(_propvalue('PropAtomStatFloat', atbfactor, atomstatkey, 0.0))
        return atomstatid

    hetmoldict = {}  # heteromolecule dictionary
    ### iterate HetMol dictionary ###
    for HetMolID, resdict in HetMolIDict.items():
        protstatid = resdict['ProtStatIdFk']
        if protstatid is None:
            print('Warning: ProtStat foreing key not available in HetMol table (Id: %s).' % HetMolID)
            print('Warning: Heteromolecule extraction failed!')
            self.log.warning('ProtStat foreing key not available in HetMol table (Id: %s).', HetMolID)
            self.log.warning('Hetermolecule extraction failed')
            hetmoldict = {}
            break
        confstatid = resdict['ConfStatIdFk']
        if confstatid is None:
            print('Warning: ConfStat foreing key not available in HetMol table (Id: %s).' % HetMolID)
            print('Warning: Heteromolecule extraction failed!')
            self.log.warning('ConfStat foreing key not available in HetMol table (Id: %s).', HetMolID)
            self.log.warning('Hetermolecule extraction failed')
            hetmoldict = {}
            break
        self.kwargs['db'] = self.ligandb  # set database to ligand
        try:
            atomstatid = _ligandatoms(protstatid, confstatid)
        finally:
            self.kwargs['db'] = macdbname  # set database back to macromolecule
        if atomstatid is None:
            return None, None
        hetmolid, hetobj = self.createhetmoldict(
            hetdict=resdict, chaindict=revChainIDict, modeldict=revModelIDict,
            atomstatdict=atomstatid, elemtabdict=ElemTabIDict)
        if not hetobj:
            return None, None
        hetmoldict[hetmolid] = hetobj

    macmoldict.update(hetmoldict)

    ### get header data ###
    headerdict = {}

    def _single(records, what, setter):
        # use `records` only when the table holds exactly one record
        if not records:
            return
        count = len(records)
        if count > 1:
            print(('Error: ' + what + '!') % (count))
            self.log.error(what, count)
        else:
            headerdict.update(setter(records))

    if self.kwargs['tags']:
        ### HEADER, KEYWDS and TITLE ###
        _single(self.getHeadKeywDat(ID=entryid, **common),
                'Specified %s headers in HeadKeywDat table', self.setheadertitlekeywds)
        ### EXPDTA ###
        _single(self.getExperimDat(ID=entryid, **common),
                'Specified %s experimental data in ExperimDat table', self.setexpdta)
        ### DBREF ###
        dbrefdatdict = self.getDBRefDat(ID=entryid, **common)
        if dbrefdatdict:
            headerdict.update(self.setdbref(dbrefdatdict, revChainIDict))
        ### JRNL ###
        _single(self.getJournalDat(ID=entryid, **common),
                'Specified %s journal data in JournalDat table', self.setjrnl)
        ### SEQADV ###
        seqadvdatdict = self.getSeqAdvDat(ID=entryid, **common)
        if seqadvdatdict:
            headerdict.update(self.setseqadv(seqadvdatdict, revChainIDict))
        ### MTRIXn ###
        _single(self.getMatrixDat(ID=entryid, **common),
                'Specified %s transformation data in MatrixDat table', self.setmatrix)
        ### ORIGXn ###
        _single(self.getOrigDat(ID=entryid, **common),
                'Specified %s transformation data in OrigDat table', self.setorig)
        ### SCALEn ###
        _single(self.getScalDat(ID=entryid, **common),
                'Specified %s transformation data in ScalDat table', self.setscal)
        ### SEQRES ###
        chainreseqdict = self.getChainReSeq(IDict=revChainIDict, **common)
        if chainreseqdict:
            headerdict.update(self.setseqres(chainreseqdict))

    return macmoldict, headerdict
602
def setseqres(self, chainreseqdict):
    """
    set SEQRES in pdb file
    INPUT:
     chainreseqdict - dict, data from ChainReSeq table {chain:[residue_name]}
    OUTPUT:
     seqresdict - dict, {'SEQRES': [line, ...]}; every line holds the serial
                  number within the chain, the chain id, the total number of
                  residues in the chain (right-justified, width 4) and up to
                  13 residue names. A chain without residues adds no line.
    """
    residues_per_line = 13
    linelist = []
    # Chains are emitted in sorted order so the result does not depend on
    # dictionary iteration order.
    for chainid in sorted(chainreseqdict):
        seqreslist = chainreseqdict[chainid]
        numres = str(len(seqreslist)).rjust(4, ' ')
        for sernum, start in enumerate(range(0, len(seqreslist), residues_per_line)):
            resnames = ' '.join(seqreslist[start:start + residues_per_line])
            linelist.append(str(sernum + 1) + ' ' + chainid + ' ' + numres + 2 * ' ' + resnames)
    return {'SEQRES': linelist}
635
def setscal(self, scaldatdict):
    """
    set SCALEn in pdb file
    INPUT:
     scaldatdict - list, data from ScalDat table; only the first record is
                   used, it is read through the keys S11..S33 and U1..U3
    OUTPUT:
     scaldict - dict, {'SCALE1': [line], 'SCALE2': [line], 'SCALE3': [line]}
    """
    record = scaldatdict[0]
    scaldict = {}
    for i in range(1, 4):
        fields = []
        for j in range(1, 4):  # set S[i,j]
            s = record['S%s%s' % (i, j)]
            # Fixed-point formatting (width 10, 6 decimals) also copes with
            # integers, exponent notation (1e-05) and over-long fractions,
            # which splitting str(value) on '.' does not.
            fields.append(10 * ' ' if s is None else '%10.6f' % float(s))
        u = record['U%s' % (i)]  # set U[i]: 5 blanks + width 10, 5 decimals
        fields.append(15 * ' ' if u is None else 5 * ' ' + '%10.5f' % float(u))
        scaldict['SCALE%s' % (i)] = [''.join(fields)]
    return scaldict
666
def setorig(self, origdatdict):
    """
    set ORIGXn in pdb file
    INPUT:
     origdatdict - list, data from OrigDat table; only the first record is
                   used, it is read through the keys O11..O33 and T1..T3
    OUTPUT:
     origdict - dict, {'ORIGX1': [line], 'ORIGX2': [line], 'ORIGX3': [line]}
    """
    record = origdatdict[0]
    origdict = {}
    for i in range(1, 4):
        fields = []
        for j in range(1, 4):  # set O[i,j]
            o = record['O%s%s' % (i, j)]
            # Fixed-point formatting (width 10, 6 decimals) also copes with
            # integers, exponent notation (1e-05) and over-long fractions,
            # which splitting str(value) on '.' does not.
            fields.append(10 * ' ' if o is None else '%10.6f' % float(o))
        t = record['T%s' % (i)]  # set T[i]: 5 blanks + width 10, 5 decimals
        fields.append(15 * ' ' if t is None else 5 * ' ' + '%10.5f' % float(t))
        origdict['ORIGX%s' % (i)] = [''.join(fields)]
    return origdict
697
def setmatrix(self, matrixdatdict):
    """
    set MTRIXn in pdb file
    INPUT:
     matrixdatdict - list, data from MatrixDat table; only the first record is
                     used, it is read through the keys SerNum, M11..M33,
                     V1..V3 and iG1..iG3
    OUTPUT:
     matrixdict - dict, {'MTRIX1': [line], 'MTRIX2': [line], 'MTRIX3': [line]}
    """
    record = matrixdatdict[0]
    serial = record['SerNum']
    matrixdict = {}
    for i in range(1, 4):
        fields = []
        if serial is not None:  # set serial
            fields.append(str(serial))
        for j in range(1, 4):  # set m[i,j]
            m = record['M%s%s' % (i, j)]
            # Fixed-point formatting (width 10, 6 decimals) also copes with
            # integers, exponent notation (1e-05) and over-long fractions,
            # which splitting str(value) on '.' does not.
            fields.append(10 * ' ' if m is None else '%10.6f' % float(m))
        v = record['V%s' % (i)]  # set v[n]: 5 blanks + width 10, 5 decimals
        fields.append(15 * ' ' if v is None else 5 * ' ' + '%10.5f' % float(v))
        g = record['iG%s' % (i)]  # set iG[n]; an empty or zero flag is left blank
        fields.append(str(g).rjust(5, ' ') if g else 5 * ' ')
        matrixdict['MTRIX%s' % (i)] = [''.join(fields)]
    return matrixdict
738
def setseqadv(self,seqadvdatdict,chaindict):
    """
    set SEQADV in pdb file
    INPUT:
        seqadvdatdict - list, data from SeqAdvDat table
        chaindict - dict, {chain_id: chain_name}
    OUTPUT:
        seqadvdict - dict, {'SEQADV': [line, ...]}
    Rows whose chain is unknown, not alphabetic or longer than one
    character are skipped.
    """
    def column(text,width,right=False):
        # one separating blank followed by the value padded to width;
        # a missing value gives width+1 blanks
        text=text or ''
        if right:
            return ' '+text.rjust(width,' ')
        return ' '+text.ljust(width,' ')

    def number(value):
        # 0 is a legal sequence number, only None means "no value"
        if value is None:
            return ''
        return str(value)

    linelist=[]
    for seqadvitem in seqadvdatdict:
        chain=chaindict.get(seqadvitem['ChainIdFk'],None)#set chainID
        # an id missing from chaindict used to fail on None.isalpha()
        if chain is None or not chain.isalpha() or len(chain)>1:
            continue
        # NOTE(review): the idCode filler is reproduced as rendered in the
        # listing; runs of blanks inside literals may have been collapsed
        # there (the PDB idCode field is 4 columns wide) - confirm.
        templine=''.join([
            " ",#set idCode
            column(seqadvitem['ResName'],3,True),#set resName
            ' '+chain,
            column(number(seqadvitem['SeqNum']),4,True),#set seqNum
            seqadvitem['InsCode'] or ' ',#set iCode
            column(seqadvitem['DBName'],4),#set database
            column(seqadvitem['DBCode'],9),#set dbIdCode
            column(seqadvitem['DBResName'],3),#set dbRes
            column(number(seqadvitem['DBSeqNum']),5,True),#set dbSeq
            column(seqadvitem['Conflict'],21),#set conflict
        ])
        linelist.append(templine)
    seqadvdict={'SEQADV':linelist}
    return seqadvdict
807
def setjrnl(self,journaldatdict):
    """
    set JRNL in pdb file
    INPUT:
        journaldatdict - list, data from JournalDat table (only the first row is used)
    OUTPUT:
        journaldict - dict, {'JRNL': [line, ...]} with AUTH, TITL and REF lines
    """
    # NOTE(review): the literals below are reproduced as rendered in the
    # listing; runs of blanks inside them may have been collapsed there -
    # confirm against the JRNL column layout.
    def indent(item):
        # lines produced by splitline start with their continuation number
        # (all but the first); keep that number in a two character field
        number=item[0:2].strip()
        if not number.isdigit():
            return 3*' '+item
        if len(number)==1:
            return ' '+item
        return item

    row=journaldatdict[0]
    linelist=[]

    author=row['Author']
    if author:# get author
        pieces=self.splitline(author,50,',')
        total=len(pieces)
        for position,piece in enumerate(pieces):
            piece=indent(piece)
            # every line but the last one ends with a comma
            if position+1<total and not piece.rstrip().endswith(','):
                piece+=','
            linelist.append(' AUTH'+piece)

    title=row['Title']#get title
    if title:
        for piece in self.splitline(title,50,' '):
            linelist.append(' TITL'+indent(piece))

    volume=row['Volume']#get journal volume
    if volume:
        volume='V.'+str(volume).rjust(4,' ')
    page=row['Page']#get page
    if page:
        page=str(page).rjust(5,' ')
    year=row['Year']
    if year:
        year=str(year).rjust(4,' ')

    journal=row['Journal']#get journal
    if journal:
        # volume, page and year are the same on every REF line
        if volume:
            suffix=' '+volume
        else:
            suffix=7*' '
        if page:
            suffix+=' '+page
        else:
            suffix+=2*' '
        if year:
            suffix+=' '+year
        else:
            suffix+=5*' '
        for piece in self.splitline(journal,28,'.',' '):
            linelist.append(' REF '+indent(piece)+suffix)

    journaldict={'JRNL':linelist}
    return journaldict
889
def setdbref(self,dbrefdatdict,chaindict):
    """
    set DBREF in pdb file
    INPUT:
        dbrefdatdict - list, data from DBRefDat table
        chaindict - dict, {chain_id: chain_name}
    OUTPUT:
        dbrefdict - dict, {'DBREF': [line, ...]}
    Rows whose chain is unknown, not alphabetic or longer than one
    character are skipped.
    """
    def column(text,width,right=False):
        # one separating blank followed by the value padded to width;
        # a missing value gives width+1 blanks
        text=text or ''
        if right:
            return ' '+text.rjust(width,' ')
        return ' '+text.ljust(width,' ')

    def number(value):
        # 0 is a legal sequence number, only None means "no value"
        if value is None:
            return ''
        return str(value)

    linelist=[]
    for dbrefitem in dbrefdatdict:
        chain=chaindict.get(dbrefitem['ChainIdFk'],None)#set chainID
        # an id missing from chaindict used to fail on None.isalpha()
        if chain is None or not chain.isalpha() or len(chain)>1:
            continue
        # NOTE(review): the idCode filler is reproduced as rendered in the
        # listing; runs of blanks inside literals may have been collapsed
        # there (the PDB idCode field is 4 columns wide) - confirm.
        templine=''.join([
            2*" ",#set idCode
            ' '+chain,
            column(number(dbrefitem['SeqBeg']),4,True),#set seqBegin
            dbrefitem['InsBeg'] or ' ',#set insertBegin
            column(number(dbrefitem['SeqEnd']),4,True),#set seqEnd
            dbrefitem['InsEnd'] or ' ',#set insertEnd
            column(dbrefitem['DBName'],6),#set database
            column(number(dbrefitem['DBAccess']),8),#set dbAccession
            column(dbrefitem['DBCode'],12),#set dbIdCode
            column(number(dbrefitem['DBSeqBeg']),5,True),#set dbseqBegin
            dbrefitem['DBInsBeg'] or ' ',#set dbinsBeg
            column(number(dbrefitem['DBSeqEnd']),5,True),#set dbseqEnd
            dbrefitem['DBInsEnd'] or '',#set dbinsEnd, last field: no filler
        ])
        linelist.append(templine)

    dbrefdict={'DBREF':linelist}

    return dbrefdict
989
def setexpdta(self,experimdatdict):
    """
    set EXPDTA in pdb file
    INPUT:
        experimdatdict - list, data from ExperimDat table (only the first row is used)
    OUTPUT:
        expdatdict - dict, {'EXPDTA': [line, ...]}; empty when the row has
                     no experiment name
    """
    row=experimdatdict[0]
    transdict={'ExperimName':'EXPDTA'}#table column -> pdb tag
    expdatdict={}
    for column,tag in transdict.items():
        value=row.get(column)
        # a missing column or a NULL value produces no record; NULL used to
        # reach splitline and fail on len(None)
        if value is None:
            continue
        expdatdict[tag]=self.splitline(value,60)
    return expdatdict
1006
def setheadertitlekeywds(self,headkeywdatdict):
    """
    set HEADER, KEYWDS and TITLE in pdb file
    INPUT:
        headkeywdatdict - list, data from HeadKeywDat table (only the first row is used)
    OUTPUT:
        headict - dict, tag and tag value dictionary; a tag is left out when
                  its column is None
    """
    row=headkeywdatdict[0]
    headict={}
    header=row['Class']
    if header is not None:
        # classification padded to 40 characters, followed by the deposition
        # date. Built in a local variable: the row itself is not modified,
        # so a second call on the same row gives the same HEADER.
        header=header.ljust(40,' ')
        depositdate=row['DepositDate']
        if depositdate is not None:
            header=header+depositdate
        headict['HEADER']=[header]
    title=row['Title']
    if title is not None:
        headict['TITLE']=self.splitline(title,60)
    keywds=row['KeyWords']
    if keywds is not None:
        headict['KEYWDS']=self.splitline(keywds,60)

    return headict
1037
def splitline(self,tagvalue,linelen,specificator=" ",fillspec=''):
    """
    Split specified string into list of strings
    INPUT:
        tagvalue - str, tag value
        linelen - int, line length
        specificator - str, split specificator
        fillspec - str, fill tag value to line length with specificator, default empty
    OUTPUT:
        linelist - list, list of strings; every line after the first one
                   starts with its continuation number
    RAISES:
        ValueError - tagvalue has to be split and linelen is smaller than 1
    """
    # the window is one column wider than linelen so that a full line may
    # end with the blank that separates it from the next word
    window=linelen+1
    if len(tagvalue)<window:
        if fillspec:
            tagvalue=tagvalue.ljust(window,fillspec)
        return [tagvalue]
    if linelen<1:
        raise ValueError('linelen must be a positive integer')
    linelist=[]
    counter=0
    while True:
        counter+=1
        tagline=tagvalue[0:window]
        if not tagline.endswith(' '):
            pieces=tagline.split(specificator)
            if len(pieces)>1:
                # break after the last separator, carry the rest over
                tagline=specificator.join(pieces[:-1])
                carry=pieces[-1]
            else:
                # no separator inside the window: hard break at linelen,
                # otherwise nothing is consumed and the loop never ends
                carry=tagline[linelen:]
                tagline=tagline[:linelen]
            # prepending the carried text makes the slice below drop
            # exactly the characters that were emitted
            tagvalue=carry+tagvalue
        if counter>1:
            tagline=str(counter)+' '+tagline
        linelist.append(tagline)
        tagvalue=tagvalue[window:]

        if len(tagvalue)<window:
            tagvalue=tagvalue.lstrip()
            if tagvalue:#do not emit a continuation line without text
                tagvalue=str(counter+1)+' '+tagvalue
                if fillspec:
                    tagvalue=tagvalue.ljust(window,fillspec)
                linelist.append(tagvalue)
            break

    return linelist
1078
def createhetmoldict(self,hetdict,chaindict,modeldict,atomstatdict,molchargeflag=None,pdbsymbolflag=None,conntabdict=None,elemtabdict=None,hetflag=True):
    """
    create heteromolecule object
    INPUT:
        hetdict - dict, heteromolecule table dictionary
        chaindict - dict, chain table dictionary
        modeldict - dict, model table dictionary
        atomstatdict - dict, heteromolecule atom stat table dictionary;
                       the first value is used as the list of atom rows
        molchargeflag - not used by this method
        pdbsymbolflag - not used by this method
        conntabdict - not used by this method
        elemtabdict - dict, elements table dictionary, default empty
        hetflag - boolean, heteromolecule flag stored on atoms and molecule
    OUTPUT:
        hetid - str, '<model>_[<chain>_]<seqnum>[<inscode>]', 0 on error
        obj - Molecule object, 0 on error
    """
    modelid=modeldict[hetdict['ModelIdFk']]#hetmol modelid
    chainid=hetdict['ChainIdFk']#hetmol chainid
    if chainid:
        chainid=chaindict[chainid]
    else:
        chainid=' '
    hetid=hetdict['SeqNum']#hetmol id
    hetname=hetdict['MolCode']#hetmol name
    hetatnum=hetdict['PDBNumHetAt']#hetmol atom number
    hetfragnum=0#hetmol fragment number, default 0
    hetsecstruct=0#hetmol secondary structure, default 0
    objdict={}#atom name -> Atom object
    hetmolcoords=list(atomstatdict.values())[0]#hetmol coordinates
    hetinscodelist=[]
    for coord in hetmolcoords:#iterate coordinates
        # indices read below: 1-3 position, 4 atomic number, 6 serial number,
        # 7 atom name, 8 alternate location, 9 insertion code, 10 occupancy,
        # 11 b factor
        atnum=coord[4]#get atomic number
        if elemtabdict:
            atsymbol=elemtabdict[atnum]
        else:
            atsymbol=OEGetAtomicSymbol(atnum)#get atom symbol
        atname=coord[7]#get atom name
        if atname is None:
            print('Error: Can not retrieve atom name from %s - %s' %(hetname,hetid))
            self.log.error('Can not retrieve atom name from %s - %s',hetname,hetid)
            return 0, 0
        spaces=4-len(atname)#number of spaces in pdb atom name
        if spaces:
            atname=' '+atname+(spaces-1)*' '
        ataltloc=coord[8]#get atom alternate location
        if ataltloc!=" ":
            atname='%s_%s' %(atname,ataltloc)
        atsernum=coord[6]#get atom serial number
        atinscode=coord[9]
        hetinscodelist.append(atinscode)
        if atname in objdict:#if atom id exists in dictionary
            print('Error: Invalid key: %s, serial number: %s! Please, change atom name in PDB file ...' %(atname.strip(),atsernum))
            return 0, 0
        objdict[atname]=Atom(num=atnum,
                             symbol=atsymbol,
                             name=atname,
                             sernum=atsernum,
                             bfactor=coord[11],
                             occup=coord[10],
                             ishet=hetflag,
                             coords=(coord[1],coord[2],coord[3]),
                             altloc=ataltloc,
                             atinscode=atinscode)#add atom class object to dictionary
    ### check if all atoms have common insertion code ###
    # The comparison has to be the tested value: filtering on it first (as
    # the previous code did) leaves only matching items and is always true.
    # A molecule without atoms or without insertion code gets a blank.
    hetinscode=' '
    if hetinscodelist:
        firstcode=hetinscodelist[0]
        if firstcode is not None and all([code==firstcode for code in hetinscodelist]):
            hetinscode=firstcode
    obj=Molecule(modelid,chainid,hetid,hetname,hetatnum,hetinscode,hetfragnum,hetsecstruct,objdict,hetflag)#class object
    if chainid.isspace():
        if hetinscode.isspace():
            hetid='%s_%s' %(modelid,hetid)
        else:
            hetid='%s_%s%s' %(modelid,hetid,hetinscode)
    else:
        if hetinscode.isspace():
            hetid='%s_%s_%s' %(modelid,chainid,hetid)
        else:
            hetid='%s_%s_%s%s'%(modelid,chainid,hetid,hetinscode)

    return hetid, obj
1171
def createmacmoldict(self,resdict,chaindict,modeldict,resatomsdict,resatomstatdict,altlocdict,elemtabdict=None,hetflag=False):
    """
    create macromolecule (residue) object
    INPUT:
        resdict - dict, residue table dictionary
        chaindict - dict, chain table dictionary
        modeldict - dict, model table dictionary
        resatomsdict - dict, residue atoms table dictionary, {atom id: atom row}
        resatomstatdict - dict, residue atom stat table dictionary,
                          {atom id: (x, y, z, occupancy, b factor)}
        altlocdict - dict, alternate location dictionary, keyed by str(id)
        elemtabdict - dict, elements table dictionary, default empty
        hetflag - boolean, heteromolecule flag stored on atoms and molecule
    OUTPUT:
        resid - str, '<model>_[<chain>_]<seqnum>[<inscode>]', 0 on error
        obj - Molecule object, 0 on error
    """
    modelid=modeldict[resdict['ModelIdFk']]#get modelid
    chainid=chaindict[resdict['ChainIdFk']]#get chainid
    resid=resdict['SeqNum']#get residueid
    resname=resdict['ResName']#get residue name
    resatnum=len(resatomsdict)#get residue atom number
    resinscode=resdict['ResInsCode']#residue insert code
    if resinscode is None:
        resinscode=' '
    resfragnum=0#residue fragment number, default 0
    resecstruct=0#residue secondary structure, default 0
    objdict={}#atom name -> Atom object
    # both tables have to describe exactly the same atoms
    if len(resatomsdict)!=len(resatomstatdict) or set(resatomsdict.keys())-set(resatomstatdict.keys()):
        print('Error: Invalid residue description (model: %s, chain: %s, residue_number: %s, residue_name: %s)'%(modelid,chainid,resid,resname))
        self.log.error('Invalid residue description (model: %s, chain: %s, residue_number: %s, residue_name: %s)',modelid,chainid,resid,resname)
        return 0, 0
    for resatomsid,resatom in resatomsdict.items():
        atnum=resatom['ElemTabIdFk']#get atomic number
        if elemtabdict:
            atsymbol=elemtabdict[atnum]
        else:
            atsymbol=OEGetAtomicSymbol(atnum)#get atom symbol
        atname=resatom['AtPDBSymbol']#get atom name
        # validate before padding: once padded the name is never empty, and
        # len(None) would raise before the error could be reported
        if not atname:
            print('Error: Can not retrieve atom name from %s' %resname)
            self.log.error('Can not retrieve atom name from %s',resname)
            return 0, 0
        spaces=4-len(atname)#number of spaces in pdb atom name
        if spaces:
            atname=' '+atname+(spaces-1)*' '
        ataltloc=resatom['AltLocIdFk']
        if ataltloc is None:
            ataltloc=' '
        else:
            ataltloc=altlocdict[str(ataltloc)]
        if ataltloc!=" ":
            atname='%s_%s' %(atname,ataltloc)
        atsernum=resatom['AtSeqNum']#get atom serial number
        if atname in objdict:#if atom id exists in dictionary
            print('Error: Invalid key: %s, serial number: %s! Please, change atom name in PDB file ...' %(atname.strip(),atsernum))
            return 0, 0
        atomstat=resatomstatdict[resatomsid]
        objdict[atname]=Atom(num=atnum,
                             symbol=atsymbol,
                             name=atname,
                             sernum=atsernum,
                             bfactor=atomstat[4],
                             occup=atomstat[3],
                             ishet=hetflag,
                             coords=(atomstat[0],atomstat[1],atomstat[2]),
                             altloc=ataltloc)#add atom class object to dictionary
    obj=Molecule(modelid,chainid,resid,resname,resatnum,resinscode,resfragnum,resecstruct,objdict,hetflag)#class object
    if chainid.isspace():
        if resinscode.isspace():
            resid='%s_%s' %(modelid,resid)
        else:
            resid='%s_%s%s' %(modelid,resid,resinscode)
    else:
        if resinscode.isspace():
            resid='%s_%s_%s' %(modelid,chainid,resid)
        else:
            resid='%s_%s_%s%s'%(modelid,chainid,resid,resinscode)

    return resid, obj
1262
def checkIDB(self,getid,idb):
    """
    sort requested ids into those found in the database and those not found
    INPUT:
        getid - list, requested Id
        idb - list, Id taken from database
    OUTPUT:
        corrid - list, requested ids present in idb (input order kept)
        refid - list, requested ids missing from idb (input order kept)
    """
    sorted_ids={True:[],False:[]}
    for iditem in getid:
        sorted_ids[iditem in idb].append(iditem)
    return sorted_ids[True], sorted_ids[False]
1281
def checkId(self,id):
    """
    normalise an Id argument to a list of integers
    INPUT:
        id - int, long, str holding an integer, or list
    OUTPUT:
        list of integers; items of a list that are not integers are dropped.
        None is returned (after an error message) for any other type.
        A str that is not an integer terminates the program with exit code 1.
    """
    try:
        integers=(int,long)
    except NameError:#interpreter without a separate long type
        integers=(int,)
    if isinstance(id,integers):
        return [id]
    if isinstance(id,str):
        try:
            return [int(id)]
        except ValueError as e:
            print('Error: %s' %e)
            sys.exit(1)
    if isinstance(id,list):
        return [item for item in id if isinstance(item,integers)]
    print('Error: Incorrect model Id! Must be integer, long or list! Please correct it ...')
    self.log.error('Incorrect model id. Must be integer, long or list')
    return
1310
def checkPropId(self,propname,propdefdict):
    """
    look up property names in the property definitions
    INPUT:
        propname - list, property name list
        propdefdict - dict, property definition dictionary {property id: property name}
    OUTPUT:
        tags - dict, {property id: property name} for the names that are defined
        reftags - list, names that are not defined
    """
    ### name -> id view of the definitions ###
    idbyname=dict((name,propid) for propid,name in propdefdict.items())
    tags={}
    reftags=[]
    for propitem in propname:
        try:
            propid=idbyname[propitem]
        except KeyError:
            reftags.append(propitem)
        else:
            tags[propid]=propitem
    return tags, reftags
1331
def isMacMolTabDesCorr(self,tablename,tabcol):
    """
    check column names against the macromolecule table description
    INPUT:
        tablename - name of table, key of self.macmoltabdesc
        tabcol - list of column names
    OUTPUT:
        boolean - True when the table is described and every name in tabcol
                  is one of its columns; otherwise False after reporting
    """
    try:
        colname=self.macmoltabdesc[tablename].getColName()#get list of columns
    except KeyError as error:
        print('Error, %s: Missing description for %s!' %(tablename,error))
        self.log.exception('%s: Missing description for %s',tablename,error)
        return False
    if len(tabcol)>len(colname):#more names than the table has columns
        print('Error, %s: Too many arguments in table column list!!'%tablename)
        self.log.error('%s: Too many arguments in table column list',tablename)
        return False
    for name in tabcol:
        if name not in colname:#stop at the first unknown column
            print('Error, %s: Incorrect column name for %s!'%(tablename,name))
            self.log.error('%s: Incorrect column name for %s',tablename,name)
            return False
    return True
1360
def isTabDesCorr(self,tablename,tabcol):
    """
    check column names against the table description
    INPUT:
        tablename - name of table, key of self.tabdesc
        tabcol - list of column names
    OUTPUT:
        boolean - True when the table is described and every name in tabcol
                  is one of its columns; otherwise False after reporting
    """
    try:
        colname=self.tabdesc[tablename].getColName()#get list of columns
    except KeyError as error:
        print('Error, %s: Missing description for %s!' %(tablename,error))
        self.log.exception('%s: Missing description for %s',tablename,error)
        return False
    if len(tabcol)>len(colname):#more names than the table has columns
        print('Error, %s: Too many arguments in table column list!!'%tablename)
        self.log.error('%s: Too many arguments in table column list',tablename)
        return False
    for name in tabcol:
        if name not in colname:#stop at the first unknown column
            print('Error, %s: Incorrect column name for %s!'%(tablename,name))
            self.log.error('%s: Incorrect column name for %s',tablename,name)
            return False
    return True
################## End of class ########################################################
############## MAIN ################################################################
############ Example of usage ##########################################################
if __name__=='__main__':
    # Nothing is executed here: the calls below are usage examples only.
    pdbfile='macmol2pdb.pdb'
    ### no modelid - get all models according to pdbcode, HETATM records are not removed ###
    # A=DB2PDB(dirpath='/tmp/pdbfile',pdbfile=pdbfile,repflag=False,path='/tmp/Log',filename='MacMol2PDB',
    #          host='',db='',user='',passwd='',log=False,ligandb='',
    #          pdbcode='3ERD',rmHetmol=False,tags=True)

    ### modelid int - get the model according to id and modelid, HETATM records are not removed ###
    # A=DB2PDB(dirpath='/tmp/pdbfile',pdbfile=pdbfile,repflag=False,path='/tmp/Log',filename='MacMol2PDB',
    #          host='',db='',user='',passwd='',log=False,ligandb='',
    #          id=1,pdbcode='3ERD',modelid=2,rmHetmol=False,tags=True)

    ### modelid list - get models ###
    # A=DB2PDB(dirpath='/tmp/pdbfile',pdbfile=pdbfile,repflag=False,path='/tmp/Log',filename='MacMol2PDB',
    #          host='',db='',user='',passwd='',log=False,ligandb='',
    #          id=1,pdbcode='3ERD',modelid=[0,1,2],rmHetmol=False,tags=True)