Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package Data2DB :: Module PDB2DB
[hide private]
[frames] | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.Data2DB.PDB2DB

   1  #!/usr/bin/env python 
   2  ############################### 
   3  # PDB2DB.py                 # 
   4  # MacroMolecule import module # 
   5  ############################### 
   6   
   7  ###################################################### 
   8  # Copyright (c) 2007-2008 Andrzej Bak                # 
   9  # ARC Seibersdorf & University of Silesia            # 
  10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
  11  # License: GNU General Public License, version: 3    # 
  12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
  13  # Version: 1, 06.01.2010                             # 
  14  ###################################################### 
  15   
  16  try: 
  17      import sys 
  18      import os 
  19      from openeye.oechem import * 
  20      from MoSTBioDat.DataBase.ImportData.Data2DB.PDBFile import PDBFile,sortdictval,iswater,CanSmi 
  21      from MoSTBioDat.DataBase.ImportData.Data2DB.TaBuilder import TaBuilder 
  22      from MoSTBioDat.DataBase.ImportData.Data2DB.Smile import Smile 
  23      from MoSTBioDat.DataBase.ImportData.Data2DB.InsertMacMolTables import InsertMacMolTables 
  24      from MoSTBioDat.DataBase.Connect.MoSTBioDatErrors import Error 
  25  except ImportError,e: 
  26      print 'Error: %s' %e 
  27      sys.exit(1) 
  28  ################# PDB File to DataBase ############### 
29 -class PDB2DB(TaBuilder,InsertMacMolTables):
30 """ 31 Import data from PDB file into MacroMolecule database 32 INPUT: 33 host - string, host to connect 34 user - string, user to connect as 35 passwd - string, password to use 36 db - string, database to use 37 port - integer, TCP/IP port to connect 38 log - boolean, logging flag 39 ligandb - string, ligand database name 40 repflag - boolean, print report, default False 41 unix_socket - string, location of unix_socket to use 42 conv - conversion dictionary, see MySQLdb.converters 43 connect_timeout - number of seconds to wait before the connection attempt fails. 44 compress - if set, compression is enabled 45 named_pipe - if set, a named pipe is used to connect (Windows only) 46 init_command - command which is run once the connection is created 47 read_default_file - file from which default client values are read 48 read_default_group - configuration group to use from the default file 49 cursorclass - class object, used to create cursors (keyword only) 50 use_unicode - if True, text-like columns are returned as unicode objects 51 using the connection's character set. Otherwise, text-like 52 columns are returned as strings. columns are returned as 53 normal strings. Unicode objects will always be encoded to 54 the connection's character set regardless of this setting. 55 charset - if supplied, the connection character set will be changed 56 to this character set (MySQL-4.1 and newer). This implies 57 use_unicode=True. 58 sql_mode - if supplied, the session SQL mode will be changed to this 59 setting (MySQL-4.1 and newer). For more details and legal 60 values, see the MySQL documentation. 61 client_flag - integer, flags to use or 0 62 (see MySQL docs or constants/CLIENTS.py) 63 ssl - dictionary or mapping, contains SSL connection parameters; 64 see the MySQL documentation for more details 65 (mysql_ssl_set()). If this is set, and the client does not 66 support SSL, NotSupportedError will be raised. 67 local_infile - integer, non-zero enables LOAD LOCAL INFILE; zero disables 68 format - string format for log handler 69 filter - filter object from logger object 70 datefmt - data/time format 71 path - directory path to log file 72 filename - log filename, default log 73 filemode - mode to open log file, default='a' 74 level - set root logger level to specified level 75 logfilelevel- set level to log file 76 cache - create cache for query, default=True 77 scheme2file - Boolean - save database scheme to shelve file 78 79 OUTPUT: 80 class object 81 """
82 - def __init__(self,pdbfilepath,host='localhost',db='macmol',user=None,passwd=None,port=3306,ligandb=' ',log=False,**kwargs):
83 TaBuilder.__init__(self,host,db,user,passwd,port,log,**kwargs) 84 InsertMacMolTables.__init__(self) 85 self.pdbfilepath=pdbfilepath 86 self.ligandb=ligandb 87 if self.ligandb.isspace(): 88 self.ligandb=None 89 if not self.kwargs.has_key('repflag'): 90 self.kwargs.setdefault('repflag',False)
91
92 - def PDB2Tab(self,tabcolvaldict={},logdebug=False,lowercasetablenames=True, addH=False,rmHetmol=False):
93 """ 94 import data from pdb file to database tables 95 INPUT: 96 class object 97 lowercasetablenames - boolean, lower case table names MySQL engine settings, default True 98 logdebug - boolean, debug logging, default False 99 addH - add hydrogens with OpenEye software, default False 100 rmHetmol - remove heteromolecule, default False 101 OUTPUT: 102 importing data to database 103 """ 104 print 'Importing %s table description.' %self.kwargs['db'] 105 ResIDict={} 106 macdbname=self.kwargs['db']#get macromolecule database name 107 if self.status=='Disconnected': 108 print 'Error: Please reconnect to database! Connection has been closed by previous function.' 109 if logdebug: 110 self.log.error('Please reconnect to database. Connection has been closed by previous function.') 111 sys.exit(1) 112 self.macmoltabdesc=self.genTables()#generate table description 113 if self.ligandb: 114 try: 115 self.kwargs['db']=self.ligandb#set db to ligand database 116 print 'Importing %s table description.' %self.kwargs['db'] 117 self.tabdesc=self.genTables() 118 119 if lowercasetablenames: 120 name='propdef' 121 else: 122 name='PropDef' 123 propdefdict={name:[ 124 ['AtSeqNum','pdb atom sequence number'], 125 ['AtPDBSymbol','pdb atom symbol'], 126 ['AtOccup','pdb atom occupancy'], 127 ['AtBFactor','pdb B factor'], 128 ['Charge','atom charge'], 129 ['AtAltLoc','atom alternate location'], 130 ['AtInsCode','code for insertion of residues'], 131 ['EntryIdFk','macromolecule database Entry table foreign key'] 132 ]} 133 ### import to Ligand.PropDef table ### 134 PropDefIDict=self.PropDef(lowercasetablenames=lowercasetablenames,**propdefdict) 135 self.kwargs['db']=macdbname# set db to macromolecule database 136 if not PropDefIDict: 137 print 'Error: Incorrect import to PropDef table' 138 if logdebug: 139 self.log.error('Incorrect import to PropDef table') 140 sys.exit(1) 141 ResIDict['PropDefIDict']=PropDefIDict 142 except Error,e: 143 print 'Error: %s'%e 144 sys.exit(1) 145 146 print 'Importing data to database. Please wait ...' 147 ligmolist,macmolist=self.getmol(repflag=self.kwargs['repflag'],rmHetmol=rmHetmol)#get ligand and macromolecule sorted list of tuples 148 149 macorrflag,macaltloclist,macmodelist,machainlist,macmoldescdict,macmoldict=self.createmol(macmolist,self.macromolecule,rmHetmol=rmHetmol,addH=addH) 150 ligcorrflag,ligaltloclist,ligmodelist,ligchainlist,ligmoldescdict,ligmoldict=self.createmol(ligmolist,self.ligands,rmHetmol=rmHetmol,addH=addH) 151 if not ligcorrflag: 152 if self.ligandb: 153 print 'Warning: Cannot import to %s database!' %self.ligandb 154 self.log.warning('Cannot import to %s database',self.ligandb) 155 self.ligandb=None 156 157 molaltoclist=sorted(set(macaltloclist)|set(ligaltloclist))#molecule alternate list 158 molmodelist=sorted(set(macmodelist)|set(ligmodelist))#molecule model list 159 molchainlist=sorted(set(machainlist)|set(ligchainlist))#molecule chain list 160 161 self.log.info('Database data importing from %s',self.pdbfilepath) 162 ### PDB HEADER extraction ### 163 macmolspecific=self.headict.get('COMPND',None)#get macromolecule COMPND specification from pdb header 164 compndesc={} 165 if macmolspecific: 166 compndesc=self.getmacmolheader(macmolspecific)#macromolecule description dictionary 167 hetatdict={} 168 hetatname=self.headict.get('HETNAM',None)#get heteroatom name 169 if hetatname: 170 hetatdict=self.gethetatname(hetatname)#heteroatom name dictionary 171 formuldict={} 172 formul=self.headict.get('FORMUL',None)#get heteroatom formula 173 if formul: 174 formuldict=self.getformul(formul)#heteroatom formula dictionary 175 seqresdict={} 176 seqres=self.headict.get('SEQRES',None)#get chain residue sequence 177 if seqres: 178 seqresdict=self.getseqres(seqres)#chain sequence dictionary 179 hetdict={} 180 het=self.headict.get('HET',None) 181 if het: 182 hetdict=self.gethet(het)#get het description dictionary 183 ssbondict={} 184 ssbond=self.headict.get('SSBOND',None) 185 if ssbond: 186 ssbondict=self.getssbond(ssbond)#get disulfide bond dictionary 187 headkeywdatdict={} 188 headict={} 189 header=self.headict.get('HEADER',None) 190 if header: 191 headict=self.getheader(header)# get header dictionary 192 headkeywdatdict.update(headict) 193 titledict={} 194 title=self.headict.get('TITLE',None) 195 if title: 196 titledict=self.getitle(title,'Title') 197 headkeywdatdict.update(titledict)#get title dictionary 198 keywordict={} 199 keyword=self.headict.get('KEYWDS',None) 200 if keyword: 201 keywordict=self.getkeyword(keyword,'KeyWords')#get keyword dictionary 202 headkeywdatdict.update(keywordict) 203 experimdict={} 204 expdta=self.headict.get('EXPDTA',None) 205 if expdta: 206 experimdict=self.getexperimdat(expdta,'ExperimName')#get experimental dictionary 207 journaldict={} 208 jrnl=self.headict.get('JRNL',None) 209 if jrnl: 210 journaldict=self.getjournal(jrnl)#get journal dictionary 211 origdict={} 212 origx1dict={} 213 origx1=self.headict.get('ORIGX1',None) 214 if origx1: 215 origx1dict=self.getorigx(origx1,'O','T',1)#get original matrix dictionary 216 origdict.update(origx1dict) 217 origx2dict={} 218 origx2=self.headict.get('ORIGX2',None) 219 if origx2: 220 origx2dict=self.getorigx(origx2,'O','T',2)#get original matrix dictionary 221 origdict.update(origx2dict) 222 origx3dict={} 223 origx3=self.headict.get('ORIGX3',None) 224 if origx3: 225 origx3dict=self.getorigx(origx3,'O','T',3)#get original matrix dictionary 226 origdict.update(origx3dict) 227 scaledict={} 228 scale1dict={} 229 scale1=self.headict.get('SCALE1',None) 230 if scale1: 231 scale1dict=self.getscale(scale1,'S','U',1)#get scale matrix dictionary 232 scaledict.update(scale1dict) 233 scale2dict={} 234 scale2=self.headict.get('SCALE2',None) 235 if scale2: 236 scale2dict=self.getscale(scale2,'S','U',2)#get scale matrix dictionary 237 scaledict.update(scale2dict) 238 scale3dict={} 239 scale3=self.headict.get('SCALE3',None) 240 if scale3: 241 scale3dict=self.getscale(scale3,'S','U',3)#get scale matrix dictionary 242 scaledict.update(scale3dict) 243 mtrixdict={} 244 mtrix1dict={} 245 mtrix1=self.headict.get('MTRIX1',None) 246 if mtrix1: 247 mtrix1dict=self.getmatrix(mtrix1,1)#get matrix dictionary 248 mtrixdict.update(mtrix1dict) 249 mtrix2dict={} 250 mtrix2=self.headict.get('MTRIX2',None) 251 if mtrix2: 252 mtrix2dict=self.getmatrix(mtrix2,2)#get matrix dictionary 253 mtrixdict.update(mtrix2dict) 254 mtrix3dict={} 255 mtrix3=self.headict.get('MTRIX3',None) 256 if mtrix3: 257 mtrix3dict=self.getmatrix(mtrix3,3)#get matrix dictionary 258 mtrixdict.update(mtrix3dict) 259 dbreflist=[] 260 dbref=self.headict.get('DBREF',None) 261 if dbref: 262 dbreflist=self.getdbref(dbref)#get database reference dictionary 263 seqadvlist=[] 264 seqadv=self.headict.get('SEQADV',None) 265 if seqadv: 266 seqadvlist=self.getseqadv(seqadv)#get conflict dictionary 267 268 ### import to Entry table 269 EntryID=self.Entry(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headict=self.headict,conflag=False) 270 if not EntryID: 271 print 'Error: Incorrect import to Entry table' 272 self.log.error('Incorrect import to Entry table') 273 sys.exit(1) 274 275 ### import to User table ### 276 UserID=self.User(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,ID=EntryID) 277 if not UserID: 278 print 'Error: Incorrect import to User table' 279 self.log.error('Incorrect import to User table') 280 sys.exit(1) 281 282 ### import to HeadKeywDat table ### 283 if headkeywdatdict: 284 HeadKeywDatID=self.HeadKeywDat(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerdict=headkeywdatdict,ID=EntryID) 285 if not HeadKeywDatID: 286 print 'Error: Incorrect import to HeadKeywDat table' 287 self.log.error('Incorrect import to HeadKeywDat table') 288 sys.exit(1) 289 290 ### import to ExperimDat table ### 291 if experimdict: 292 ExperimDatID=self.ExperimDat(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerdict=experimdict,ID=EntryID) 293 if not ExperimDatID: 294 print 'Error: Incorrect import to ExperimDat table' 295 self.log.error('Incorrect import to ExperimDat table') 296 sys.exit(1) 297 298 ### import to JournalDat table ### 299 if journaldict: 300 JournalDatID=self.JournalDat(logdebug=logdebug,colvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerdict=journaldict,ID=EntryID) 301 if not JournalDatID: 302 print 'Error: Incorrect import to JournalDat table' 303 self.log.error('Incorrect import to JournalDat table') 304 sys.exit(1) 305 306 ### import to OrigDat table ### 307 if origdict: 308 OrigDatID=self.OrigDat(logdebug=logdebug,colvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerdict=origdict,ID=EntryID) 309 if not OrigDatID: 310 print 'Error: Incorrect import to OrigDat table' 311 self.log.error('Incorrect import to OrigDat table') 312 sys.exit(1) 313 314 ### import to ScalDat table ### 315 if scaledict: 316 ScalDatID=self.ScalDat(logdebug=logdebug,colvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerdict=scaledict,ID=EntryID) 317 if not ScalDatID: 318 print 'Error: Incorrect import to ScalDat table' 319 self.log.error('Incorrect import to ScalDat table') 320 sys.exit(1) 321 322 ### import to MatrixDat table ### 323 if mtrixdict: 324 MatrixDatID=self.MatrixDat(logdebug=logdebug,colvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerdict=mtrixdict,ID=EntryID) 325 if not MatrixDatID: 326 print 'Error: Incorrect import to MatrixDat table' 327 self.log.error('Incorrect import to MatrixDat table') 328 sys.exit(1) 329 330 ### import to Model table ### 331 ModelIDict=self.Model(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,modelist=molmodelist,ID=EntryID) 332 if not ModelIDict: 333 print 'Error: Incorrect import to Model table' 334 self.log.error('Incorrect import to Model table') 335 sys.exit(1) 336 337 ### import to AltLoc table ### 338 AltLocIDict=self.AltLoc(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,altloclist=molaltoclist,ID=EntryID) 339 ## import to Chain table ### 340 ChainIDict=self.Chain(logdebug=logdebug, tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,chainlist=molchainlist,ID=EntryID) 341 342 ### import to DBRefDat table ### 343 if dbreflist: 344 DBRefDatID=self.DBRefDat(logdebug=logdebug,colvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerlist=dbreflist,ID=EntryID,IDict=ChainIDict) 345 if not DBRefDatID: 346 print 'Error: Incorrect import to DBRefDat table' 347 self.log.error('Incorrect import to DBRefDat table') 348 sys.exit(1) 349 350 ### import to SeqAdvDat table ### 351 if seqadvlist: 352 SeqAdvDatID=self.SeqAdvDat(logdebug=logdebug,colvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headerlist=seqadvlist,ID=EntryID,IDict=ChainIDict) 353 if not SeqAdvDatID: 354 print 'Error: Incorrect import to SeqAdvDat table' 355 self.log.error('Incorrect import to SeqAdvDat table') 356 sys.exit(1) 357 358 ## prepare molecule dictionary 359 moleculedict=self.prepmoldict(macmoldescdict,ligmoldescdict,compndesc,machainlist,hetdict,hetatdict,formuldict) 360 if seqresdict: 361 ### import to ChainReSeq table ### 362 ChainReSeqIDict=self.ChainReSeq(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,seqresdict=seqresdict,IDict=ChainIDict) 363 364 ResIDict['ModelIDict']=ModelIDict 365 ResIDict['ChainIDict']=ChainIDict 366 367 for moldict in moleculedict.values():#iterate molecule dictionary 368 369 ### import to Molecule table ### 370 MoleculeID,ProtStatID,AtomsID=self.Molecule(logdebug=logdebug,tabcolvaldict=tabcolvaldict, lowercasetablenames=lowercasetablenames,moldict=moldict,ID=EntryID,IDict=ResIDict) 371 372 if not MoleculeID: 373 print 'Error: Incorrect import to Model table' 374 self.log.error('Incorrect import to Model table') 375 sys.exit(1) 376 ResIDict['ProtStatID']=ProtStatID 377 378 ### import to ChainMol table ### 379 ChainMolIDict=self.ChainMol(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,moldict=moldict,ID=MoleculeID,IDict=ChainIDict) 380 381 if moldict.has_key('MolType'):#if has molecule type 382 if moldict['MolType']=='P':#if protein 383 384 for reskey in macmolist: 385 mackey=reskey[0]#macromolecule key 386 macval=macmoldict[mackey]#macromolecule object 387 388 ### import to Residue table ### 389 ResidueID=self.Residue(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,moldict=moldict,ID=MoleculeID,IDict=ResIDict) 390 if not ResidueID: 391 continue 392 393 ### import to ReSSBond table ### 394 if ssbond: 395 ReSSBondID=self.ReSSBond(ogdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,ssbondict=ssbondict,ID=ResidueID,IDict=ChainIDict) 396 397 ### import to Atoms table ### 398 ResAtomsID=self.ResAtoms(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,ID=ResidueID,IDict=AltLocIDict) 399 if not ResAtomsID: 400 continue 401 402 ### import to ResAtomStat table ### 403 ResAtomStatID=self.ResAtomStat(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,ID=ResidueID,IDlist=ResAtomsID) 404 if not ResAtomStatID: 405 continue 406 407 else: #not protein 408 409 for reskey in macmolist: 410 hetkey=reskey[0]#heteromolecule key 411 hetval=macmoldict[hetkey]#heteromolecule object 412 ### import to HetMol table ### 413 HetMolID=self.HetMol(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,hetkey=hetkey,hetval=hetval,moldict=moldict,ID=MoleculeID,IDict=ResIDict,IDlist=AtomsID) 414 for reskey in ligmolist: 415 hetkey=reskey[0]#heteromolecule key 416 hetval=ligmoldict[hetkey]#heteromolecule object 417 ### import to HetMol table ### 418 HetMolID=self.HetMol(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,hetkey=hetkey,hetval=hetval,moldict=moldict,ID=MoleculeID,IDict=ResIDict,IDlist=AtomsID) 419 420 else:#no MolType key in molecule dictionary 421 continue 422 print 'Import done.' 423 self.closeDB()#close database connection
424
425 - def addConf(self,tabcolvaldict={},logdebug=False,lowercasetablenames=True, addH=False,rmHetmol=False,rmMacmol=False):
426 """ 427 add conformation from pdb file to database tables 428 INPUT: 429 class object 430 lowercasetablenames - boolean, lower case table names MySQL engine settings, default True 431 logdebug - boolean, debug logging, default False 432 addH - add hydrogens with OpenEye software, default False 433 rmHetmol - remove heteromolecule, default False 434 rmMacMol - remove macromolecule, default False - if True - import only to Residue table 435 OUTPUT: 436 importing data to database 437 """ 438 print 'Importing %s table description.' %self.kwargs['db'] 439 ResIDict={} 440 macdbname=self.kwargs['db']#get macromolecule database name 441 if self.status=='Disconnected': 442 print 'Error: Please reconnect to database! Connection has been closed by previous function.' 443 if logdebug: 444 self.log.error('Please reconnect to database. Connection has been closed by previous function.') 445 sys.exit(1) 446 self.macmoltabdesc=self.genTables()#generate table description 447 if self.ligandb: 448 try: 449 self.kwargs['db']=self.ligandb#set db to ligand database 450 print 'Importing %s table description.' %self.kwargs['db'] 451 self.tabdesc=self.genTables() 452 453 ### get Ligand.PropDef table ### 454 PropDefIDict=self.getPropDef() 455 self.kwargs['db']=macdbname# set db to macromolecule database 456 if not PropDefIDict: 457 print 'Error: Incorrect import to PropDef table.' 458 if logdebug: 459 self.log.error('Incorrect import to PropDef table.') 460 sys.exit(1) 461 ResIDict['PropDefIDict']=PropDefIDict 462 except Error,e: 463 print 'Error: %s'%e 464 sys.exit(1) 465 print 'Inserting conformer data to database. Please wait ...' 466 ligmolist,macmolist=self.getmol(repflag=self.kwargs['repflag'],rmHetmol=rmHetmol)#get ligand and macromolecule sorted list of tuples 467 468 macorrflag,macaltloclist,macmodelist,machainlist,macmoldescdict,macmoldict=self.createmol(macmolist,self.macromolecule,rmHetmol=rmHetmol,addH=addH) 469 ligcorrflag,ligaltloclist,ligmodelist,ligchainlist,ligmoldescdict,ligmoldict=self.createmol(ligmolist,self.ligands,rmHetmol=rmHetmol,addH=addH) 470 if not ligcorrflag: 471 if self.ligandb: 472 print 'Warning: Cannot import to %s database!' %self.ligandb 473 self.log.warning('Cannot import to %s database',self.ligandb) 474 self.ligandb=None 475 476 molmodelist=sorted(set(macmodelist)|set(ligmodelist))#molecule model list 477 478 modelid=tabcolvaldict.get('ModelId',None)#model id given by user 479 if modelid: 480 if isinstance(modelid,(int,long)): 481 modelid=[modelid] 482 elif isinstance(modelid,str): 483 try: 484 modelid=int(modelid) 485 except ValueError,e: 486 print 'Error: %s' %e 487 sys.exit(1) 488 modelid=[modelid] 489 elif isinstance(modelid,list): 490 try: 491 modelid=filter(lambda item: isinstance(item,(int,long)),modelid) 492 except ValueError,e: 493 print 'Error: %s' %e 494 sys.exit(1) 495 if not isinstance(modelid,list): 496 print 'Error: ModelId must be list!' 497 self.log.error('ModelId must be list') 498 sys.exit(1) 499 else: 500 modelid=molmodelist 501 502 if len(modelid)!=len(molmodelist): 503 print 'Error: Too many specified ModelId! Please, check your input file.' 504 self.log.error('Too many specified ModelId. Please, check your input file') 505 sys.exit(1) 506 ### get data from HEADER ### 507 ssbondict={} 508 ssbond=self.headict.get('SSBOND',None) 509 if ssbond: 510 ssbondict=self.getssbond(ssbond)#get disulfide bond dictionary 511 ### get Entry ID ### 512 EntryID=self.getEntry(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,headict=self.headict,conflag=True) 513 if not EntryID or len(EntryID)!=1: 514 print 'Error: Incorrect ID for Entry table' 515 self.log.error('Incorrect ID for Entry table') 516 sys.exit(1) 517 ### get Model ID ### 518 ModelIDict=self.getModel(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,ID=EntryID) 519 if not ModelIDict: 520 print 'Error: Incorrect ID for Model table' 521 self.log.error('Incorrect ID for Model table') 522 sys.exit(1) 523 524 for modelitem in modelid:#check if model already in database 525 if ModelIDict.has_key(modelitem): 526 print 'Error: Model: %s for EntryId: %s exists in database!. Please change ModelId value.' %(modelitem,EntryID[0]) 527 self.log.error('Model: %s for EntryId: %s exists in database!. Please change ModelId value',modelitem,EntryID[0]) 528 sys.exit(1) 529 530 ### get Chain ID ### 531 ChainIDict=self.getChain(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,ID=EntryID) 532 if not ChainIDict: 533 print 'Error: Incorrect ID for Chain table' 534 self.log.error('Incorrect ID for Chain table') 535 sys.exit(1) 536 revChainIDict=dict([(int(ChainIDval[0]), ChainIDkey) for ChainIDkey,ChainIDval in ChainIDict.iteritems()])#reverse Chain dictionary 537 ### get Alternate Location ID ### 538 AltLocIDict=self.getAltLoc(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,ID=EntryID) 539 540 ### PDB HEADER extraction ### 541 macmolspecific=self.headict.get('COMPND',None)#get macromolecule COMPND specification from pdb header 542 compndesc={} 543 if macmolspecific: 544 compndesc=self.getmacmolheader(macmolspecific)#macromolecule description dictionary 545 hetatdict={} 546 hetatname=self.headict.get('HETNAM',None)#get heteroatom name 547 if hetatname: 548 hetatdict=self.gethetatname(hetatname)#heteroatom name dictionary 549 formuldict={} 550 formul=self.headict.get('FORMUL',None)#get heteroatom formula 551 if formul: 552 formuldict=self.getformul(formul)#heteroatom formula dictionary 553 hetdict={} 554 het=self.headict.get('HET',None) 555 if het: 556 hetdict=self.gethet(het)#get het description dictionary 557 ssbondict={} 558 ssbond=self.headict.get('SSBOND',None) 559 if ssbond: 560 ssbondict=self.getssbond(ssbond)#get disulfide bond dictionary 561 562 ### prepare molecule dictionary from PDB file 563 moleculedict=self.prepmoldict(macmoldescdict,ligmoldescdict,compndesc,machainlist,hetdict,hetatdict,formuldict) 564 565 ### get Molecule ID ### 566 MoleculeIDict,ProtStatIDict=self.getMolecule(logdebug=logdebug, tabcolvaldict=tabcolvaldict, lowercasetablenames=lowercasetablenames,ID=EntryID,IDict=revChainIDict) 567 if not MoleculeIDict: 568 print 'Error: Incorrect ID for Molecule table' 569 self.log.error('Incorrect ID for Molecule table') 570 sys.exit(1) 571 ### compare molecule dictionary from PDB and MoleculeID dictionary taken from database ### 572 if len(MoleculeIDict)!=len(moleculedict): 573 print 'Error: Different number of molecules! Please, check setting for rmHetmol in addConf function.' 574 self.log.error('Error: Different number of molecules. Please, check setting for rmHetmol in addConf function.') 575 sys.exit(1) 576 for MoleculeKey, MoleculeVal in MoleculeIDict.iteritems(): 577 moltype=MoleculeVal.get('MolType',None)#get molecule type 578 molchain=MoleculeVal.get('MolChain',None)#get molecule chain list 579 molcode=MoleculeVal.get('MolCode',None)#get molecule 3 letter code 580 if moltype!='P':#not protein 581 if moleculedict.has_key(molcode): 582 moleculetype=moleculedict[molcode]['MolType'] 583 if moleculetype!=moltype:#check molecule types 584 print 'Error %s: Different heteromolecule type specification!' 585 self.log.error('%s: Different heteromolecule type specification',MoleculeKey) 586 sys.exit(1) 587 moleculechain=moleculedict[molcode]['MolChain']#get molecule chain 588 if molchain:#if belongs to chain 589 chaindiff=set(molchain)-set(moleculechain)#check chain difference 590 if chaindiff: 591 print 'Error %s: Different heteromolecule chain specification!'%MoleculeKey 592 self.log.error('%s: Different heteromolecule chain specification',MoleculeKey) 593 sys.exit(1) 594 else: 595 print 'Error %s: Unknown heteromolecule code!'%molcode 596 self.log.error('%s: Unknown heteromolecule code!',molcode) 597 sys.exit(1) 598 else:#not protein 599 diflag=True 600 for moleculeval in moleculedict.values(): 601 moleculetype=moleculeval['MolType'] 602 moleculechain=moleculeval['MolChain'] 603 if moleculetype=='P': 604 if molchain:#if belongs to chain 605 chaindiff=set(molchain)-set(moleculechain)#check chain difference 606 if not chaindiff: 607 diflag=False 608 if diflag: 609 print 'Error %s: Different macromolecule chain specifications!' %(MoleculeKey) 610 self.log.error('%s: Different macromolecule chain specification',MoleculeKey) 611 sys.exit(1) 612 613 ## import to Model table ### 614 ModelIDict=self.Model(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,modelist=modelid,ID=EntryID) 615 if not ModelIDict: 616 print 'Error: Incorrect import to Model table' 617 self.log.error('Incorrect import to Model table') 618 sys.exit(1) 619 620 ResIDict['ModelIDict']=ModelIDict 621 ResIDict['ChainIDict']=ChainIDict 622 623 modelflag=None 624 iterator = iter(modelid) 625 for reskey in macmolist: 626 mackey=reskey[0]#macromolecule key 627 macval=macmoldict[mackey]#macromolecule object 628 molmodelid=macval.getmodelid()#get macromolecule model id 629 if molmodelid!=modelflag:#if macromolecule belongs to different model 630 molmodelid=macval.getmodelid()#get macromolecule model id 631 modelflag=molmodelid#set model flag to macromolecule model id 632 try: 633 newmodelid=iterator.next()#get next model id 634 except StopIteration: 635 print 'Error: ModelId iteration error!' 636 self.log.error('ModelId iteration error') 637 sys.exit(1) 638 639 if macval.getype()=='P':#if protein 640 MoleculeID=0 641 chainid=macval.getchainid()#get macromolecule chain id 642 for moleculeidkey,moleculeidval in MoleculeIDict.iteritems():#iterate molecule dictionary 643 if moleculeidval.has_key('MolChain') and (moleculeidval['MolType']=='P'):#if has 'MolChain' and 'MolType' tags 644 if chainid in moleculeidval['MolChain']:#if in molecule chain 645 MoleculeID=[moleculeidkey]#get molecule id 646 moldict=moleculeidval#get molecule dictionary 647 macval.setmodelid(newmodelid)#set new model id 648 if not MoleculeID: 649 print 'Error %s: Molecule Id not available!' %mackey 650 self.log.error('%s: Molecule Id not available.',mackey) 651 sys.exit(1) 652 653 ### import to Residue table ### 654 ResidueID=self.Residue(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,moldict=moldict,ID=MoleculeID,IDict=ResIDict) 655 if not ResidueID: 656 continue 657 658 if rmMacmol: continue # import only to Residue table 659 660 ### import to ReSSBond table ### 661 if ssbond: 662 ReSSBondID=self.ReSSBond(ogdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,ssbondict=ssbondict,ID=ResidueID,IDict=ChainIDict) 663 664 ### import to Atoms table ### 665 ResAtomsID=self.ResAtoms(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,ID=ResidueID,IDict=AltLocIDict) 666 if not ResAtomsID: 667 continue 668 669 ### import to ResAtomStat table ### 670 ResAtomStatID=self.ResAtomStat(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,mackey=mackey,macval=macval,ID=ResidueID,IDlist=ResAtomsID) 671 if not ResAtomStatID: 672 continue 673 674 else:#if not protein 675 MoleculeID=0 676 AtomsID=0 677 molname=macval.getmolname().strip()#get heteromolecule name 678 chainid=macval.getchainid()#get heteromolecule chain id 679 macval.setmodelid(newmodelid)#set new model id 680 for moleculeidkey,moleculeidval in MoleculeIDict.iteritems():#iterate molecule dictionary taken from Molecule table 681 molcode=moleculeidval.get('MolCode',None)#get molecule code 682 if molcode==molname:#molecule code from pdb equals molecule name from database 683 MoleculeID=[moleculeidkey]#get molecule Id 684 moldict=moleculeidval#set molecule dictionary 685 chemcompidfk=moleculeidval.get('ChemCompIdFk',None)#get molecule Ligand.ChemComp foreign key 686 if chemcompidfk:#if exist in Ligand.ChemComp table 687 isosmi=macval.getisosmi()#get isomeric smile 688 if ProtStatIDict.has_key(isosmi):#if Protonation State dictonary has isomeric key 689 ResIDict['ProtStatID']=[ProtStatIDict[isosmi][0]]#get Ligand.ProtStat foreign key 690 dbatlist=ProtStatIDict[isosmi][1]#get Ligand.Atoms list [[atomsidfk,elemidfk],...] 691 molatlist=macval.getsymq()['symq']#get molecule atoms list 692 if len(molatlist)!=len(dbatlist): 693 if not chainid.isalpha(): 694 print 'Error: Different number of atoms in molecule: %s, model: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,macval.getmodelid(),macval.getmolid()) 695 self.log.error('Different number of atoms in molecule: %s, model: %s, molecule id: %s. Please, check setting for addH in addConf function.',molname,macval.getmodelid(),macval.getmolid()) 696 else: 697 print 'Error: Different number of atoms in molecule: %s, model: %s, chain: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,macval.getmodelid(),chainid,macval.getmolid()) 698 self.log.error('Different number of atoms in molecule: %s, model: %s, chain: %s, molecule id: %s.Please, check setting for addH in addConf function.',molname,macval.getmodelid(),chainid,macval.getmolid()) 699 sys.exit(1) 700 ### compare molecule atom list with Ligand.Atoms list ### 701 if not all(map(lambda item: item[0][1]==item[1][1],zip(dbatlist,molatlist))): 702 if not chainid.isalpha(): 703 print 'Error: Different atoms list in molecule: %s, model: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,macval.getmodelid(),macval.getmolid()) 704 self.log.error('Different atoms list in molecule: %s, model: %s, molecule id: %s. Please, check setting for addH in addConf function.',molname,macval.getmodelid(),macval.getmolid()) 705 else: 706 print 'Error: Different atoms list in molecule: %s, model: %s, chain: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,macval.getmodelid(),chainid,macval.getmolid()) 707 self.log.error('Different atoms list in molecule: %s, model: %s, chain: %s, molecule id: %s. Please, check setting for addH in addConf function.',molname,macval.getmodelid(),chainid,macval.getmolid()) 708 sys.exit(1) 709 AtomsID=[atomitem[0] for atomitem in dbatlist] 710 711 ### import to HetMol table ### 712 HetMolID=self.HetMol(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,hetkey=mackey,hetval=macval,moldict=moldict,ID=MoleculeID,IDict=ResIDict,IDlist=AtomsID) 713 if not HetMolID: 714 if not chainid.isalpha(): 715 print 'Error: Incorrect import to HetMol table for molecule: %s, model: %s, molecule id: %s!' %(molname,macval.getmodelid(),macval.getmolid()) 716 self.log.error('Incorrect import to HetMol table for molecule: %s, model: %s, molecule id: %s',molname,macval.getmodelid(),macval.getmolid()) 717 else: 718 print 'Error: Incorrect import to HetMol table for molecule: %s, model: %s, chain: %s, molecule id: %s!' %(molname,macval.getmodelid(),chainid,macval.getmolid()) 719 self.log.error('Incorrect import to HetMol table for molecule: %s, model: %s, chain: %s, molecule id: %s.',molname,macval.getmodelid(),chainid,macval.getmolid()) 720 sys.exit(1) 721 722 modelflag=None 723 iterator = iter(modelid) 724 for reskey in ligmolist: 725 hetkey=reskey[0]#heteromolecule key 726 hetval=ligmoldict[hetkey]#heteromolecule object 727 molmodelid=hetval.getmodelid()#get macromolecule model id 728 if molmodelid!=modelflag:#if macromolecule belongs to different model 729 molmodelid=hetval.getmodelid()#get macromolecule model id 730 modelflag=molmodelid#set model flag to macromolecule model id 731 try: 732 newmodelid=iterator.next()#get next model id 733 except StopIteration: 734 print 'Error: ModelId iteration error!' 735 self.log.error('ModelId iteration error') 736 sys.exit(1) 737 MoleculeID=0 738 AtomsID=0 739 molname=hetval.getmolname().strip()#get heteromolecule name 740 chainid=hetval.getchainid()#get heteromolecule chain id 741 hetval.setmodelid(newmodelid)#set new model id 742 for moleculeidkey,moleculeidval in MoleculeIDict.iteritems():#iterate molecule dictionary taken from Molecule table 743 molcode=moleculeidval.get('MolCode',None)#get molecule code 744 if molcode==molname:#molecule code from pdb equals molecule name from database 745 MoleculeID=[moleculeidkey]#get molecule Id 746 moldict=moleculeidval#set molecule dictionary 747 chemcompidfk=moleculeidval.get('ChemCompIdFk',None)#get molecule Ligand.ChemComp foreign key 748 if chemcompidfk:#if exist in Ligand.ChemComp table 749 isosmi=hetval.getisosmi()#get isomeric smile 750 if ProtStatIDict.has_key(isosmi):#if Protonation State dictonary has isomeric key 751 ResIDict['ProtStatID']=[ProtStatIDict[isosmi][0]]#get Ligand.ProtStat foreign key 752 dbatlist=ProtStatIDict[isosmi][1]#get Ligand.Atoms list [[atomsidfk,elemidfk],...] 753 molatlist=hetval.getsymq()['symq']#get molecule atoms list 754 if len(molatlist)!=len(dbatlist): 755 if not chainid.isalpha(): 756 print 'Error: Different number of atoms in molecule: %s, model: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,hetval.getmodelid(),hetval.getmolid()) 757 self.log.error('Different number of atoms in molecule: %s, model: %s, molecule id: %s. Please, check setting for addH in addConf function.',molname,hetval.getmodelid(),hetval.getmolid()) 758 else: 759 print 'Error: Different number of atoms in molecule: %s, model: %s, chain: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,hetval.getmodelid(),chainid,hetval.getmolid()) 760 self.log.error('Different number of atoms in molecule: %s, model: %s, chain: %s, molecule id: %s.Please, check setting for addH in addConf function.',molname,hetval.getmodelid(),chainid,hetval.getmolid()) 761 sys.exit(1) 762 ### compare molecule atom list with Ligand.Atoms list ### 763 if not all(map(lambda item: item[0][1]==item[1][1],zip(dbatlist,molatlist))): 764 if not chainid.isalpha(): 765 print 'Error: Different atoms list in molecule: %s, model: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,hetval.getmodelid(),hetval.getmolid()) 766 self.log.error('Different atoms list in molecule: %s, model: %s, molecule id: %s. Please, check setting for addH in addConf function.',molname,hetval.getmodelid(),hetval.getmolid()) 767 else: 768 print 'Error: Different atoms list in molecule: %s, model: %s, chain: %s, molecule id: %s! Please, check setting for addH in addConf function.' %(molname,macval.getmodelid(),chainid,macval.getmolid()) 769 self.log.error('Different atoms list in molecule: %s, model: %s, chain: %s, molecule id: %s. Please, check setting for addH in addConf function.',molname,macval.getmodelid(),chainid,macval.getmolid()) 770 sys.exit(1) 771 AtomsID=[atomitem[0] for atomitem in dbatlist] 772 773 ### import to HetMol table ### 774 HetMolID=self.HetMol(logdebug=logdebug,tabcolvaldict=tabcolvaldict,lowercasetablenames=lowercasetablenames,hetkey=hetkey,hetval=hetval,moldict=moldict,ID=MoleculeID,IDict=ResIDict,IDlist=AtomsID) 775 if not HetMolID: 776 if not chainid.isalpha(): 777 print 'Error: Incorrect import to HetMol table for molecule: %s, model: %s, molecule id: %s!' %(molname,hetval.getmodelid(),hetval.getmolid()) 778 self.log.error('Incorrect import to HetMol table for molecule: %s, model: %s, molecule id: %s',molname,hetval.getmodelid(),hetval.getmolid()) 779 else: 780 print 'Error: Incorrect import to HetMol table for molecule: %s, model: %s, chain: %s, molecule id: %s!' %(molname,hetval.getmodelid(),chainid,hetval.getmolid()) 781 self.log.error('Incorrect import to HetMol table for molecule: %s, model: %s, chain: %s, molecule id: %s.',molname,hetval.getmodelid(),chainid,hetval.getmolid()) 782 sys.exit(1) 783 784 print 'Conformer insertion done.' 785 self.closeDB()#close database connection
786
787 - def getmacmolheader(self,macmolspecific):
788 """ 789 get macromolecular contents from pdb COMPND records 790 INPUT: 791 macmolspecific - list of records from pdb COMPND tag 792 OUTPUT: 793 macmoldescdict - dict, macromolecular description dictionary 794 """ 795 tags=['MOL_ID','MOLECULE','CHAIN','FRAGMENT','SYNONYM','EC','ENGINEERED','MUTATION','OTHER_DETAILS']#pdb tokens 796 idmoldict={} 797 t=0 798 lenmacmolspecific=len(macmolspecific) 799 while t<lenmacmolspecific: 800 specitem=macmolspecific[t] 801 molididx=specitem.find(tags[0])#find MOL_ID index 802 if molididx!=-1: 803 idmolist=[] 804 molid=specitem[molididx+len(tags[0]):]#molecule id 805 lenmolid=len(molid)#molecule string length 806 i=0 807 idmol='' 808 while i<lenmolid: 809 if molid[i].isdigit():#if character is digit 810 idmol+=molid[i]#join strings 811 i+=1 812 idmolist.append('P') 813 idmolist.append(idmol)#add to idmol list 814 j=t+1 815 while (j<lenmacmolspecific) and (macmolspecific[j].find(tags[0])==-1): 816 t=j#set new t value 817 molnameidx=macmolspecific[j].find(tags[1])#get MOLECULE from COMPN list 818 if molnameidx!=-1: 819 molecule=macmolspecific[j][molnameidx+len(tags[1]):] 820 if molecule.startswith(':'): molecule=molecule[1:].lstrip() 821 if not molecule.endswith(';'): 822 nextline=macmolspecific[j+1] 823 nextlinelist=[] 824 for tag in tags[2:]: 825 if nextline.find(tag)==-1: 826 nextlinelist.append(True) 827 else: 828 nextlinelist.append(False) 829 if all(nextlinelist): 830 if nextline[0].isdigit(): 831 nextline=nextline[1:] 832 if nextline.endswith(';'): 833 nextline=nextline[:-1].rstrip() 834 molecule='%s%s' %(molecule,nextline) 835 else: 836 molecule=molecule[:-1].rstrip() 837 idmolist.append(molecule) 838 molchainidx=macmolspecific[j].find(tags[2])#get CHAIN form COMPN list 839 if molchainidx!=-1: 840 chain=macmolspecific[j][molchainidx+len(tags[2]):] 841 if chain.startswith(':'): chain=chain[1:].lstrip() 842 if chain.endswith(';'):chain=chain[:-1] 843 chainlist=chain.split(',') 844 chainlist=[chainame.strip() for chainame in chainlist] 845 idmolist.append(chainlist) 846 j+=1 847 idmoldict[idmol]=idmolist 848 t+=1 849 return idmoldict
850
851 - def getorigx(self,origx,keyname,transvecname,number):
852 """ 853 get transformation matrix from orthogonal to submitted coordinates 854 INPUT: 855 orig -list, list of transformation items 856 keyname - str, dictionary key name 857 transvecname - str, transformation vector name 858 number - int, matrix row number 859 OUTPUT: 860 origxdict - dict, transformation dictionary 861 """ 862 origxdict={} 863 x='%s%s1'%(keyname,number) 864 y='%s%s2'%(keyname,number) 865 z='%s%s3'%(keyname,number) 866 t='%s%s'%(transvecname,number) 867 origx=origx[0].split(' ') 868 origx=[origxitem for origxitem in origx if origxitem] 869 try: 870 origxdict[x]=origx[0] 871 origxdict[y]=origx[1] 872 origxdict[z]=origx[2] 873 origxdict[t]=origx[3] 874 except IndexError: 875 return {} 876 else: 877 return origxdict
878
879 - def getscale(self,origx,keyname,transvecname,number):
880 """ 881 get transformation matrix from orthogonal to fractional crystallogrpahic coordinates 882 INPUT: 883 orig -list, list of transformation items 884 keyname - str, dictionary key name 885 transvecname - str, transformation vector name 886 number - int, matrix row number 887 OUTPUT: 888 origxdict - dict, transformation dictionary 889 """ 890 return self.getorigx(origx,keyname,transvecname,number)
891
892 - def getmatrix(self,mtrix,number):
893 """ 894 get transformation matrix expressing non-crystallographic symmetry 895 INPUT: 896 mtrix -list, list of transformation items 897 number - int, matrix row number 898 OUTPUT: 899 mtrxdict - dict, transformation dictionary 900 """ 901 mtrixdict={} 902 sernum='SerNum' 903 m1='M%s1'%(number) 904 m2='M%s2'%(number) 905 m3='M%s3'%(number) 906 v='V%s'%(number) 907 g='iG%s'%(number) 908 mtrixkeys=[sernum,m1,m2,m3,v,g] 909 for mtrixitem in mtrix: 910 mtrxitem=mtrixitem.split(' ') 911 mtrxitem=[mtitem for mtitem in mtrxitem if mtitem] 912 for i in range(len(mtrixkeys)): 913 try: 914 itemval=mtrxitem[i] 915 if mtrixdict.has_key(mtrixkeys[i]): 916 mtrixval=mtrixdict[mtrixkeys[i]] 917 mtrixval.append(itemval) 918 else: 919 templist=[] 920 templist.append(itemval) 921 mtrixdict[mtrixkeys[i]]=templist 922 except IndexError: 923 return {} 924 return mtrixdict
925
926 - def getdbref(self,dbref):
927 """ 928 get cross-reference database links 929 INPUT: 930 dbref - list, list of reference items 931 OUTPUT: 932 dbreflist - list, database reference list 933 """ 934 dbreflist=[] 935 for dbrefitem in dbref: 936 tempdict={} 937 chainid=dbrefitem[5] 938 if chainid.isalpha(): 939 tempdict['chainid']=chainid 940 resid=dbrefitem[7:11].strip() 941 if not resid.isspace(): 942 tempdict['SeqBeg']=resid 943 resinscode=dbrefitem[11] 944 if resinscode.isalpha(): 945 tempdict['InsBeg']=resinscode 946 seqend=dbrefitem[13:17].strip() 947 if not seqend.isspace(): 948 tempdict['SeqEnd']=seqend 949 insend=dbrefitem[17] 950 if insend.isalpha(): 951 tempdict['InsEnd']=insend 952 dbname=dbrefitem[19:25].strip() 953 if not dbname.isspace(): 954 tempdict['DBName']=dbname 955 dbaccess=dbrefitem[25:33].strip() 956 if not dbaccess.isspace(): 957 tempdict['DBAccess']=dbaccess 958 dbidcode=dbrefitem[35:46].strip() 959 if not dbidcode.isspace(): 960 tempdict['DBCode']=dbidcode 961 dbseqbeg=dbrefitem[48:53].strip() 962 if not dbseqbeg.isspace(): 963 tempdict['DBSeqBeg']=dbseqbeg 964 dbinsbeg=dbrefitem[53] 965 if dbinsbeg.isalpha(): 966 tempdict['DBInsBeg']=dbinsbeg 967 dbseqend=dbrefitem[55:60].strip() 968 if not dbseqend.isspace(): 969 tempdict['DBSeqEnd']=dbseqend 970 try: 971 dbinsend=dbrefitem[60] 972 except IndexError: 973 dbinsend=' ' 974 if dbinsend.isalpha(): 975 tempdict['DBInsEnd']=dbinsend 976 dbreflist.append(tempdict) 977 return dbreflist
978
979 - def getseqadv(self,seqadv):
980 """ 981 get conflicts between SEQRES and DBREF 982 INPUT: 983 seqadv - list, list of conflicts items 984 OUTPUT: 985 seqadvlist - list, conflicts list 986 """ 987 seqadvlist=[] 988 for seqadvitem in seqadv: 989 tempdict={} 990 resname=seqadvitem[5:8] 991 if not resname.isspace(): 992 tempdict['ResName']=resname 993 chainid=seqadvitem[9] 994 if chainid.isalpha(): 995 tempdict['chainid']=chainid 996 seqnum=seqadvitem[11:15].strip() 997 if not seqnum.isspace(): 998 tempdict['SeqNum']=seqnum 999 inscode=seqadvitem[15] 1000 if inscode.isalpha(): 1001 tempdict['InsCode']=inscode 1002 dbname=seqadvitem[17:21].strip() 1003 if not dbname.isspace(): 1004 tempdict['DBName']=dbname 1005 dbcode=seqadvitem[22:31].strip() 1006 if not dbcode.isspace(): 1007 tempdict['DBCode']=dbcode 1008 dbresname=seqadvitem[32:35] 1009 if not dbresname.isspace(): 1010 tempdict['DBResName']=dbresname 1011 dbseqnum=seqadvitem[36:41].strip() 1012 if not dbseqnum.isspace(): 1013 tempdict['DBSeqNum']=dbseqnum 1014 conflict=seqadvitem[42:].strip() 1015 if not conflict.isspace(): 1016 tempdict['Conflict']=conflict 1017 seqadvlist.append(tempdict) 1018 return seqadvlist
1019
1020 - def getjournal(self,jrnl):
1021 """ 1022 get journal dictionary 1023 INPUT: 1024 jrnl - list of records from pdb JRNL tag 1025 OUTPUT: 1026 journaldict - dict, journal dictionary 1027 """ 1028 tags=['AUTH','TITL','EDIT','REF','PUBL','REFN']#pdb tokens 1029 journaldict={} 1030 for jrnlitem in jrnl: 1031 jrnlitem=jrnlitem.split(' ') 1032 tag=jrnlitem[0] 1033 if tag in tags: 1034 jrnlitem=' '.join(jrnlitem[1:]) 1035 jrnlitem=jrnlitem.strip() 1036 jrnlitem=jrnlitem.split(' ') 1037 if not jrnlitem[0].isdigit(): 1038 journaldict[tag]=' '.join(jrnlitem[0:]) 1039 else: 1040 if journaldict.has_key(tag): 1041 journalval=journaldict[tag] 1042 nextline=' '.join(jrnlitem[1:]) 1043 journalval='%s %s'%(journalval,nextline) 1044 journaldict[tag]=journalval 1045 1046 if journaldict.has_key('REF'): 1047 refval=journaldict['REF'] 1048 del journaldict['REF'] 1049 pubname=refval[0:28].rstrip() 1050 if not pubname.isspace(): 1051 journaldict['JRNL']=pubname 1052 volume=refval[32:36].strip() 1053 if volume.isdigit(): 1054 journaldict['VOL']=volume 1055 page=refval[37:42].strip() 1056 if page.isdigit(): 1057 journaldict['PAGE']=page 1058 year=refval[43:48].strip() 1059 if year.isdigit(): 1060 journaldict['YEAR']=year 1061 return journaldict
1062
1063 - def gethetatname(self,hetatname):
1064 """ 1065 get heteroatom name dictionary 1066 INPUT: 1067 hetatname - list, list of records from pdb HETNAM tag 1068 OUTPUT: 1069 hetatnamedict - dict, heteroatom name dictionary 1070 """ 1071 hetatnamedict={} 1072 for hetat in hetatname: 1073 hetat=hetat.split(' ') 1074 if not hetat[0].isdigit():#if not continuation line 1075 hetatnamedict[hetat[0]]=' '.join(hetat[1:])#join strings in list 1076 else:#if continuation line 1077 if hetatnamedict.has_key(hetat[1]):#starts with digit 1078 hetatval=hetatnamedict[hetat[1]]#heteroatom value 1079 nextline=' '.join(hetat[2:])#create string 1080 hetatval='%s %s' %(hetatval,nextline)#join strings in list 1081 hetatnamedict[hetat[1]]=hetatval#assign new value to dictionary 1082 return hetatnamedict
1083
1084 - def getitle(self,title,key):
1085 """ 1086 get title dictionary 1087 INPUT: 1088 title - list, list of records from pdb TITLE tag 1089 OUTPUT: 1090 titledict - dict, title dictionary 1091 """ 1092 titledict={} 1093 for titleitem in title: 1094 titleitem=titleitem.split(' ') 1095 if not titleitem[0].isdigit():#if not continuation line 1096 titledict[key]=' '.join(titleitem[0:])#join strings in list 1097 else: 1098 if titledict.has_key(key):#starts with digit 1099 titleval=titledict[key]#title value 1100 nextline=' '.join(titleitem[1:])#create string 1101 titleval='%s %s'%(titleval,nextline)#join strings in list 1102 titledict[key]=titleval 1103 return titledict
1104 1105 getkeyword=getitle 1106 getexperimdat=getitle 1107
1108 - def getformul(self,formul):
1109 """ 1110 get formula dictionary 1111 INPUT: 1112 formul - list, list of records from pdb FORMUL tag 1113 OUTPUT: 1114 formuldict - dict, formula dictionary 1115 """ 1116 formuldict={} 1117 for form in formul: 1118 if form[0:2].strip().isdigit():#if component number exists 1119 form=form[2:].lstrip()#remove component number 1120 form='%s %s %s' %(form[4:6],form[0:3],form[6:])#rebuilt string 1121 form=form.lstrip()#remove leading spaces 1122 form=form.split(' ')#split string 1123 if not form[0].isdigit():#not continuation line 1124 formuldict[form[0]]=' '.join(form[1:]).strip()#add to dictionary 1125 else:#continuation line 1126 if formuldict.has_key(form[1]):#if exists in dictionary 1127 formval=formuldict[form[1]]#get dictionary value 1128 nextline=' '.join(form[2:]).lstrip()#join lines 1129 formval='%s %s' %(formval,nextline)#join dictionary value with continuation lines 1130 formuldict[form[1]]=formval.strip()#update dictionary 1131 return formuldict
1132
1133 - def getseqres(self,seqres):
1134 """ 1135 get sequence of residues in each chain 1136 INPUT: 1137 seqres - list, list of records from pdb SEQRES tag 1138 OUTPUT: 1139 seqresdict - dict, sequence of residues dictionary 1140 """ 1141 seqresdict={}# 1142 for seq in seqres: 1143 if seq[0:2].strip().isdigit():#if component number exists 1144 seq=seq[2:].lstrip()#remove component number 1145 if seq[0].isalpha():#if char is alphanumeric 1146 chainseq=seq[8:].split(' ')#split string 1147 if not seqresdict.has_key(seq[0]):#if key not exists in dictionary 1148 seqresdict[seq[0]]=chainseq#assign in dictionary 1149 else:#key exists in dictionary 1150 seqval=seqresdict[seq[0]]#get key value 1151 seqval.extend(chainseq)#extend list 1152 seqresdict[seq[0]]=seqval#update dictionary 1153 return seqresdict
1154
1155 - def gethet(self,het):
1156 """ 1157 get het molecule description 1158 INPUT: 1159 het - list, list of records from pdb HET tag 1160 OUTPUT: 1161 hetdict - dict, het description dictionary 1162 """ 1163 hetdict={} 1164 for hetitem in het: 1165 if hetitem[0:3].strip().isalpha():#check if alphanumeric 1166 hetname=hetitem[0:3].strip()#get heteromolecule name 1167 hetitem=hetitem[4:].lstrip()#lstrip string 1168 if hetitem[0].isalpha():#if char is alphanumeric 1169 chainame=hetitem[0]#get chain name 1170 else: 1171 chainame=' ' 1172 if not hetdict.has_key(hetname):#if not in dictionary 1173 hetdict[hetname]=[chainame] 1174 else:#if exists in dictionary 1175 hetval=hetdict[hetname]#heteromolecule value 1176 if chainame not in hetval: 1177 hetval.append(chainame) 1178 hetdict[hetname]=hetval 1179 return hetdict
1180
1181 - def getssbond(self,ssbond):
1182 """ 1183 get ssbond molecule description 1184 INPUT: 1185 ssbond - list, list of records from pdb SSBOND tag 1186 OUTPUT: 1187 ssbondict - dict, symmetry disulfide bond dictionary 1188 """ 1189 ssbondict={} 1190 for ssbonditem in ssbond: 1191 ssbonditem=' '.join(ssbonditem.split(' ')[1:]) 1192 resname1=ssbonditem[0:3] 1193 chainid1=ssbonditem[4] 1194 resid1=ssbonditem[6:10].strip() 1195 reseqnum1=resid1 1196 resinscode1=ssbonditem[10] 1197 resname2=ssbonditem[14:17] 1198 chainid2=ssbonditem[18] 1199 resid2=ssbonditem[20:24].strip() 1200 reseqnum2=resid2 1201 try: 1202 resinscode2=ssbonditem[24] 1203 except IndexError: 1204 resinscode2=' ' 1205 1206 if chainid1.isspace(): 1207 if resinscode1.isspace(): 1208 resid1='%s' %(resid1) 1209 else: 1210 resid1='%s%s' %(resid1,resinscode1) 1211 else: 1212 if resinscode1.isspace(): 1213 resid1='%s_%s' %(chainid1,resid1) 1214 else: 1215 resid1='%s_%s%s'%(chainid1,resid1,resinscode1) 1216 if chainid2.isspace(): 1217 if resinscode2.isspace(): 1218 resid2='%s' %(resid2) 1219 else: 1220 resid2='%s%s' %(resid2,resinscode2) 1221 else: 1222 if resinscode2.isspace(): 1223 resid2='%s_%s' %(chainid2,resid2) 1224 else: 1225 resid2='%s_%s%s'%(chainid2,resid2,resinscode2) 1226 ssbondict[resid1]=[resname2,chainid2,reseqnum2,resinscode2] 1227 ssbondict[resid2]=[resname1,chainid1,reseqnum1,resinscode1] 1228 return ssbondict
1229
1230 - def getheader(self,header):
1231 """ 1232 get header molecule description 1233 INPUT: 1234 ssbond - list, list of records from pdb SSBOND tag 1235 OUTPUT: 1236 ssbondict - dict, symmetry disulfide bond dictionary 1237 """ 1238 headict={} 1239 for headitem in header: 1240 classification=headitem[0:40].rstrip() 1241 if classification: 1242 headict['Class']=classification 1243 depdate=headitem[40:49] 1244 if depdate: 1245 headict['DepositDate']=depdate 1246 return headict
1247
1248 - def prepmoldict(self,macmoldescdict={},ligmoldescdict={},compndesc={},machainlist={},hetdict={},hetatdict={},formuldict={}):
1249 """ 1250 prepare molecule table input dictionary 1251 INPUT: 1252 macmoldescdict - dict, macromolecule description dictionary 1253 ligmoldescdict - dict, ligand description dictionary 1254 compdesc - dict, macromolecule description dictionary from pdb COMPND tags 1255 machainlist - list, macromolecule chain list 1256 hetdict - dict, het description dictionary 1257 OUTPUT: 1258 moldict - dict, molecule table input dictionary 1259 """ 1260 moldict={} 1261 for macname, macdesc in macmoldescdict.iteritems():#iterate macromolecules,water and ions 1262 macname=macname.strip()#strip macromolecule name 1263 if macdesc[0]=='P':#if protein 1264 if moldict.has_key('1'): continue#if exits in dictionary 1265 moldict['1']={'MolType':'P','MolId':'1','MolName':'PROTEIN','MolChain':sorted(machainlist)} 1266 else:#if not exists in dictionary 1267 if macdesc[0]=='I':#if ion 1268 molname='ION'#set molecule name 1269 elif macdesc[0]=='W':#if water 1270 molname='SOLVENT'#set molecule name 1271 elif macdesc[0]=='A':#if atom 1272 molname='ATOM' 1273 else: 1274 molname=None 1275 macdesc={'MolType':macdesc[0],'MolCode':macdesc[1].strip(),'MolName':molname,'MolWeight':str(macdesc[2]),'MolCharge':str(macdesc[3]),'MolFormula':macdesc[4],'MolBonds':macdesc[5],'MolSymQ':macdesc[6],'IsoSmi':macdesc[7],'MolChain':sorted(macdesc[8])} 1276 moldict[macname]=macdesc#assign to molecule dictionary 1277 1278 for ligname, ligdesc in ligmoldescdict.iteritems():#iterate hetero molecules 1279 ligname=ligname.strip()#strip ligand name 1280 ligdesc={'MolType':ligdesc[0],'MolCode':ligdesc[1].strip(),'MolWeight':str(ligdesc[2]),'MolCharge':str(ligdesc[3]),'MolFormula':ligdesc[4],'MolBonds':ligdesc[5],'MolSymQ':ligdesc[6],'IsoSmi':ligdesc[7],'MolChain':sorted(ligdesc[8])} 1281 moldict[ligname]=ligdesc#assing to molecule dictionary 1282 1283 if compndesc:#if COMPN tag in pdb file exists 1284 if moldict.has_key('1'): del moldict['1']#delete from dictionary macromolecule 1285 for macid, maclist in compndesc.iteritems():#iterate description dictionary 1286 maclist={'MolType':maclist[0],'MolId':maclist[1],'MolName':maclist[2],'MolChain':sorted(maclist[3])} 1287 compndesc[macid]=maclist#assign to molecule list 1288 moldict.update(compndesc)#update molecule dictionary 1289 1290 if hetatdict:#if HETAT tag in pdb file exists 1291 for hetat, hetatname in hetatdict.iteritems(): 1292 if moldict.has_key(hetat):#if key in molecule dictionary 1293 moldict[hetat]['MolName']=hetatname#assign new value 1294 1295 if formuldict:#if FORMUL tag in pdb file exists 1296 for formkey,formval in formuldict.iteritems(): 1297 if moldict.has_key(formkey):#if key in molecule dictionary 1298 moldict[formkey]['MolFormula']=formval#assign new value 1299 1300 if hetdict:#if HET tag in pdb file exists 1301 for hetkey, hetval in hetdict.iteritems(): 1302 if moldict.has_key(hetkey):#if key in molecule dictionary 1303 moldict[hetkey]['MolChain']=sorted(hetval)#assign new value 1304 1305 return moldict
1306
1307 - def getmol(self,repflag=True,rmHetmol=False):
1308 """ 1309 get ligand and macromolecule sorted list of tuples from pdb file 1310 INPUT: 1311 class object 1312 repflag - boolean, residue flag, default True 1313 rmHetmol - boolean, remove heteromolecule, default False 1314 OUTPUT: 1315 ligand and macromolecule sorted list of tuples 1316 """ 1317 headict,ligands,macromolecule=PDBFile(pdbfilepath=self.pdbfilepath,repflag=repflag) 1318 self.headict=headict 1319 self.macromolecule=macromolecule 1320 if rmHetmol: 1321 self.macromolecule=dict([(molkey , molobj) for molkey, molobj in self.macromolecule.iteritems() if not molobj.gethetflag()]) 1322 self.ligands=ligands 1323 moldict=dict([(molkey, (molobj.getmodelid(),molobj.getchainid(),molobj.getmolid(),molobj.getmolinscode())) for molkey, molobj in self.macromolecule.iteritems()])#molecule dictionary - chain_molecule id : molecule id 1324 moldict=sortdictval(moldict)#sort dictionary according values 1325 ligdict=dict([(ligkey, (ligobj.getmodelid(),ligobj.getchainid(),ligobj.getmolid(),molobj.getmolinscode())) for ligkey, ligobj in self.ligands.iteritems()]) 1326 ligdict=sortdictval(ligdict)#sort dictionary according values 1327 return ligdict,moldict
1328
1329 - def isTabDesCorr(self,tablename,tabcol):
1330 """ 1331 check macromolecule table description with table name 1332 INPUT: 1333 tablename- name of table 1334 tabcol - list of column names 1335 OUTPUT: 1336 boolean 1337 """ 1338 try: 1339 colname=self.tabdesc[tablename].getColName()#get list of columns 1340 except KeyError,error: 1341 print 'Error, %s: Missing description for %s!' %(tablename,error) 1342 self.log.exception('%s: Missing description for %s',tablename,error) 1343 return False 1344 if len(colname)<len(tabcol):#check number of arguments 1345 print 'Error, %s: Too many arguments in table column list!!'%tablename 1346 self.log.error('%s: Too many arguments in table column list',tablename) 1347 return False 1348 else: 1349 for name in tabcol: 1350 if name in colname:#check if key in column list 1351 continue 1352 else: 1353 print 'Error, %s: Incorrect column name for %s!'%(tablename,name) 1354 self.log.error('%s: Incorrect column name for %s',tablename,name) 1355 return False 1356 return True
1357
1358 - def isMacMolTabDesCorr(self,tablename,tabcol):
1359 """ 1360 check macromolecule table description with table name 1361 INPUT: 1362 tablename- name of table 1363 tabcol - list of column names 1364 OUTPUT: 1365 boolean 1366 """ 1367 try: 1368 colname=self.macmoltabdesc[tablename].getColName()#get list of columns 1369 except KeyError,error: 1370 print 'Error, %s: Missing description for %s!' %(tablename,error) 1371 self.log.exception('%s: Missing description for %s',tablename,error) 1372 return False 1373 if len(colname)<len(tabcol):#check number of arguments 1374 print 'Error, %s: Too many arguments in table column list!!'%tablename 1375 self.log.error('%s: Too many arguments in table column list',tablename) 1376 return False 1377 else: 1378 for name in tabcol: 1379 if name in colname:#check if key in column list 1380 continue 1381 else: 1382 print 'Error, %s: Incorrect column name for %s!'%(tablename,name) 1383 self.log.error('%s: Incorrect column name for %s',tablename,name) 1384 return False 1385 return True
1386
1387 - def createmol(self,molist,moldict,rmHetmol=True,addH=False):
1388 """ 1389 create oe molecule object class 1390 INPUT: 1391 molist - list, sorted molecule list of tuples 1392 moldict - dict, molecule dictionary 1393 rmHetmol - boolean, remove hetero molecule, default True 1394 addH - boolean, add hydrogens, default False 1395 OUTPUT: 1396 altloclist - list, alternate location list 1397 modelist - list, model list 1398 chainlist - list, chain list 1399 moldescdict - dict, molecule description dictionary 1400 molnamedict - dict, molecule name dictionary 1401 """ 1402 altloclist=[]#alternate location list 1403 modelist=[]# model list 1404 chainlist=[]#chain list 1405 moldescdict={}#molecule description dictionary 1406 molnamedict={}#molecule dictionary 1407 mol=OECreateOEMol()#create oe molecule object 1408 mol1=OEGraphMol()#create oe molecule object 1409 corrflag=True#correction flag 1410 corrlist=[]#correction list 1411 for molitem in molist: 1412 mol.Clear()#clear molecule 1413 mackey=molitem[0]#molecule key 1414 macobj=moldict[mackey]#molecule object 1415 atoms=macobj.getmolatoms()#get atom object dictionary 1416 modelid=macobj.getmodelid()#get model id 1417 if modelid not in modelist:#get model list 1418 modelist.append(modelid) 1419 chainid=macobj.getchainid()#get chain id 1420 if chainid not in chainlist:#create chain list 1421 if chainid.isalpha():#get only characters not empty string 1422 chainlist.append(chainid) 1423 molname=macobj.getmolname()#get residue name 1424 molid=macobj.getmolid()# get residue id 1425 molinscode=macobj.getmolinscode()#get residue insertion code 1426 molfragnum=macobj.getmolfragnum()# get residue fragment number 1427 molsecstruct=macobj.getmolsecstruct()#get residue secondary structure 1428 molatnum=macobj.getmolatnum()#get residue number of atoms 1429 moltypeflag=True#if True residue type 'P' - protein, else 'M' - non -polymer type 1430 if atoms:# if atoms exist 1431 atdict=dict([(atkey, atobj.getatsernum()) for atkey, atobj in atoms.iteritems()])#atom dictionary - atom resideu id : atom serial number 1432 atdict=sortdictval(atdict)#sorted atom list of tuples 1433 for atitem in atdict:#iterate list of tuples 1434 atkey=atitem[0]#atom key 1435 atobj=atoms[atkey]#atom object 1436 atname=atobj.getatname()#get atom id name 1437 atsymbol=OEGetAtomicSymbol(atobj.getatnum())#get atom symbol 1438 atcoords=atobj.getatcoords()#get atom coordinates 1439 atbfactor=atobj.getatbfactor()#get atom b factor 1440 atishet=atobj.ishetat()#get heteroatom flag 1441 if atishet:# change moltype flag if any atom is heteroatom 1442 moltypeflag=False 1443 atoccup=atobj.getatoccup()#get atom occupancy 1444 atsernum=atobj.getatsernum()#get atom serial number 1445 ataltloc=atobj.getataltloc()#get atom alternate location 1446 if ataltloc!=" ":#get alternative location list 1447 atname=atname[:-2] 1448 if ataltloc not in altloclist: 1449 altloclist.append(ataltloc) 1450 if not (atcoords or atsymbol):#not atom coordinates or atom symbol 1451 print 'Error: No atomic coordinates or atomic symbol available!' 1452 sys.exit(1) 1453 atype='OEElemNo_%s'%atsymbol#atom symbol 1454 newat=mol.NewAtom(eval(atype))#create new oe atom 1455 at=OEAtomGetResidue(newat) 1456 if ataltloc is not None: 1457 at.SetAlternateLocation(ataltloc) 1458 if atbfactor is not None: 1459 at.SetBFactor(atbfactor)#set atom b factor 1460 if chainid is not None: 1461 at.SetChainID(chainid)#set atom chain id 1462 if molfragnum is not None: 1463 at.SetFragmentNumber(molfragnum)#set atom fragment number 1464 if atishet is not None: 1465 at.SetHetAtom(atishet)#set atom heteroatom flag 1466 if molinscode is not None: 1467 at.SetInsertCode(molinscode)#set atom insertion code 1468 if modelid is not None: 1469 at.SetModelNumber(modelid)#set atom model id 1470 if molname is not None: 1471 at.SetName(molname)#set atom residue name 1472 if atname is not None: 1473 newat.SetName(atname)#set atom name 1474 if molid is not None: 1475 at.SetResidueNumber(molid)# set atom residue id 1476 if atoccup is not None: 1477 at.SetOccupancy(atoccup)# set atom occupancy 1478 if molsecstruct is not None: 1479 at.SetSecondaryStructure(molsecstruct)#set atom residue secondary structure 1480 if atsernum is not None: 1481 at.SetSerialNumber(atsernum)#set atom serial number 1482 OEAtomSetResidue(newat,at)# set atom features to new atom 1483 mol.SetCoords(newat,atcoords)# set coordinates to oe atom 1484 OEDetermineConnectivity(mol)#determinate connectivity 1485 OEFindRingAtomsAndBonds(mol)#find ring atoms 1486 OEPerceiveBondOrders(mol)#perceive bond orders 1487 OECanonicalOrderAtoms(mol)#canonical atom order 1488 OECanonicalOrderBonds(mol)#canonical bond order 1489 if moltypeflag: 1490 macobj.type='P'# polymer flag 1491 else: 1492 if iswater(molname): 1493 macobj.type='W'#water flag 1494 elif molatnum==1: 1495 macobj.type='A'#atom flag 1496 else: 1497 macobj.type='M'#non -polymer flag 1498 if rmHetmol: 1499 if macobj.getype()!='P': continue#if not polymer 1500 1501 if macobj.getype()!='P':#if molecule non-polymer or water 1502 if macobj.getype()=='A':#if molecule is atom 1503 if not OEHasImplicitHydrogens(mol): 1504 OEAssignImplicitHydrogens(mol)#add hydrogens 1505 OEAssignFormalCharges(mol)#add charges 1506 1507 if addH: 1508 if not OEHasImplicitHydrogens(mol): 1509 OEAssignImplicitHydrogens(mol)#add hydrogens 1510 OEAssignFormalCharges(mol)#add charges 1511 OEAddExplicitHydrogens(mol)#change implicit H into explicit 1512 OESet3DHydrogenGeom(mol)#add 3D coordinates 1513 1514 1515 isosmi=CanSmi(mol=mol,iso=True,kek=False,verbose=True)#create isomeric smile 1516 OEParseSmiles(mol1,isosmi)#check SMILE validity 1517 isosmi=OECreateSmiString(mol1,OESMILESFlag_ISOMERIC) 1518 mol1.Clear() 1519 smiobj=Smile(smile=isosmi)#smile object 1520 mol=smiobj.CanMol(mol=mol,kek=False,aromodel=OEAroModelOpenEye,verbose=0)#canonical molecule 1521 for bond in mol.GetBonds():#set bond type 1522 bond.SetIntType(bond.GetOrder()) 1523 macobj.isosmi=isosmi##add to molecule object isomeric SMILE code 1524 macobj.symq=smiobj.getAtoms(mol)#add atomic number and charge dictionary 1525 macobj.bonds=smiobj.getBonds(mol)#add bonds dictionary 1526 macobj.coords=smiobj.getCoords1(mol)# add coordinates dictionary 1527 OEAssignImplicitHydrogens(mol)#to calcule molecular weight and molecular formula 1528 OEAssignFormalCharges(mol)#add charges 1529 macobj.mw=OECalculateMolecularWeight(mol)# add molecular weight 1530 macobj.formul=OEMolecularFormula(mol)#add molecule formula 1531 macobj.mq=OENetCharge(mol)#sum of the formal charges on each atom of the molecule 1532 OESuppressHydrogens(mol) 1533 if macobj.mq!=0 and macobj.getype()=='A': 1534 macobj.type='I' 1535 1536 if not moldescdict.has_key(molname):#molecule not in dictionary 1537 desclist=[] 1538 desclist.append(macobj.getype())#get type 1539 if macobj.getype()=='P':#if polymer 1540 desclist.append('1')#molecule pdb id 1541 desclist.append('NULL')#molecule weight 1542 desclist.append('NULL')#molecule net charge 1543 desclist.append('NULL')#molecule formula 1544 desclist.append('NULL')#molecule bonds list 1545 desclist.append('NULL')#molecule symbol and charge 1546 else: 1547 desclist.append(molname)#if not polymer 1548 desclist.append(macobj.getmw())#molecule weight 1549 desclist.append(macobj.getmq())#molecule net charge 1550 desclist.append(macobj.getformul())#molecule formula 1551 desclist.append(macobj.getbonds())#molecule bonds 1552 desclist.append(macobj.getsymq())#molecule symbol and charge list 1553 desclist.append(macobj.getisosmi())#isomeric smile 1554 desclist.append([macobj.getchainid()]) 1555 moldescdict[molname]=desclist 1556 else: 1557 if macobj.getype()!='P': 1558 if moldescdict[molname][7]!=macobj.getisosmi(): 1559 molname=macobj.getmolname() 1560 modelid=macobj.getmodelid() 1561 chainid=macobj.getchainid() 1562 resid=macobj.getmolid() 1563 resinscode=macobj.getmolinscode() 1564 if chainid.isspace(): 1565 if resinscode.isspace(): 1566 err='model: %s, residue: %s' %(modelid,resid) 1567 else: 1568 err='model: %s, residue: %s, insertion code: %s' %(modelid,resid,resinscode) 1569 else: 1570 if resinscode.isspace(): 1571 err='model: %s, chain: %s, residue: %s' %(modelid,chainid,resid) 1572 else: 1573 err='model: %s, chain: %s, residue: %s, insertion code: %s'%(modelid,chainid,resid,resinscode) 1574 corrlist.append(False) 1575 print 'Warning: Different molecule description: %s (%s).' %(molname,err) 1576 self.log.warning('Dirfferent molecule description: %s (%s)',molname,err) 1577 else: 1578 corrlist.append(True) 1579 1580 molchainlist=moldescdict[molname][8] 1581 if chainid not in molchainlist: 1582 molchainlist.append(chainid) 1583 else: 1584 print 'Error: No atoms available!' 1585 sys.exit(1) 1586 1587 molnamedict[mackey]=macobj 1588 if not all(corrlist): 1589 corrflag=False#correction flag 1590 return corrflag,altloclist,modelist,chainlist,moldescdict,molnamedict
1591
1592 -def all(iterable):
1593 """ 1594 Return True if any element of the iterable is true. Build-in Python 2.5 1595 INPUT: 1596 iterable 1597 OUTPUT: 1598 boolean 1599 """ 1600 for element in iterable: 1601 if not element: 1602 return False 1603 return True
1604 ############### End of class ########################################################### 1605 ######################################################################################## 1606 ############## MAIN #################################################################### 1607 ############## Example of usage ######################################################## 1608 if __name__ == "__main__": 1609 pass 1610 # profile='/tmp/pdbfile/3ERDwithouthetat.pdb' 1611 # profile='/tmp/pdbfile/3ERD.pdb' 1612 # profile='/tmp/pdbfile/3ERDtest.pdb' 1613 # profile='/tmp/pdbfile/3ERDtest2.pdb' 1614 # profile='/tmp/pdbfile/3ERDtest3.pdb' 1615 # profile='/tmp/pdbfile/3ERDtest4.pdb' 1616 # profile='/tmp/pdbfile/3ERDtest5.pdb' 1617 # profile='/tmp/pdbfile/3ERDtest6.pdb' 1618 # profile='/tmp/pdbfile/1Z17.pdb' 1619 # profile='/tmp/pdbfile/1DP3.pdb' 1620 # profile='/tmp/pdbfile/1DP3test.pdb' 1621 # profile='/tmp/pdbfile/THOM.pdb' 1622 # profile='/tmp/pdbfile/2JG1.pdb' 1623 # profile='/tmp/pdbfile/3MBP.pdb' 1624 # profile='/tmp/pdbfile/3MBPtest.pdb' 1625 # profile='/tmp/pdbfile/1JFN.pdb' 1626 # A=PDB2DB(pdbfilepath=profile,repflag=False,path='/tmp/Log',filename='MacMolImport', 1627 # host='',db='',user='',passwd='',log=False,ligandb='') 1628 # A.PDB2Tab(tabcolvaldict={'PDBCode':'1DP3test1'},logdebug=True,addH=True,rmHetmol=False) 1629 # A.PDB2Tab(logdebug=True,addH=False,rmHetmol=False) 1630 # A.PDB2Tab(tabcolvaldict={'PDBCode':'3MBPtest16'},logdebug=True,addH=False,rmHetmol=False) 1631 # A.PDB2Tab(tabcolvaldict={'PDBCode':'3ERD1 ','PDBParentId':'3ERD'},logdebug=True,addH=False,rmHetmol=False) 1632 # A.PDB2Tab(tabcolvaldict={'PDBParentId':'3ERD'},logdebug=True,addH=False,rmHetmol=False) 1633 # A.addConf(tabcolvaldict={'PDBCode':'1DP3','ModelId':1},logdebug=True,addH=True,rmHetmol=False) 1634 # 1635 # ## TEST addConf file 3ERDtest4 ### 1636 # A.addConf(tabcolvaldict={'ModelId':[4]},logdebug=True,addH=False,rmHetmol=False, rmMacmol=False) 1637 # A.addConf(logdebug=True,addH=False,rmHetmol=False, rmMacmol=False) 1638 # A.addConf(tabcolvaldict={'ModelId':[6]},logdebug=True,addH=False,rmHetmol=False, rmMacmol=False) 1639 # A.addConf(tabcolvaldict={'ModelId':[11]},logdebug=True,addH=False,rmHetmol=False, rmMacmol=True) 1640 # ## TEST addConf file 3ERDtest5 ### 1641 # A.addConf(logdebug=True,addH=True,rmHetmol=False, rmMacmol=False) 1642 # A.addConf(tabcolvaldict={'ModelId':[5]},logdebug=True,addH=False,rmHetmol=False, rmMacmol=False) 1643 # ## TEST addConf file 3ERDtest6 ### 1644 # A.addConf(tabcolvaldict={'ModelId':[3,4]},logdebug=True,addH=False,rmHetmol=False, rmMacmol=False) 1645