Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package Data2DB :: Module PDBFile
[hide private]
[frames] | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.Data2DB.PDBFile

   1  #!/usr/bin/env python 
   2  ################################# 
   3  # PDBFile.py                    # 
   4  # PDB file classes and methods  # 
   5  ################################# 
   6   
   7  ###################################################### 
   8  # Copyright (c) 2007-2008 Andrzej Bak                # 
   9  # ARC Seibersdorf & University of Silesia            # 
  10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
  11  # License: GNU General Public License, version: 3    # 
  12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
  13  # Version: 1, 06.01.2010                             # 
  14  ###################################################### 
  15   
  16  try: 
  17      import sys 
  18      import os 
  19      import re 
  20      from openeye.oechem import * 
  21      from MoSTBioDat.DataBase.ImportData.Data2DB.Smile import Smile 
  22  except ImportError,e: 
  23      print 'Error: %s' %e 
  24      sys.exit(1) 
  25       
  26  ###################################################### 
27 -def createPDBMol(ofs,mol):
28 """ 29 create PDB file from molecule 30 INPUT: 31 ofs - output file stream 32 mol - molecule object 33 OUTPUT: 34 file 35 """ 36 if OEHasResidues(mol): 37 OEPDBOrderAtoms(mol) 38 else: 39 OEPerceiveResidues(mol) 40 if mol.GetDimension()<3: 41 flags=OEPDBOFlag_ORDERS|OEPDBOFlag_BONDS 42 else: 43 flags=OEPDBOFlag_DEFAULT 44 OEWritePDBFile(ofs,mol,flags)
45 46 ###################################################
47 -def preprot(mol):
48 """ 49 prepare molecule 50 INPUT: 51 mol - class object 52 OUTPUT: 53 """ 54 if OEHasResidues(mol): 55 OEPDBOrderAtoms(mol) 56 else: 57 OEPerceiveResidues(mol) 58 OEDetermineConnectivity(mol) 59 OEFindRingAtomsAndBonds(mol) 60 OEPerceiveBondOrders(mol) 61 OEAssignFormalCharges(mol) 62 OEAssignAromaticFlags(mol)
63 64 ############# Residue class ######################
65 -class Residue(object):
66 """ 67 residue class 68 INPUT: 69 name - str, residue name 70 num - int, residue number 71 OUTPUT: 72 class object 73 """ 74 Name="" 75 ResidueNumber=0 76 Atoms=[] 77 OEReslist=[] 78 Mol=None 79 NumAtoms=0 80 ChainID=0 81 InsertCode=0 82 FragmentNumber=0 83 ModelNumber=0 84 SecondaryStructure=0 85 AlternateLocation=" " 86
87 - def __init__(self,name,num):
88 self.Name=name 89 self.ResidueNumber=num 90 self.Atoms=[] 91 self.OEReslist=[]
92
93 - def addat(self,atom):
94 """ 95 add atom to residue 96 INPUT: 97 atom 98 OUTPUT: 99 class object 100 """ 101 self.Atoms.append(atom) 102 self.NumAtoms=len(self.Atoms) 103 res=OEAtomGetResidue(atom) 104 self.OEReslist.append(res) 105 self.ChainID=res.GetChainID() 106 self.InsertCode=res.GetInsertCode() 107 self.FragmentNumber=res.GetFragmentNumber() 108 self.ModelNumber=res.GetModelNumber() 109 self.SecondaryStructure=res.GetSecondaryStructure() 110 self.AlternateLocation=res.GetAlternateLocation() 111 if res.IsHetAtom(): 112 self.HetAt=True 113 else: 114 self.HetAt=False
115 ############# end of class ######################## 116
117 -def iswater(formula):
118 """ 119 check if is water molecule 120 INPUT: 121 formula - str, chemical formula 122 OUTPUT: 123 boolean 124 """ 125 formula=formula.lower() 126 if formula in ['hoh','sol','h2o']: 127 return True 128 else: 129 return False
130 #########################################################
131 -def checkres(r):
132 """ 133 check residue 134 INPUT: 135 residue - residue class object 136 INPUT: 137 string 138 """ 139 global errorcount 140 rn=r.ResidueNumber 141 vals=map(lambda a:OEAtomGetResidue(a).GetChainID(),r.Atoms) 142 for val in vals: 143 if vals[0]!=val: 144 print 'ERROR: chainid in %s%d: %s!=%s'%(r.Name,rn,vals[0],val) 145 errorcount+=1 146 vals=map(lambda a:OEAtomGetResidue(a).GetInsertCode(),r.Atoms) 147 for val in vals: 148 if vals[0]!=val: 149 print 'ERROR: inscode in %s%d: %s!=%s'%(r.Name,rn,vals[0],val) 150 errorcount+=1 151 vals=map(lambda a:OEAtomGetResidue(a).GetFragmentNumber(),r.Atoms) 152 for val in vals: 153 if vals[0]!=val: 154 print 'ERROR: fragnum in %s%d: %s!=%s'%(r.Name,rn,vals[0],val) 155 errorcount+=1 156 vals=map(lambda a:OEAtomGetResidue(a).GetModelNumber(),r.Atoms) 157 for val in vals: 158 if vals[0]!=val: 159 print 'ERROR: modnum in %s%d: %s!=%s'%(r.Name,rn,vals[0],val) 160 errorcount+=1 161 vals=map(lambda a:OEAtomGetResidue(a).GetSecondaryStructure(),r.Atoms) 162 for val in vals: 163 if vals[0]!=val: 164 print 'ERROR: secstruc in %s%d: %s!=%s'%(r.Name,rn,vals[0],val) 165 errorcount+=1 166 vals=map(lambda a:OEAtomGetResidue(a).GetAlternateLocation(),r.Atoms) 167 for val in vals: 168 if vals[0]!=val: 169 print 'ERROR: secstruc in %s%d: %s!=%s'%(r.Name,rn,vals[0],val) 170 errorcount+=1
171 172 #############################################################################
173 -def printres(mol,cids,residues,restypes,natom,resnums):
174 """ 175 print pdb file summary 176 INPUT: 177 mol - molecule object 178 cids - chain id 179 residues - residues object 180 restypes - residue types 181 natom - number of atom 182 resnums - residue atom number 183 OUTPUT: 184 string 185 """ 186 outstr="" 187 outstr+=('\tchains: %d (%s)\n'%(len(cids),','.join(cids))) 188 outstr+=('\tresidues: %d\n' % len(residues)) 189 if restypes.has_key("HOH"): 190 outstr+=('\tresidues not "HOH": %d\n'%(len(residues)-restypes["HOH"])) 191 if restypes.has_key("LIG"): 192 outstr+=('\t"LIG" residues: %d\n'%(restypes["LIG"])) 193 if restypes.has_key("UNK"): 194 outstr+=('\t"UNK" residues: %d\n'%(restypes["UNK"])) 195 outstr+=('\tatoms in residues: %d\n' % (natom)) 196 outstr+=('\tatoms not in residues: %d\n' % (mol.NumAtoms()-natom)) 197 outstr+=('\tresidues number range: %d - %d\n' % (min(resnums),max(resnums))) 198 199 resnames=restypes.keys() 200 resnames.sort() 201 outstr+='\tComposition: ' 202 for resname in resnames: 203 outstr+=('%s:%d '%(resname,restypes[resname])) 204 outstr+='\n' 205 206 for r in residues: 207 outstr+=('\t%s: '%r.Name 208 +'res_id=%d, '%r.ResidueNumber 209 +'chain_id=%s ,'%r.ChainID 210 +'ins_code=%s ,'%r.InsertCode 211 +'frag_num=%d ,'%r.FragmentNumber 212 +'model_id=%d ,'%r.ModelNumber 213 +'sec_str=%d ,'%r.SecondaryStructure 214 +'at_num=%d ,'%r.NumAtoms 215 +'at_altloc=%s\n' %r.AlternateLocation 216 ) 217 return outstr
218 219 ###################################################
220 -def defres(mol):
221 """ 222 define residue 223 INPUT: 224 mol - molecule object 225 OUTPUT: 226 cids - chain id 227 residues - residue object list 228 restypes - residue types 229 natom - number of atoms in residue 230 resnums - residue names 231 """ 232 residues=[] ### list of residue objects 233 restypes={} ### names/frequencies 234 natom=0 ## number of atoms 235 cids=[]# chain id 236 resnums=[]#residue names 237 prevres=None#previous residue 238 239 for atom in mol.GetAtoms():#iterate atoms 240 if OEHasResidue(atom): 241 natom+=1 242 else: 243 continue 244 res=OEAtomGetResidue(atom)#get residues 245 if not prevres or not OESameChain(res,prevres):#if not same chain 246 cid=res.GetChainID()#get chain id 247 if re.match('\S',cid) and cid not in cids: 248 cids.append(cid) 249 try: 250 resnum=int(res.GetResidueNumber())#get residue id 251 except ValueError, e: 252 print 'Error: %s' %e 253 continue 254 if resnum not in resnums: 255 resnums.append(resnum) 256 if not prevres or not OESameResidue(res,prevres): 257 residue=Residue(res.GetName(),resnum) 258 residues.append(residue) 259 if not restypes.has_key(res.GetName()): 260 restypes[res.GetName()]=0 261 restypes[res.GetName()]+=1 262 residues[-1].addat(atom) 263 prevres=res 264 265 return cids,residues,restypes,natom,resnums
266 267 ######################################################################
268 -def sortdictval(dictionary):
269 """ 270 sort dictionary value 271 INPUT: 272 dictionary 273 OUTPUT: 274 sorted list of tuples 275 """ 276 return sorted(dictionary.iteritems(), key=lambda (k,v):(v,k))
277 278 ######################################################################
279 -def parseres(mol,residue):
280 """ 281 parse residue 282 INPUT: 283 mol - oe class object 284 residue - residue class object 285 repflag - boolean, report flag, default False 286 """ 287 checkres(residue)#check if residue correct 288 modelid=residue.ModelNumber#get model id 289 chainid=residue.ChainID#get chain id 290 resid=residue.ResidueNumber#get residue id 291 resname=residue.Name#residue name 292 resatnum=residue.NumAtoms#residue atom number 293 resinscode=residue.InsertCode#residue insert code 294 resfragnum=residue.FragmentNumber#residue fragment number 295 resecstruct=residue.SecondaryStructure#residue secondary structure 296 hetflag=residue.HetAt#residue flag 297 objdict={}#object dictionary 298 for atom in residue.Atoms:#iterate atoms 299 atdict={} 300 at=OEAtomGetResidue(atom) 301 atnum=atom.GetAtomicNum()#get atomic number 302 if atnum: 303 atdict['num']=atnum 304 atsymbol=OEGetAtomicSymbol(atnum)#get atom symbol 305 if atsymbol: 306 atdict['symbol']=atsymbol 307 ataltloc=at.GetAlternateLocation() 308 atname=atom.GetName()#get atom name 309 if atname: 310 atdict['name']=atname 311 else: 312 print 'Error: Can not retrieve atom name from %s' %resname 313 sys.exit(1) 314 if ataltloc!=" ": 315 atname='%s_%s' %(atname,ataltloc) 316 atdict['name']=atname 317 atsernum=at.GetSerialNumber()#get atom serial number 318 if atsernum: 319 atdict['sernum']=atsernum 320 atbfactor=at.GetBFactor()#get atom b factor 321 atdict['bfactor']=atbfactor 322 atoccup=at.GetOccupancy()#get atom occupancy 323 atdict['occup']=atoccup 324 atishet=at.IsHetAtom() 325 atdict['ishet']=atishet 326 atcoord=mol.GetCoords(atom)#get atom coordinates 327 atdict['altloc']=ataltloc 328 if atcoord: 329 atdict['coords']=atcoord 330 if objdict.has_key(atname):#if atom id exists in dictionary 331 print 'Error: Invalid key: %s, serial number: %s! Please, change atom name in PDB file ...' %(atname.strip(),atsernum) 332 sys.exit(1) 333 objdict[atname]=Atom(**atdict)#add atom class object to dictionary 334 obj=Molecule(modelid,chainid,resid,resname,resatnum,resinscode,resfragnum,resecstruct,objdict,hetflag)#class object 335 if chainid.isspace(): 336 if resinscode.isspace(): 337 resid='%s_%s' %(modelid,resid) 338 else: 339 resid='%s_%s%s' %(modelid,resid,resinscode) 340 else: 341 if resinscode.isspace(): 342 resid='%s_%s_%s' %(modelid,chainid,resid) 343 else: 344 resid='%s_%s_%s%s'%(modelid,chainid,resid,resinscode) 345 346 return resid,obj
347 348 ####### Molecule class ###########################
349 -class Molecule(object):
350 """ 351 Molecule class 352 INPUT: 353 modelid - int, model id 354 chainid - int, chain id 355 molid - int, molecule id 356 molname - str, molecule name 357 molatnum - int, molecule atom number 358 molinscode - str, molecule insertion code 359 molfragnum - int, molecule fragment number 360 molcoords - dict, molecule coordinates dictionary 361 OUTPUT: 362 class object 363 """
364 - def __init__(self,modelid,chainid,molid, molname,molatnum,molinscode,molfragnum,molsecstruct,molatoms,molhetflag):
365 self.modelid=modelid 366 self.chainid=chainid 367 self.molid=molid 368 self.molname=molname 369 self.molatnum=molatnum 370 self.molatoms=molatoms 371 self.molfragnum=molfragnum 372 self.molinscode=molinscode 373 self.molsecstruct=molsecstruct 374 self.molhetflag=molhetflag 375 self.isosmi=None 376 self.type=None 377 self.symq=None 378 self.bonds=None 379 self.coords=None 380 self.wm=None 381 self.mq=None 382 self.formul=None
383
384 - def getmodelid(self):
385 """ 386 get model id 387 INPUT: 388 class object 389 OUTPUT: 390 model id 391 """ 392 return self.modelid
393
394 - def setmodelid(self,id):
395 """ 396 set model id 397 INPUT: 398 class object 399 id - int, model id 400 OUTPUT: 401 model id 402 """ 403 self.modelid=id
404
405 - def getchainid(self):
406 """ 407 get chain id 408 INPUT: 409 class object 410 OUTPUT: 411 chain id 412 """ 413 return self.chainid
414
415 - def getmolname(self):
416 """ 417 get molecule name 418 INPUT: 419 class object 420 OUTPUT: 421 molecule name 422 """ 423 return self.molname
424
425 - def getmolid(self):
426 """ 427 get molecule id 428 INPUT: 429 class object 430 OUTPUT: 431 molecule id 432 """ 433 return self.molid
434
435 - def getmolatnum(self):
436 """ 437 get molecule atom number 438 INPUT: 439 class object 440 OUTPUT: 441 molecule atom number 442 """ 443 return self.molatnum
444
445 - def getmolatoms(self):
446 """ 447 get molecule atoms 448 INPUT: 449 class object 450 OUTPUT: 451 dictionary 'atom id': atom class object 452 """ 453 return self.molatoms
454
455 - def getmolinscode(self):
456 """ 457 get molecule insertion code 458 INPUT: 459 class object 460 OUTPUT: 461 molecule insertion code, str, default space 462 """ 463 return self.molinscode
464
465 - def getmolfragnum(self):
466 """ 467 get molecule fragment number 468 INPUT: 469 class object 470 OUTPUT: 471 molecule fragment number, int, default 0 472 """ 473 return self.molfragnum
474
475 - def getmolsecstruct(self):
476 """ 477 get molecule secondary structure 478 INPUT: 479 class object 480 OUTPUT: 481 molecule secondary structure, int, default 0 482 """ 483 return self.molsecstruct
484
485 - def getisosmi(self):
486 """ 487 get isomeric SMILE code 488 INPUT: 489 class object 490 OUTPUT: 491 isomeric SMILE code 492 """ 493 return self.isosmi
494
495 - def getype(self):
496 """ 497 get molecule type 498 INPUT: 499 class object 500 OUTPUT: 501 char - P - polymer, S - polymer segment, M - non polymer, W - water, I - ion, A - atom 502 """ 503 return self.type
504
505 - def getsymq(self):
506 """ 507 get symbol and charge dictionary with list in canonical order 508 INPUT: 509 class object 510 OUTPUT: 511 symbol and charge dictionary 512 """ 513 return self.symq
514
515 - def getbonds(self):
516 """ 517 get bonds dictionary with list in canonical order 518 INPUT: 519 class object 520 OUTPUT: 521 bonds dictionary 522 """ 523 return self.bonds
524
525 - def getcoords(self):
526 """ 527 get coordinates dictionary with list in canonical order 528 INPUT: 529 class object 530 OUTPUT: 531 coordinates dictionary 532 """ 533 return self.coords
534
535 - def getmw(self):
536 """ 537 get molecule weight 538 INPUT: 539 class object 540 OUTPUT: 541 molecular weight 542 """ 543 return self.mw
544
545 - def getmq(self):
546 """ 547 get molecule charge 548 INPUT: 549 class object 550 OUTPUT: 551 molecular net charge 552 """ 553 return self.mq
554
555 - def getformul(self):
556 """ 557 get molecule formula 558 INPUT: 559 class object 560 OUTPUT: 561 molecule formula 562 """ 563 return self.formul
564
565 - def gethetflag(self):
566 """ 567 get molecule heteroflag 568 INPUT: 569 class object 570 OUTPUT: 571 boolean 572 """ 573 return self.molhetflag
574 575 ############### end of class ######################### 576 ################# class Atom #########################
577 -class Atom(object):
578 """ 579 atom class 580 INPUT: 581 name - str, atom name 582 OUTPUT: 583 """
584 - def __init__(self,**kwarg):
585 self.kwarg=kwarg
586
587 - def getatname(self):
588 """ 589 get atom name 590 INPUT: 591 class object 592 OUTPUT: 593 atom name, str 594 """ 595 return self.kwarg.get('name',None)
596
597 - def getatsernum(self):
598 """ 599 get atom serial number 600 INPUT: 601 class object 602 OUTPUT: 603 atom serial number, int 604 """ 605 return self.kwarg.get('sernum',None)
606
607 - def getatsymbol(self):
608 """ 609 get atom symbol 610 INPUT: 611 class object 612 OUTPUT: 613 atom symbol, str 614 """ 615 return self.kwarg.get('symbol',None)
616
617 - def getatbfactor(self):
618 """ 619 get atom b factor 620 INPUT: 621 class object 622 OUTPUT: 623 atom b factor, float 624 """ 625 return self.kwarg.get('bfactor',None)
626
627 - def getatnum(self):
628 """ 629 get atom number 630 INPUT: 631 class object 632 OUTPUT: 633 atom number, int 634 """ 635 return self.kwarg.get('num',None)
636
637 - def getatcoords(self):
638 """ 639 get atom coordinates 640 INPUT: 641 class object 642 OUTPUT: 643 atom coordinates, tuple 644 """ 645 return self.kwarg.get('coords',None)
646
647 - def getatoccup(self):
648 """ 649 get atom occupancy 650 INPUT: 651 class object 652 OUTPUT: 653 atom occupancy, float 654 """ 655 return self.kwarg.get('occup',None)
656
657 - def ishetat(self):
658 """ 659 get heteroatom flag 660 INPUT: 661 class object 662 OUTPUT: 663 heteroatom flag, boolean 664 """ 665 return self.kwarg.get('ishet',None)
666
667 - def getataltloc(self):
668 """ 669 get alternate location 670 INPUT: 671 class object 672 OUTPUT: 673 alternate location, str 674 """ 675 return self.kwarg.get('altloc',None)
676 677 ############## end of class ########################## 678 ############# PDB file class #########################
679 -def PDBFile(pdbfilepath,repflag=True):
680 """ 681 parse PDB file 682 INPUT: 683 pdbfilepath - str, file path 684 repflag - boolean, residue flag, default True 685 """ 686 ifs=oemolistream() 687 if not ifs.open(pdbfilepath): 688 OEThrow.Fatal('Cannot open %s' %pdbfilepath) 689 flavor=OEIFlavor_PDB_DATA 690 ifs.SetFlavor(OEFormat_PDB,flavor) 691 nmol=0 692 global errorcount 693 errorcount=0 694 ligdict={}#ligand dictionary 695 macdict={}#macromolecule dictionary 696 mol=OEGraphMol() 697 OEReadPDBFile(ifs,mol,flavor) 698 tagspair=OEGetPDBDataPairs(mol) 699 tagdict={} 700 for tagpair in tagspair: 701 taglist=[] 702 tag=tagpair.GetTag() 703 tag=tag.strip() 704 tagval=tagpair.GetValue() 705 tagval=tagval.strip() 706 if tagdict.has_key(tag): 707 taglist=tagdict[tag] 708 taglist.append(tagval) 709 else: 710 taglist.append(tagval) 711 tagdict[tag]=taglist 712 mol.Clear() 713 ifs.close() 714 ifs.open(pdbfilepath) 715 flavor=OEIFlavor_Generic_Default|OEIFlavor_PDB_Default|OEIFlavor_PDB_END|OEIFlavor_PDB_DATA 716 ifs.SetFlavor(OEFormat_PDB,flavor) 717 for mol in ifs.GetOEGraphMols(): 718 nmol+=1 719 preprot(mol)#prepare PDB molecule 720 cids,residues,restypes,natom,resnums = defres(mol)#define residues 721 if repflag: 722 print printres(mol,cids,residues,restypes,natom,resnums) 723 for residue in residues:#iterate residues 724 resname=residue.Name#get residue name 725 resatnum=residue.NumAtoms#get residue atom number 726 if residue.HetAt and (not iswater(resname) and resatnum>1):#get heteroatom flag 727 resligid,ligobj=parseres(mol,residue)#parse residues 728 if ligdict.has_key(resligid): 729 print 'Error: Ligand: %s exist in dictionary!' %resligid 730 continue 731 ligdict[resligid]=ligobj 732 else:#macromolecule 733 resmacid,macobj=parseres(mol,residue)#parse residues 734 if macdict.has_key(resmacid): 735 print 'Error: Macromolecule: %s exist in dictionary!' %resmacid 736 continue 737 macdict[resmacid]=macobj 738 ifs.close() 739 mol.Clear() 740 return tagdict,ligdict,macdict
741 ################# End of class #################################### 742 ################# SDF file class ####################################
743 -class createSDFile(object):
744 """ 745 create SDF file in specified directory 746 INPUT: 747 inboj - input ligand class object 748 dirpath - str, directory path 749 temporary - str, temporary directory path, default /tmp/Ligand 750 sdf2pdb - boolean, convert sdf2pdb, default False 751 aromaticflag - str, oe aromatic flag, default OEAroModelOpenEye 752 addH - boolean, add hydrogen atoms, default False 753 OUTPUT: 754 file 755 """
756 - def __init__(self,inobj,dirpath,**kwargs):
757 self.inobj=inobj 758 sdfileid=self.inobj.getmolid() 759 sdfilename=self.inobj.getmolname() 760 sdfmolid=self.inobj.getmodelid() 761 sdfinscode=self.inobj.getmolinscode() 762 if sdfinscode.isspace(): 763 sdfile='%s_%s_%s' %(sdfilename,sdfileid,sdfmolid) 764 else: 765 sdfile='%s_%s%s_%s' %(sdfilename,sdfileid,sdfinscode,sdfmolid) 766 pdbfile=sdfile 767 sdfile='%s.sdf' %sdfile 768 self.kwargs={} 769 self.kwargs=kwargs 770 try: 771 self.kwargs['sdfile']=str(sdfile) 772 except ValueError,e: 773 print 'Error: %s' %e 774 sys.exit(1) 775 self.kwargs['dirpath']=dirpath 776 if not self.kwargs.has_key('dirpath'): 777 self.kwargs.setdefault('dirpath','/tmp') 778 if not self.kwargs.has_key('temporary'): 779 self.kwargs.setdefault('temporary','/tmp/Ligand') 780 if not self.kwargs.has_key('sdf2pdb'): 781 self.kwargs.setdefault('sdf2pdb',False) 782 if not self.kwargs.has_key('aromaticflag'): 783 self.kwargs.setdefault('aromaticflag','OEAroModelOpenEye') 784 if not self.kwargs.has_key('addH'): 785 self.kwargs.setdefault('addH',False) 786 self.createsdfilepath() 787 self.createsdfile(format='OEFormat_SDF') 788 if self.kwargs['sdf2pdb']: 789 self.kwargs['sdfile']='%s.pdb' %(pdbfile) 790 self.createsdfilepath() 791 self.createsdfile(format='OEFormat_PDB')
792
793 - def createsdfilepath(self):
794 """ 795 create SDF file 796 INPUT: 797 class object 798 OUTPUT: 799 sdf filename absolute path 800 """ 801 if self.kwargs['dirpath']=='': 802 self.kwargs['dirpath']=os.path.abspath(os.curdir) 803 if not os.path.isdir(self.kwargs['dirpath']): 804 try: 805 os.makedirs(self.kwargs['dirpath']) 806 except Exception,error: 807 print 'Error: %s, %s for %s' %(error[0],error[1],self.kwargs['dirpath']) 808 print 'SDF files in %s' %self.kwargs['temporary']#create directory in temporary path 809 self.kwargs['dirpath']=self.kwargs['temporary'] 810 if not os.path.exists(self.kwargs['dirpath']): 811 os.makedirs(self.kwargs['dirpath']) 812 else: 813 if not os.access(self.kwargs['dirpath'],os.W_OK): 814 print 'Error: Permission denied! Unable to write in %s' %self.kwargs['dirpath'] 815 self.kwargs['dirpath']=self.kwargs['temporary'] 816 if not os.path.exists(self.kwargs['dirpath']): 817 os.makedirs(self.kwargs['dirpath']) 818 elif not os.access(self.kwargs['dirpath'],os.W_OK): 819 print 'Error: Permission denied! Unable to write in %s' %self.kwargs['dirpath'] 820 sys.exit(1) 821 self.sdfullfilename=os.path.join(self.kwargs['dirpath'],self.kwargs['sdfile']) 822 if os.path.isfile(self.sdfullfilename): 823 print 'Error: Specified file: %s exists in %s' %(self.kwargs['sdfile'],self.kwargs['dirpath']) 824 sys.exit(1) 825 print 'SDF file path: %s' %self.sdfullfilename
826
827 - def createsdfile(self,format='OEFormat_SDF'):
828 """ 829 create SDF file 830 INPUT: 831 class object 832 format - str, oe file format 833 OUTPUT: 834 SDF file format 835 """ 836 atoms=self.inobj.getmolatoms()#get atom object dictionary 837 if atoms:# if atoms exist 838 mol=OECreateOEMol()#create oe molecule object 839 mol1=OEGraphMol()#create oe molecule object 840 atdict=dict([(atkey, atobj.getatsernum()) for atkey, atobj in atoms.iteritems()])#atom dictionary - atom resideu id : atom serial number 841 atdict=sortdictval(atdict)#sorted atom list of tuples 842 for atitem in atdict:#iterate list of tuples 843 atkey=atitem[0]#atom key 844 atobj=atoms[atkey]#atom object 845 atname=atobj.getatname()#get atom id name 846 atsymbol=OEGetAtomicSymbol(atobj.getatnum())#get atom symbol 847 atcoords=atobj.getatcoords()#get atom coordinates 848 if not (atcoords or atsymbol):#not atom coordinates or atom symbol 849 print 'Error: No atomic coordinates or atomic symbol available!' 850 sys.exit(1) 851 atype='OEElemNo_%s'%atsymbol#atom symbol 852 newat=mol.NewAtom(eval(atype))#create new oe atom 853 854 OEDetermineConnectivity(mol)#determinate connectivity 855 OEFindRingAtomsAndBonds(mol)#find ring atoms 856 OEPerceiveBondOrders(mol)#deteminate bond orders 857 OECanonicalOrderAtoms(mol) 858 OECanonicalOrderBonds(mol) 859 860 if self.kwargs['addH']: 861 if not OEHasImplicitHydrogens(mol): 862 OEAssignImplicitHydrogens(mol) 863 OEAssignFormalCharges(mol) 864 OEAddExplicitHydrogens(mol) 865 OESet3DHydrogenGeom(mol) 866 867 isosmi=CanSmi(mol=mol,iso=True,kek=False,verbose=True)#create isomeric smile 868 OEParseSmiles(mol1,isosmi)#check SMILE validity 869 isosmi=OECreateSmiString(mol1,OESMILESFlag_ISOMERIC) 870 mol1.Clear() 871 smiobj=Smile(smile=isosmi) 872 mol=smiobj.CanMol(mol=mol,kek=False,aromodel=OEAroModelOpenEye,verbose=0) 873 for bond in mol.GetBonds(): 874 bond.SetIntType(bond.GetOrder()) 875 876 ofs=oemolostream() 877 if self.sdfullfilename and not ofs.open(self.sdfullfilename): 878 OEThrow.Fatal('Cannot open: %s' %self.sdfullfilename) 879 ofs.SetFormat(eval(format)) 880 OEWriteMolecule(ofs,mol) 881 else: 882 print 'Error: No atoms available!' 883 sys.exit(1)
884
885 -def CanSmi(mol,iso,kek,verbose):
886 """ 887 Create canonical smile (unique or absolute) 888 INPUT: 889 mol - molecule class object 890 iso - boolean, create isomeric SMILE 891 kek - boolean, Kekule aromatic form 892 varbose - boolean, show warnings 893 OUTPUT: 894 """ 895 smiflag=OESMILESFlag_Canonical 896 if iso: 897 smiflag|=OESMILESFlag_ISOMERIC 898 OEPerceiveChiral(mol) 899 for atom in mol.GetAtoms(): 900 IsChiral=atom.IsChiral() 901 HasStereo=atom.HasStereoSpecified() 902 903 if HasStereo and not IsChiral: 904 if verbose: 905 OEThrow.Warning("correcting atom stereo inconsistency!") 906 nbrs=[] 907 for bond in atom.GetBonds(): 908 nbrs.append(bond.GetNbr(atom)) 909 atom.SetStereo(nbrs,OEAtomStereo_Tetrahedral,OEBondStereo_Undefined) 910 911 for bond in mol.GetBonds(): 912 if bond.GetOrder()!=2: continue 913 HasStereoSpecified=bond.HasStereoSpecified() 914 IsChiral=bond.IsChiral() 915 if HasStereoSpecified and not IsChiral: 916 if verbose: 917 OEThrow.Warning("correcting bond stereo inconsistency!") 918 a1=bond.GetBgn() 919 a2=bond.GetEnd() 920 bond.SetStereo([a1,a2],OEBondStereo_CisTrans,OEBondStereo_Undefined) 921 922 if kek: 923 OEFindRingAtomsAndBonds(mol) 924 OEAssignAromaticFlags(mol,OEAroModelOpenEye) 925 for bond in mol.GetBonds(OEIsAromaticBond()): 926 bond.SetIntType(5) 927 OECanonicalOrderAtoms(mol) 928 OECanonicalOrderBonds(mol) 929 OEClearAromaticFlags(mol) 930 OEKekulize(mol) 931 932 smi=OECreateSmiString(mol,smiflag) 933 return smi
934
935 -class createPDBFile(object):
936 """ 937 create PDB file 938 INPUT: 939 macromolecule - input macromolecule dictionary 940 dirpath - str, directory path 941 pdbfile - str, output pdb file 942 OUTPUT: 943 file 944 """
945 - def __init__(self,macromolecule,dirpath,pdbfile,**kwargs):
946 self.macromolecule=macromolecule 947 self.pdbfile=pdbfile 948 self.kwargs={} 949 self.kwargs=kwargs 950 try: 951 self.kwargs['pdbfile']=str(pdbfile) 952 except ValueError,e: 953 print 'Error: %s' %e 954 sys.exit(1) 955 self.kwargs['dirpath']=dirpath 956 if not self.kwargs.has_key('dirpath'): 957 self.kwargs.setdefault('dirpath','/tmp') 958 if not self.kwargs.has_key('temporary'): 959 self.kwargs.setdefault('temporary','/tmp/MacroMol') 960 if not self.kwargs.has_key('hetat'): 961 self.kwargs.setdefault('hetat',False) 962 if not self.kwargs.has_key('headerdict'): 963 self.kwargs.setdefault('headerdict',{}) 964 self.createpdbfilepath() 965 self.writepdbfile()
966
967 - def createpdbfilepath(self):
968 """ 969 create PDB filepath 970 INPUT: 971 class object 972 OUTPUT: 973 sdf filename absolute path 974 """ 975 if self.kwargs['dirpath']=='': 976 self.kwargs['dirpath']=os.path.abspath(os.curdir) 977 if not os.path.isdir(self.kwargs['dirpath']): 978 try: 979 os.makedirs(self.kwargs['dirpath']) 980 except Exception,error: 981 print 'Error: %s, %s for %s' %(error[0],error[1],self.kwargs['dirpath']) 982 print 'PDB files in %s' %self.kwargs['temporary']#create directory in temporary path 983 self.kwargs['dirpath']=self.kwargs['temporary'] 984 if not os.path.exists(self.kwargs['dirpath']): 985 os.makedirs(self.kwargs['dirpath']) 986 else: 987 if not os.access(self.kwargs['dirpath'],os.W_OK): 988 print 'Error: Permission denied! Unable to write in %s' %self.kwargs['dirpath'] 989 self.kwargs['dirpath']=self.kwargs['temporary'] 990 if not os.path.exists(self.kwargs['dirpath']): 991 os.makedirs(self.kwargs['dirpath']) 992 elif not os.access(self.kwargs['dirpath'],os.W_OK): 993 print 'Error: Permission denied! Unable to write in %s' %self.kwargs['dirpath'] 994 sys.exit(1) 995 self.pdbfullfilename=os.path.join(self.kwargs['dirpath'],self.kwargs['pdbfile']) 996 if os.path.isfile(self.pdbfullfilename): 997 print 'Error: Specified file: %s exists in %s' %(self.kwargs['pdbfile'],self.kwargs['dirpath']) 998 sys.exit(1) 999 print 'PDB file path: %s' %self.pdbfullfilename
1000
1001 - def createmacmol(self):
1002 """ 1003 create macromolecule pdb file 1004 INPUT: 1005 class object 1006 OUTPUT: 1007 mol object 1008 """ 1009 mol=OECreateOEMol()#create oe molecule object 1010 for tagkey, tagvalue in self.kwargs['headerdict'].iteritems(): 1011 if isinstance(tagvalue,str): 1012 tagvalue=[tagvalue] 1013 if not isinstance(tagvalue,list): 1014 print 'Error: Incorrect tag value specification! Must be string! Please correct it ...' 1015 sys.exit(1) 1016 1017 ### tagkey must have length of 6 ### 1018 tagkeylen=len(tagkey) 1019 if tagkeylen<6: 1020 tagkey=tagkey+(6-tagkeylen)*' ' 1021 for tagitem in tagvalue: 1022 if not isinstance(tagitem,str): 1023 print 'Error: Incorrect tag value specification! Must be string! Please correct it ...' 1024 sys.exit(1) 1025 contline=tagitem[0:2].strip() 1026 ### if tag value starts with continuation line ### 1027 if contline.isdigit(): 1028 lencontline=len(contline) 1029 if lencontline==1: 1030 tagitem=' '+tagitem 1031 else: 1032 tagitem=2*' '+tagitem 1033 OEAddPDBData(mol,tagkey,2*' '+tagitem) 1034 1035 ### sort according residue_id ### 1036 # moldict=dict([(molkey, molobj.getmolid()) for molkey, molobj in self.macromolecule.iteritems()])#molecule dictionary - chain_molecule id : molecule id 1037 # moldict=sortdictval(moldict)#sort dictionary according values 1038 ### sort according model, chain, residue_id, insertion_code ### 1039 moldict=dict([(molkey, (molobj.getmodelid(),molobj.getchainid(),molobj.getmolid(),molobj.getmolinscode())) for molkey, molobj in self.macromolecule.iteritems()])#molecule dictionary - chain_molecule id : molecule id 1040 moldict=sortdictval(moldict)#sort dictionary according values 1041 for molitem in moldict: 1042 mackey=molitem[0]#molecule key 1043 macobj=self.macromolecule[mackey]#molecule object 1044 atoms=macobj.getmolatoms()#get atom object dictionary 1045 modelid=macobj.getmodelid()#get model id 1046 chainid=macobj.getchainid()#get chain id 1047 molname=macobj.getmolname()#get residue name 1048 molid=macobj.getmolid()# get residue id 1049 molinscode=macobj.getmolinscode()#get residue insertion code 1050 molfragnum=macobj.getmolfragnum()# get residue fragment number 1051 molsecstruct=macobj.getmolsecstruct()#get residue secondary structure 1052 if atoms:# if atoms exist 1053 atdict=dict([(atkey, atobj.getatsernum()) for atkey, atobj in atoms.iteritems()])#atom dictionary - atom resideu id : atom serial number 1054 atdict=sortdictval(atdict)#sorted atom list of tuples 1055 for atitem in atdict:#iterate list of tuples 1056 atkey=atitem[0]#atom key 1057 atobj=atoms[atkey]#atom object 1058 atname=atobj.getatname()#get atom id name 1059 atsymbol=OEGetAtomicSymbol(atobj.getatnum())#get atom symbol 1060 atcoords=atobj.getatcoords()#get atom coordinates 1061 atbfactor=atobj.getatbfactor()#get atom b factor 1062 atishet=atobj.ishetat()#get heteroatom flag 1063 if not self.kwargs['hetat']:#macromolecule without heteroatoms (water and ions) 1064 if atishet: continue #omit atoms with heteroatom flag true 1065 atoccup=atobj.getatoccup()#get atom occupancy 1066 atsernum=atobj.getatsernum()#get atom serial number 1067 ataltloc=atobj.getataltloc()#get atom alternate location 1068 if ataltloc!=" ": 1069 atname=atname[:-2] 1070 if not (atcoords or atsymbol):#not atom coordinates or atom symbol 1071 print 'Error: No atomic coordinates or atomic symbol available!' 1072 sys.exit(1) 1073 atype='OEElemNo_%s'%atsymbol#atom symbol 1074 newat=mol.NewAtom(eval(atype))#create new oe atom 1075 at=OEAtomGetResidue(newat) 1076 if ataltloc is not None: 1077 at.SetAlternateLocation(ataltloc) 1078 if atbfactor is not None: 1079 at.SetBFactor(atbfactor)#set atom b factor 1080 if chainid is not None: 1081 at.SetChainID(chainid)#set atom chain id 1082 if molfragnum is not None: 1083 at.SetFragmentNumber(molfragnum)#set atom fragment number 1084 if atishet is not None: 1085 at.SetHetAtom(atishet)#set atom heteroatom flag 1086 if molinscode is not None: 1087 at.SetInsertCode(molinscode)#set atom insertion code 1088 if modelid is not None: 1089 at.SetModelNumber(modelid)#set atom model id 1090 if molname is not None: 1091 at.SetName(molname)#set atom residue name 1092 if atname is not None: 1093 newat.SetName(atname)#set atom name 1094 if molid is not None: 1095 at.SetResidueNumber(molid)# set atom residue id 1096 if atoccup is not None: 1097 at.SetOccupancy(atoccup)# set atom occupancy 1098 if molsecstruct is not None: 1099 at.SetSecondaryStructure(molsecstruct)#set atom residue secondary structure 1100 if atsernum is not None: 1101 at.SetSerialNumber(atsernum)#set atom serial number 1102 OEAtomSetResidue(newat,at)# set atom features to new atom 1103 mol.SetCoords(newat,atcoords)# set coordinates to oe atom 1104 else: 1105 print 'Error: No atoms available!' 1106 sys.exit(1) 1107 return mol
1108
1109 - def writepdbfile(self):
1110 """ 1111 write pdb file 1112 INPUT: 1113 class object 1114 OUTPUT: 1115 file 1116 """ 1117 mol=self.createmacmol() 1118 ofs=oemolostream() 1119 if self.pdbfullfilename and not ofs.open(self.pdbfullfilename): 1120 OEThrow.Fatal('Cannot open: %s' %self.pdbfullfilename) 1121 1122 ofs.SetFormat(OEFormat_PDB) 1123 1124 if OEHasResidues(mol): 1125 OEPDBOrderAtoms(mol) 1126 else: 1127 OEPerceiveResidues(mol) 1128 if mol.GetDimension()<3: 1129 flags=OEPDBOFlag_ORDERS|OEPDBOFlag_BONDS 1130 else: 1131 flags=OEPDBOFlag_DEFAULT 1132 OEWritePDBFile(ofs,mol,flags)
1133 ############### End of class ########################################################### 1134 ######################################################################################## 1135 ############## MAIN #################################################################### 1136 ############## Example of usage ######################################################## 1137 if __name__ == "__main__": 1138 pass 1139 # profile='/tmp/pdbfile/3ERDtest2.pdb' 1140 # profile='/tmp/pdbfile/3ERD.pdb' 1141 # profile='/tmp/pdbfile/macmol2pdb.pdb' 1142 # profile='/tmp/pdbfile/3ERDtest8.pdb' 1143 # profile='/tmp/pdbfile/1DP3.pdb' 1144 # profile='/tmp/pdbfile/1JFN.pdb' 1145 # profile='/tmp/pdbfile/1Z17.pdb' 1146 # headerdict,ligands,macromolecule=PDBFile(pdbfilepath=profile,repflag=False) 1147 # if ligands: 1148 # for ligkey,ligobj in ligands.iteritems(): 1149 # createSDFile(ligobj,dirpath='/tmp/pdbfile',sdf2pdb=True,addH=True) 1150 # else: 1151 # print 'No ligands available in %s ' %profile 1152 # if macromolecule: 1153 # createPDBFile(macromolecule,hetat=False,headerdict=headerdict,dirpath='/tmp/pdbfile',pdbfile='3ERDwithouthetat.pdb') 1154 # createPDBFile(macromolecule,hetat=True,dirpath='/tmp/pdbfile',pdbfile='3ERDwithhetat.pdb') 1155 # else: 1156 # print 'No macromolecule available in %s' %profile 1157