Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package Data2DB :: Module PropDrugBank
[hide private]
[frames | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.Data2DB.PropDrugBank

  1  #!/usr/bin/env python 
  2  ########################################## 
  3  # PropDrugBank.py                        # 
  4  # Parse Property DrugBank database file  # 
  5  ########################################## 
  6   
  7  ###################################################### 
  8  # Copyright (c) 2007-2008 Andrzej Bak                # 
  9  # ARC Seibersdorf & University of Silesia            # 
 10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
 11  # License: GNU General Public License, version: 3    # 
 12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
 13  # Version: 1, 06.01.2010                             # 
 14  ###################################################### 
 15   
 16  try: 
 17      import sys 
 18      import os 
 19      from MoSTBioDat.DataBase.ImportData.Data2DB.PropFile import PropFile 
 20      from openeye.oechem import * 
 21      import anydbm 
 22  except ImportError,e: 
 23      print 'Error: %s' %e 
 24      sys.exit(1) 
 25  ########## Property Drug Bank file class ############### 
class PropDrugBank(PropFile):
    """
    DrugBank "DrugCard" property file parser.

    Cards are read from self.file, split into a field dictionary and merged
    into the 'property' entry of objects kept in the dictionary that is
    accessed through openDict/getDict/retDict/insertDict/closeDict. Those
    helpers, as well as self.file, self.kwargs, self.log and self.logobj,
    are not defined in this class (presumably provided by PropFile).

    NOTE: print(...) is always called with a single argument so the code
    parses and prints identically under the Python 2 print statement.
    """

    def __init__(self, propfile=None, **kwargs):
        """
        Property Drug Bank File parser
        INPUT:
            propfile - property file path, str, default=None
            dictpath - str - path to dictionary
            dictfilename - str - dictionary filename
            dbfile - str - database file path
            format - str - logging file format, default "%(levelname)s %(asctime)s %(ip)s %(username)s %(filename)s %(module)s %(funcName)s %(lineno)d >> %(message)s "
            filter - specified record to be logged
            datefmt - str - date format
            path - str - path to log files
            filename - str - log filename, default 'log'
            filemode - str - log file mode, default 'wa'
            level - str - logging level, default logging.DEBUG
            logfilelevel - log file level
            temporary - str - temporary directory
        OUTPUT:
            dictionary object
        """
        PropFile.__init__(self, propfile=propfile, **kwargs)

    def getCard(self):
        """
        get DrugCard frame
        INPUT:
            class object
        OUTPUT:
            generator of DrugCard strings; every string holds all lines read
            from self.file since the previous card, up to and including the
            line starting with '#END_DRUGCARD'
        """
        chunk = []
        # iter(callable, sentinel) stops on the empty string that readline()
        # returns at end of file
        for line in iter(self.file.readline, ''):
            chunk.append(line)
            if line.startswith("#END_DRUGCARD"):
                yield ''.join(chunk)
                chunk = []
        # lines following the last '#END_DRUGCARD' marker are discarded

    def parseCard(self, card):
        """
        parse DrugCard
        INPUT:
            class object
            card - str, card frame
        OUTPUT:
            result - dict, result dictionary; key is the text of a '#' line
                     (leading '#', surrounding blanks and a trailing ':'
                     removed), value is the concatenation (no separator) of
                     the lines that follow it up to the next empty line or
                     the end of the card
        """
        result = {}
        lines = card.splitlines()
        linelen = len(lines)
        i = 0
        while i < linelen:
            line = lines[i]
            if not line.startswith('#'):
                i += 1
                continue
            name = line[1:].strip()
            if name.endswith(':'):
                name = name[:-1].rstrip()
            # card delimiters carry no property value
            if name.startswith(('BEGIN_DRUGCARD', 'END_DRUGCARD')):
                i += 1
                continue
            j = i + 1
            parts = []
            # the bounds check keeps a field that ends the card without a
            # trailing blank line from raising IndexError
            while j < linelen and lines[j]:
                parts.append(lines[j])
                j += 1
            result[name] = ''.join(parts)
            i = j + 1  # continue after the terminating blank line
        return result

    def _beginParse(self):
        """
        Start the time counter and progress dots, open the dictionary and
        announce the parsing run.
        OUTPUT:
            (sw, dots) - OEStopwatch and OEDots objects
        Exits the interpreter with status 1 if openDict() reports failure.
        """
        sw = OEStopwatch()  # time counter
        sw.Start()
        dots = OEDots(20000, 100, '>> Compounds')  # dots progress indicator
        # open exactly once; the result decides whether parsing can go on
        if not self.openDict():
            sys.exit(1)
        self.log.info('Parsing Property text file: %s', self.kwargs['dbfile'])
        print('Parsing %s, please wait ...' % self.kwargs['dbfile'])
        return sw, dots

    def _storeUpdated(self, isosmi, dictobj):
        """
        Write an updated object back under its isomeric SMILES key.
        On anydbm.error the error is printed and logged and the interpreter
        exits with status 1; the caller's 'finally' closes the dictionary.
        """
        try:
            self.insertDict(isosmi, dictobj)
        except anydbm.error as error:
            print('Error: %s' % error)
            self.log.exception('Error: %s', error)
            sys.exit(1)

    def _endParse(self, sw, dots, nmol, nmolins, notmolins):
        """
        Print and log the run summary, then remove the log handler.
        """
        dots.Total()
        print("SDF time reading: %.2f s" % (sw.Elapsed()))
        self.log.info('Read %s SMILES in %.2f s, %s: inserted %s, refused %s', nmol, sw.Elapsed(), self.kwargs['dictfilename'], nmolins, notmolins)
        print("%s: %s molecules inserted, %s refused!" % (self.kwargs['dictfilename'], nmolins, notmolins))
        self.logobj.rmLogHandler()

    def proParse(self, spacedelim='\t'):
        """
        Property file parser keyed on the card's isomeric SMILES: for every
        card, the object returned by getDict() for the value of the
        'Smiles_String_isomeric' field gets the card fields merged into its
        'property' entry and is written back with insertDict().
        INPUT:
            class object
            spacedelim - space delimiter specificator, default = '\t'
                         (not used by this method, kept for compatibility)
        OUTPUT:
            None; a summary is printed and logged
        """
        nmol = 0  # molecule counter
        nmolins = 0  # molecule inserted into dictionary
        notmolins = 0  # not inserted into dictionary
        sw, dots = self._beginParse()
        try:
            for card in self.getCard():
                nmol += 1
                dots.Update()
                propdict = self.parseCard(card)
                isosmi = propdict.get('Smiles_String_isomeric', None)
                if not isosmi:
                    print('No SMILE attribute!')
                    notmolins += 1
                    continue
                dictobj = self.getDict(isosmi)
                if not dictobj:  # no object in dictionary
                    notmolins += 1
                    continue
                if 'property' in dictobj.kwargs:
                    dictobj.kwargs['property'].update(propdict)
                else:
                    dictobj.kwargs['property'] = propdict
                self._storeUpdated(isosmi, dictobj)
                nmolins += 1
        finally:
            # close the dictionary on every exit path, including sys.exit()
            # and unexpected exceptions
            self.closeDict()
        self._endParse(sw, dots, nmol, nmolins, notmolins)

    def proParseonDrugDBNum(self, spacedelim='\t'):
        """
        Property file parser based on DrugBank DataBase Number: for every
        card, the first object yielded by retDict().itervalues() whose
        'property' entry has a 'DRUGBANK_ID' equal to the card's
        'Primary_Accession_No' gets the card fields merged in and is written
        back under its 'isosmi' value.
        INPUT:
            class object
            spacedelim - space delimiter specificator, default = '\t'
                         (not used by this method, kept for compatibility)
        OUTPUT:
            None; a summary is printed and logged, where the refused count
            is the number of cards read minus the number inserted
        """
        nmol = 0  # molecule counter
        nmolins = 0  # molecule inserted into dictionary
        sw, dots = self._beginParse()
        try:
            for card in self.getCard():
                nmol += 1
                dots.Update()
                propdict = self.parseCard(card)
                dbnum = propdict.get('Primary_Accession_No', None)  # DrugBank number
                if not dbnum:
                    # message corrected: the missing field is the database
                    # number, not the SMILES string
                    print('No database number attribute!')
                    continue
                if dbnum == 'Not Available':
                    print('Database number not available')
                    continue
                # linear scan of the whole dictionary for each card
                for dictobj in self.retDict().itervalues():
                    if not dictobj.hasAttr('isosmi'):
                        continue
                    if not dictobj.hasAttr('property'):
                        continue
                    # dbnum is non-empty here, so an absent or empty
                    # DRUGBANK_ID can never compare equal
                    if dictobj.kwargs['property'].get('DRUGBANK_ID', None) != dbnum:
                        continue
                    dictobj.kwargs['property'].update(propdict)
                    self._storeUpdated(dictobj.kwargs['isosmi'], dictobj)
                    nmolins += 1
                    break  # only the first matching object is updated
        finally:
            # close the dictionary on every exit path, including sys.exit()
            # and unexpected exceptions
            self.closeDict()
        notmolins = nmol - nmolins
        self._endParse(sw, dots, nmol, nmolins, notmolins)
############### End of class ##########################################
############ MAIN #####################################################
############ example of usage #########################################
if __name__ == '__main__':
    # Nothing is executed when the module is run directly; the commented
    # lines below only illustrate how the class is meant to be used.
    # A=PropDrugBank('/tmp/testFile/drugprop.txt',dictpath='/home/baka/SDict',dictfilename='SDict2DBStdInp0',path='/tmp/Log',filename='propDrugBank')
    # A.proParse()
    pass