Package install :: Package MoSTBioDat :: Package DataBase :: Package SubStructSearch :: Module SubStructSearch
[hide private]
[frames] | no frames]

Source Code for Module install.MoSTBioDat.DataBase.SubStructSearch.SubStructSearch

  1  #!/usr/bin/env python 
  2  ####################################### 
  3  # SubStructSearch.py                  # 
  4  # Substructure DataBase Search module # 
  5  ####################################### 
  6   
  7  ###################################################### 
  8  # Copyright (c) 2007-2008 Andrzej Bak                # 
  9  # ARC Seibersdorf & University of Silesia            # 
 10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
 11  # License: GNU General Public License, version: 3    # 
 12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
 13  # Version: 1, 06.01.2010                             # 
 14  ###################################################### 
 15   
 16  try: 
 17      import sys 
 18      import os 
 19      from openeye.oechem import * 
 20      from MoSTBioDat.DataBase.ImportData.Data2DB.Smile import Smile 
 21      from pybel import Outputfile,Molecule,Atom,Fingerprint,Smarts,MoleculeData,readfile,readstring 
 22      from MoSTBioDat.DataBase.SubStructSearch.DB2SmiDict import DB2SmiDict, Smi2SmiDict 
 23      from MoSTBioDat.DataBase.Connect.MoSTBioDatErrors import Error 
 24      from MoSTBioDat.Log.MoSTBioDatLog import  MoSTBioDatLog,geTime 
 25  except ImportError,e: 
 26      print 'Error: %s' %e 
 27      sys.exit(1) 
 28  ########## Substructure Search class ################# 
class SubStructSearch(object):
    """
    Substructure search module
    INPUT:
        smidict - dict, id:smile dictionary
        kwargs  - passed unchanged to MoSTBioDatLog; the optional key
                  'temporary' names the fallback output directory
                  (default '/tmp/DB2Smi')
    OUTPUT:
        class object
    """

    def __init__(self, smidict=None, **kwargs):
        """
        create the logging handler and store the id:smile dictionary
        INPUT:
            smidict - dict, id:smile dictionary; the program exits when it
                      is missing or empty
            kwargs  - logging options, see class docstring
        OUTPUT:
            class object
        """
        print('Substructure Search/Pattern matching.')
        # None instead of a shared mutable {} default
        self.smidict = {} if smidict is None else smidict
        try:
            self.logobj = MoSTBioDatLog(**kwargs)  # create logging object
            self.log = self.logobj.getLogHandler()  # create logging handler
        except IOError as e:
            print('Error: %s, %s' % (e.errno, e.strerror))
            sys.exit(1)
        if not self.smidict:  # smile dictionary missing or empty
            print('Error: SMILE dictionary not available or empty!')
            self.log.error('SMILE dictionary not available or empty')
            sys.exit(1)
        self.kwargs = kwargs
        self.kwargs.setdefault('temporary', '/tmp/DB2Smi')

    # ------------------------------------------------------------------
    # private helpers shared by the search methods
    # ------------------------------------------------------------------
    def _iteritems(self):
        """iterate lazily over (id, smile) pairs of the dictionary"""
        smidict = self.smidict
        if hasattr(smidict, 'iteritems'):
            return smidict.iteritems()
        return iter(smidict.items())

    def _startprogress(self):
        """start the stopwatch and create the dots progress indicator"""
        sw = OEStopwatch()  # time counter
        sw.Start()
        dots = OEDots(10000, 500, '>> SMILES')  # dots progress indicator
        return sw, dots

    def _report(self, sw, nmol, nmolins, notmolins):
        """print and log the elapsed time and the inserted/refused counters"""
        calctime = sw.Elapsed()  # calculation time
        if calctime > 60:
            print("SMILE time writing: %.2f s %s" % (calctime, geTime(calctime)))
        else:
            print("SMILE time writing: %.2f s" % (calctime))
        self.log.info('Read %s SMILES in %.2f s, inserted %s, refused %s',
                      nmol, calctime, nmolins, notmolins)
        print("%s molecules inserted, %s refused!" % (nmolins, notmolins))

    def _openoutput(self, outfile):
        """resolve the output path with createfilepath and open it for writing"""
        try:
            outfile = str(outfile)  # convert to string
        except ValueError as e:
            print('Error: %s' % e)
            sys.exit(1)
        return open(self.createfilepath(filepath=outfile), 'w')

    def _fallbackdir(self):
        """return the 'temporary' directory, creating it when absent; exit if not writable"""
        tmpdir = self.kwargs['temporary']
        if not os.path.exists(tmpdir):
            os.makedirs(tmpdir)
        elif not os.access(tmpdir, os.W_OK):
            print('Error: Permission denied! Unable to write in %s' % tmpdir)
            sys.exit(1)
        return tmpdir

    def _parsereference(self, refersmile, refmol):
        """parse the reference SMILE into refmol; exit when empty or unparsable"""
        if not refersmile:  # no reference smile code
            print('Error: Reference SMILE code not available!')
            self.log.error('Reference SMILE code not available')
            sys.exit(1)
        if not OEParseSmiles(refmol, refersmile):
            print('Error: Unable to parse SMILE/SMART!')
            self.log.error('Unable to parse SMILE/SMART')
            sys.exit(1)
        return refmol

    def _writematchatoms(self, out, match, count):
        """write the pattern/target atom indices of one match"""
        out.write("match %d:" % count)
        out.write("\npattern atoms: ")
        for ma in match.GetAtoms():
            out.write("%d " % ma.pattern.GetIdx())
        out.write("\ntarget atoms: ")
        for ma in match.GetAtoms():
            out.write("%d " % ma.target.GetIdx())

    # ------------------------------------------------------------------
    # public interface
    # ------------------------------------------------------------------
    def writefile(self, smifilepath='db2smi.smi'):
        """
        write the id:smile dictionary to the specified file as isomeric SMILES
        INPUT:
            smifilepath - str, path to file
        OUTPUT:
            file
        """
        try:
            smifilepath = str(smifilepath)
        except ValueError as e:
            print('Error: %s' % e)
            sys.exit(1)
        smifullfilename = self.createfilepath(filepath=smifilepath)
        print('SMILE file creation, please wait ...')
        nmol = 0  # molecule counter
        nmolins = 0  # molecules inserted into file
        notmolins = 0  # molecules not inserted into file
        sw, dots = self._startprogress()
        mol = OEGraphMol()
        ofs = oemolostream()
        if smifullfilename and not ofs.open(smifullfilename):
            OEThrow.Fatal('Cannot open: %s' % smifullfilename)
        ofs.SetFormat(OEFormat_ISM)  # isomeric SMILE
        try:
            for idkey, smile in self._iteritems():
                mol.Clear()
                dots.Update()
                nmol += 1
                if not OEParseSmiles(mol, smile):
                    print('Warning %s: SMILE string is invalid for %s!' % (smile, idkey))
                    self.log.warning('%s: SMILE string is invalid for %s', smile, idkey)
                    notmolins += 1
                    continue
                nmolins += 1
                mol.SetTitle(str(idkey))
                OEWriteMolecule(ofs, mol)
        finally:
            ofs.close()  # close the stream even when a molecule fails unexpectedly
        dots.Total()
        self._report(sw, nmol, nmolins, notmolins)

    def createfilepath(self, filepath):
        """
        create output file path
        INPUT:
            filepath - str, requested file path; a bare file name is placed
                       in the current working directory
        OUTPUT:
            absolute/joined path of the file to create. A missing directory
            is created; when it cannot be created or is not writable the
            'temporary' directory is used instead. The program exits when the
            target file already exists or the fallback is not writable.
        """
        smifile = os.path.basename(filepath)
        dirpath = os.path.dirname(filepath)
        if dirpath == '':
            dirpath = os.path.abspath(os.curdir)
        if not os.path.isdir(dirpath):
            try:
                os.makedirs(dirpath)
            except EnvironmentError as error:
                print('Error: %s, %s for %s' % (error.errno, error.strerror, dirpath))
                print('File in %s' % self.kwargs['temporary'])  # fall back to temporary path
                dirpath = self._fallbackdir()
        elif not os.access(dirpath, os.W_OK):
            print('Error: Permission denied! Unable to write in %s' % dirpath)
            dirpath = self._fallbackdir()
        fullfilename = os.path.join(dirpath, smifile)
        if os.path.isfile(fullfilename):  # never overwrite an existing result
            print('Error: Specified file: %s exists in %s' % (smifile, dirpath))
            sys.exit(1)
        print('File path: %s' % fullfilename)
        return fullfilename

    def TanimotoSearch(self, refersmile='', iso=False, coeff=None, fptype='FP2', outfile='TanimotoSearch.txt'):
        """
        search for structures with at least the specified Tanimoto coefficient
        INPUT:
            refersmile - str, reference SMILE code
            iso - boolean, create isomeric SMILE, default False
            coeff - float (or value convertible to float), Tanimoto
                    coefficient in [0-1], default None (generally it should be set >0.7)
            fptype -- the name of the Open Babel fingerprint type.
                FP2 -- Indexes linear fragments up to 7 atoms.
                FP3 -- SMARTS patterns specified in the file patterns.txt
                FP4 -- SMARTS patterns specified in the file SMARTS_InteLigand.txt
            outfile - str, output file path
        OUTPUT:
            file
        """
        if coeff is None:
            print('Error: Tanimoto coefficient not available!')
            self.log.error('Tanimoto coefficient not available')
            sys.exit(1)

        if not refersmile:  # no reference smile code
            print('Error: Reference SMILE code not available!')
            self.log.error('Reference SMILE code not available')
            sys.exit(1)

        # convert BEFORE the range check so numeric strings are compared as numbers
        try:
            coeff = float(coeff)
        except (TypeError, ValueError):
            print('Error: Incorrect Tanimoto coefficient value!')
            self.log.error('Incorrect Tanimoto coefficient value')
            sys.exit(1)

        mincoeff = 0  # minimal Tanimoto coefficient
        maxcoeff = 1  # maximal Tanimoto coefficient
        if not (mincoeff <= coeff <= maxcoeff):  # specified coefficient not in range
            print('Error: Incorrect Tanimoto coefficient value! Must be in [0-1] range.')
            self.log.error('Incorrect Tanimoto coefficient value! Must be in [0-1] range')
            sys.exit(1)

        # validate the reference before the output file is created, so a bad
        # reference does not leave an empty file that blocks the next run
        refsmiobj = Smile(smile=refersmile)  # Smile class object
        if iso:
            refersmile = refsmiobj.CanSmi(mol=refsmiobj.getMol(), iso=True, kek=False, verbose=True)  # create isomeric smile
        try:
            refsmi = readstring("smi", refersmile)  # create pybel Molecule class object
        except IOError as e:
            print('Error: %s' % e)
            sys.exit(1)
        fprefsmi = refsmi.calcfp(fptype=fptype)  # reference smile fingerprint

        out = self._openoutput(outfile)
        try:
            out.write('reference SMILE: %s, Tanimoto coefficient: %4.3f\n' % (refersmile, coeff))  # header
            print('Searching SMILE dictionary, please wait ...')
            nmol = 0  # molecule counter
            nmolins = 0  # molecules inserted into file
            notmolins = 0  # molecules not inserted into file
            sw, dots = self._startprogress()
            for molid, targetsmile in self._iteritems():
                dots.Update()
                nmol += 1
                smiobj = Smile(smile=targetsmile)  # check if smile correct
                if iso:
                    targetsmile = smiobj.CanSmi(mol=smiobj.getMol(), iso=True, kek=False, verbose=True)  # create isomeric smile
                tanimoto = self.calcTanimoto(fprefersmi=fprefsmi, targetsmi=targetsmile, fptype=fptype)
                # -1 is the "not computable" marker returned by calcTanimoto
                if tanimoto != -1 and tanimoto >= coeff:
                    nmolins += 1
                    out.write('%s %s %4.3f\n' % (targetsmile, molid, tanimoto))
                else:
                    notmolins += 1
            dots.Total()
        finally:
            out.close()  # close output file
        self._report(sw, nmol, nmolins, notmolins)

    def calcTanimoto(self, fprefersmi=None, targetsmi='', fptype='FP2'):
        """
        calculate Tanimoto coefficient
        INPUT:
            targetsmi - str, target SMILE code, default empty
            fprefersmi - pybel.Fingerprint class object, default None
            fptype -- the name of the Open Babel fingerprint type.
                FP2 -- Indexes linear fragments up to 7 atoms.
                FP3 -- SMARTS patterns specified in the file patterns.txt
                FP4 -- SMARTS patterns specified in the file SMARTS_InteLigand.txt
        OUTPUT:
            Tanimoto coefficient - result of fprefersmi|fptargetsmi,
            or -1 when the target SMILE or the reference fingerprint is missing
        """
        if not (targetsmi and fprefersmi):
            print('Error: Target SMILE code or reference fingerprint not available!')
            return -1
        try:
            tarsmi = readstring("smi", targetsmi)
        except IOError as e:
            print('Error: %s' % e)
            sys.exit(1)
        fptargetsmi = tarsmi.calcfp(fptype=fptype)
        return fprefersmi | fptargetsmi

    def SmartSearchPyBel(self, smart='', outfile='SmartSearchPyBel.txt'):
        """
        search for substructures with specified SMART PyBel pattern
        INPUT:
            smart - str, SMART matching pattern
            outfile - str, output file path
        OUTPUT:
            file
        """
        if not smart:  # no matching pattern
            print('Error: SMART matching pattern not available!')
            self.log.error('SMART matching pattern not available')
            sys.exit(1)
        try:
            smart = str(smart)
        except ValueError:
            print('Error: Incorrect SMART pattern matching!')
            self.log.error('Incorrect SMART pattern matching')
            sys.exit(1)
        try:
            pattern = Smarts(smart)
        except IOError as e:
            print('Error: %s!' % e)
            self.log.error('%s', e)
            sys.exit(1)

        out = self._openoutput(outfile)
        try:
            out.write('SMART pattern: %s\n' % smart)  # header
            print('Searching SMILE dictionary, please wait ...')
            nmol = 0  # molecule counter
            nmolins = 0  # molecules inserted into file
            notmolins = 0  # molecules not inserted into file
            sw, dots = self._startprogress()
            for molid, targetsmile in self._iteritems():
                dots.Update()
                nmol += 1
                try:
                    mol = readstring("smi", targetsmile)
                except IOError:
                    # an unparsable entry is refused, as in the OpenEye based
                    # searches, instead of aborting the whole run
                    print('Warning %s: SMILE string is invalid for %s!' % (targetsmile, molid))
                    self.log.warning('%s: SMILE string is invalid for %s', targetsmile, molid)
                    notmolins += 1
                    continue
                if pattern.findall(mol):
                    nmolins += 1
                    out.write('%s %s\n' % (targetsmile, molid))
                else:
                    notmolins += 1
            dots.Total()
        finally:
            out.close()  # close output file
        self._report(sw, nmol, nmolins, notmolins)

    def SmartSearchOE(self, smart='', outfile='SmartSearchOE.txt'):
        """
        search for substructures with specified SMART OpenEye pattern
        INPUT:
            smart - str, SMART matching pattern
            outfile - str, output file path
        OUTPUT:
            file
        """
        if not smart:  # no matching pattern
            print('Error: SMART matching pattern not available!')
            self.log.error('SMART matching pattern not available')
            sys.exit(1)
        try:
            smart = str(smart)
        except ValueError:
            print('Error: Incorrect SMART pattern matching!')
            self.log.error('Incorrect SMART pattern matching')
            sys.exit(1)
        pat = OESubSearch()
        if not pat.Init(smart):
            print('Error: Unable to parse SMARTS!')
            self.log.error('Unable to parse SMARTS')
            sys.exit(1)

        out = self._openoutput(outfile)
        try:
            out.write('SMART pattern: %s\n' % smart)  # header
            print('Searching SMILE dictionary, please wait ...')
            nmol = 0  # molecule counter
            nmolins = 0  # molecules inserted into file
            notmolins = 0  # molecules not inserted into file
            sw, dots = self._startprogress()
            mol = OEGraphMol()  # reusable OE molecule object
            for molid, targetsmile in self._iteritems():
                dots.Update()
                nmol += 1
                mol.Clear()
                if OEParseSmiles(mol, targetsmile) and pat.SingleMatch(mol):
                    nmolins += 1
                    out.write('%s %s\n' % (targetsmile, molid))
                else:
                    notmolins += 1
            dots.Total()
        finally:
            out.close()  # close output file
        self._report(sw, nmol, nmolins, notmolins)

    def QuerySearch(self, refersmile='', outfile='QuerySearch.txt', **kwargs):
        """
        search for substructures with Query OE pattern
        INPUT:
            refersmile - str, reference SMILE code
            outfile - str, output file path
            atomatch - int or OE predefined settings, default OEExprOpts_DefaultAtoms
            bondmatch - int or OE predefined settings, default OEExprOpts_DefaultBonds
        OUTPUT:
            file
        """
        kwargs.setdefault('atomatch', OEExprOpts_DefaultAtoms)
        kwargs.setdefault('bondmatch', OEExprOpts_DefaultBonds)
        refmol = self._parsereference(refersmile, OEMol())

        # the query does not depend on the target: build it once, not per molecule
        qmol = OEQMol(refmol)
        qmol.BuildExpressions(kwargs['atomatch'], kwargs['bondmatch'])
        pat = OESubSearch(qmol)

        out = self._openoutput(outfile)
        try:
            out.write('reference SMILE: %s, atom match setting: %s, bond match setting: %s\n'
                      % (refersmile, kwargs['atomatch'], kwargs['bondmatch']))  # header
            print('Searching SMILE dictionary, please wait ...')
            nmol = 0  # molecule counter
            nmolins = 0  # molecules inserted into file
            notmolins = 0  # molecules not inserted into file
            sw, dots = self._startprogress()
            targetmol = OEMol()
            for molid, targetsmile in self._iteritems():
                dots.Update()
                nmol += 1
                targetmol.Clear()
                if OEParseSmiles(targetmol, targetsmile) and pat.SingleMatch(targetmol):
                    nmolins += 1
                    out.write('%s %s\n' % (targetsmile, molid))
                else:
                    notmolins += 1
            dots.Total()
        finally:
            out.close()  # close output file
        self._report(sw, nmol, nmolins, notmolins)

    def MCSearch(self, refersmile='', outfile='MCSearch.txt', **kwargs):
        """
        search for maximum common substructure
        INPUT:
            refersmile - str, reference SMILE code
            outfile - str, output file path
            atomatch - int or OE predefined settings, default OEExprOpts_DefaultAtoms
            bondmatch - int or OE predefined settings, default OEExprOpts_DefaultBonds
            searchtype - OE options:
                default OEMCSType_Approximate (faster)
                OEMCSType_Exhaustive
            scorefunc - OE functions:
                default OEMCSMaxAtoms() -> number of mapped atoms + number of mapped bonds/100
                OEMCSMaxBonds() -> number of mapped bonds + number of mapped atoms/100
                OEMCSMaxAtomsCompleteCycles() -> number of mapped atoms + number of mapped bonds/100
                OEMCSMaxBondsCompleteCycles() -> number of mapped bonds + number of mapped atoms/100
            numatoms - int, minimum atoms number required of subgraph match, default 4
            uniqueflag - boolean, unique or non-unique substructure searching, default True
        OUTPUT:
            file
        """
        kwargs.setdefault('atomatch', OEExprOpts_DefaultAtoms)
        kwargs.setdefault('bondmatch', OEExprOpts_DefaultBonds)
        kwargs.setdefault('searchtype', OEMCSType_Approximate)
        if 'scorefunc' not in kwargs:  # only build the default scorer when needed
            kwargs['scorefunc'] = OEMCSMaxAtoms()
        kwargs.setdefault('numatoms', 4)
        kwargs.setdefault('uniqueflag', True)
        refmol = self._parsereference(refersmile, OEGraphMol())

        out = self._openoutput(outfile)
        try:
            out.write('reference SMILE/SMART: %s, atom match: %s, bond match: %s, search type %s, atom number: %s\n'
                      % (refersmile, kwargs['atomatch'], kwargs['bondmatch'],
                         kwargs['searchtype'], kwargs['numatoms']))  # header
            print('Searching SMILE dictionary, please wait ...')
            nmol = 0  # molecule counter
            nmolins = 0  # molecules inserted into file
            notmolins = 0  # molecules not inserted into file
            sw, dots = self._startprogress()
            targetmol = OEGraphMol()
            ### create maximum common substructure object
            mcss = OEMCSSearch(refmol, kwargs['atomatch'], kwargs['bondmatch'], kwargs['searchtype'])
            ### set scoring function
            mcss.SetMCSFunc(kwargs['scorefunc'])
            ### ignore matches smaller than number atoms
            mcss.SetMinAtoms(kwargs['numatoms'])
            for molid, targetsmile in self._iteritems():
                dots.Update()
                nmol += 1
                targetmol.Clear()
                if not OEParseSmiles(targetmol, targetsmile):
                    notmolins += 1
                    continue
                # single pass over the matches: the molecule header is written
                # when the first match shows up, so the search runs only once
                count = 0
                for match in mcss.Match(targetmol, kwargs['uniqueflag']):
                    if count == 0:
                        nmolins += 1
                        out.write('\n%s %s\n' % (targetsmile, molid))
                    count += 1
                    self._writematchatoms(out, match, count)
                    ### create match subgraph
                    m = OEGraphMol()
                    OESubsetMol(m, match, True)
                    smi = OECreateSmiString(m, OESMILESFlag_ISOMERIC)
                    out.write("\nmatch isosmi: %s \n" % smi)
                if count == 0:
                    notmolins += 1
            dots.Total()
        finally:
            out.close()  # close output file
        self._report(sw, nmol, nmolins, notmolins)

    def CliqueSearch(self, refersmile='', outfile='CliqueSearch.txt', **kwargs):
        """
        search for common substructure with OE clique detection
        INPUT:
            refersmile - str, reference SMILE code
            outfile - str, output file path
            atomatch - int or OE predefined settings, default OEExprOpts_DefaultAtoms
            bondmatch - int or OE predefined settings, default OEExprOpts_DefaultBonds
            diffnumatoms - int, value passed to OECliqueSearch.SetSaveRange
                           (ignore cliques that differ by more than this many
                           atoms), default 4
            uniqueflag - boolean, accepted for symmetry with MCSearch; it is
                         not passed to the clique search
        OUTPUT:
            file
        """
        kwargs.setdefault('atomatch', OEExprOpts_DefaultAtoms)
        kwargs.setdefault('bondmatch', OEExprOpts_DefaultBonds)
        kwargs.setdefault('diffnumatoms', 4)
        kwargs.setdefault('uniqueflag', True)
        refmol = self._parsereference(refersmile, OEGraphMol())

        out = self._openoutput(outfile)
        try:
            out.write('reference SMILE/SMART: %s, atom match: %s, bond match: %s, differ atom number: %s\n'
                      % (refersmile, kwargs['atomatch'], kwargs['bondmatch'], kwargs['diffnumatoms']))  # header
            print('Searching SMILE dictionary, please wait ...')
            nmol = 0  # molecule counter
            nmolins = 0  # molecules inserted into file
            notmolins = 0  # molecules not inserted into file
            sw, dots = self._startprogress()
            targetmol = OEGraphMol()
            ### create clique search object
            cs = OECliqueSearch(refmol, kwargs['atomatch'], kwargs['bondmatch'])
            ### ignore cliques that differ by more than diffnumatoms atoms
            cs.SetSaveRange(kwargs['diffnumatoms'])
            for molid, targetsmile in self._iteritems():
                dots.Update()
                nmol += 1
                targetmol.Clear()
                if not OEParseSmiles(targetmol, targetsmile):
                    notmolins += 1
                    continue
                # single pass over the matches (see MCSearch)
                count = 0
                for match in cs.Match(targetmol):
                    if count == 0:
                        nmolins += 1
                        out.write('\n%s %s\n' % (targetsmile, molid))
                    count += 1
                    self._writematchatoms(out, match, count)
                    out.write('\n')
                if count == 0:
                    notmolins += 1
            dots.Total()
        finally:
            out.close()  # close output file
        self._report(sw, nmol, nmolins, notmolins)

    def RO5Search(self, outfile='RO5Search.txt', **kwargs):
        """
        search for molecules fulfilling the Lipinski Rule of Five
        INPUT:
            outfile - str, output file path
            MolWT - upper limit of molecular weight, default 500
            HBA - upper limit of H-bond acceptors, default 10
            HBD - upper limit of H-bond donors, default 5
            LogP - upper limit of LogP, default 5
        OUTPUT:
            file
        """
        kwargs.setdefault('MolWT', 500)
        kwargs.setdefault('HBA', 10)
        kwargs.setdefault('HBD', 5)
        kwargs.setdefault('LogP', 5)
        limits = ('MolWT', 'HBD', 'HBA', 'LogP')

        out = self._openoutput(outfile)
        try:
            out.write('RO5 => MolWT: %s, HBA: %s, HBD: %s, LogP: %4.3f\n'
                      % (kwargs['MolWT'], kwargs['HBA'], kwargs['HBD'], kwargs['LogP']))  # header
            print('Searching SMILE dictionary, please wait ...')
            nmol = 0  # molecule counter
            nmolins = 0  # molecules inserted into file
            notmolins = 0  # molecules not inserted into file
            sw, dots = self._startprogress()
            targetmol = OEGraphMol()
            for molid, targetsmile in self._iteritems():
                dots.Update()
                nmol += 1
                targetmol.Clear()
                # parse first: calcLipinskidesc exits on an unparsable SMILE
                if not OEParseSmiles(targetmol, targetsmile):
                    notmolins += 1
                    continue
                lipdesc = self.calcLipinskidesc(targetsmile)  # Lipinski descriptors dictionary
                ### Lipinski rule of five: every descriptor within its limit ###
                if all(lipdesc[key] <= kwargs[key] for key in limits):
                    nmolins += 1
                    out.write('%s %s %s %s %s %4.3f\n'
                              % (targetsmile, molid, lipdesc['MolWT'], lipdesc['HBD'],
                                 lipdesc['HBA'], lipdesc['LogP']))
                else:
                    notmolins += 1
            dots.Total()
        finally:
            out.close()  # close output file
        self._report(sw, nmol, nmolins, notmolins)

    def calcLipinskidesc(self, smile=''):
        """
        calculate Lipinski descriptors
        INPUT:
            smile - str, SMILE code
        OUTPUT:
            lipinski descriptors dictionary with keys 'MolWT', 'HBD', 'HBA'
            and 'LogP'; the program exits when the SMILE is empty or cannot
            be parsed
        """
        if not smile:  # no smile code
            print('Error: SMILE code not available!')
            sys.exit(1)
        refmol = OEGraphMol()  # OE molecule used only to validate the SMILE
        if not OEParseSmiles(refmol, smile):
            print('Error: Unable to parse SMILE!')
            sys.exit(1)

        try:
            mol = readstring('smi', smile)
        except IOError as e:  # same handling as in calcTanimoto
            print('Error: %s' % e)
            sys.exit(1)
        HBD = Smarts('[#7,#8;!H0]')  # N/O atoms carrying at least one hydrogen
        HBA = Smarts('[#7,#8]')  # any N/O atom
        desc = {
            'MolWT': mol.molwt,
            'HBD': len(HBD.findall(mol)),
            'HBA': len(HBA.findall(mol)),
            'LogP': mol.calcdesc(['LogP'])['LogP'],
        }
        return desc

################## End of class ########################################################
############## MAIN ################################################################
############ Example of usage ##########################################################
# The guard below does nothing when the module is run as a script; the
# commented lines show how the class is meant to be driven.
if __name__=='__main__':
    pass
#    print '*** Substructure search *** '
#    ### read data from database to Id:smile dictionary ###
#    A=DB2SmiDict(host='localhost',db='ligand',user='',passwd='',path='/tmp/Log',filename='db2smi')
#    db2smidict=A.readb(logdebug=False,lowercasetablenames=True)
#    ### write Id:smile dictionary to specified file ###
#    B=SubStructSearch(smidict=db2smidict,path='/tmp/Log',filename='smisubsearch')
#    B.writefile(smifilepath='/tmp/DB2Smi/db2smi2.smi')

#    ### read data from smi file ###
#    A=Smi2SmiDict(smifilepath='/tmp/DB2Smi/db2smi1.smi',path='/tmp/Log',filename='smi2smi')
#    smi2smidict=A.readfile()
#    ### write Id:smile dictionary to specified file ###
#    B=SubStructSearch(smidict=smi2smidict,path='/tmp/Log',filename='smisubsearch')
#    B.writefile(smifilepath='/tmp/DB2Smi/db2smi2.smi')

    ### Tanimoto Search ###
#    B.TanimotoSearch(refersmile='Cc1ccccc1',iso=False,coeff=0.1,fptype='FP2',outfile='/home/abak/DB2Smi/TanimotoSearch.txt')
#
#    ## SMART Search based on PyBel ###
#    B.SmartSearchPyBel(smart='[#6]', outfile='/tmp/DB2Smi/SmartSearchPyBel.txt')
#
#    ## SMART Search based on OpenEye ###
#    B.SmartSearchOE(smart='[#6]', outfile='/tmp/DB2Smi/SmartSearchOE.txt')
#
#    ## Query Search based on OpenEye ###
#    B.QuerySearch(refersmile='[#6]',atomatch=OEExprOpts_DefaultAtoms,bondmatch=OEExprOpts_DefaultBonds,outfile='/tmp/DB2Smi/QuerySearch.txt')
#
#    ## Maximal Common Substructure Search based on OpenEye ###
#    B.MCSearch(refersmile='c1cc(O)c(O)cc1CCN',atomatch=OEExprOpts_DefaultAtoms,bondmatch=OEExprOpts_DefaultBonds,
#               scorefunc=OEMCSMaxAtoms(),numatoms=6,outfile='/tmp/DB2Smi/MCSearch.txt')
#
#    ## Clique Search based on OpenEye ###
#    B.CliqueSearch(refersmile='c1cc(O)c(O)cc1CCN',atomatch=OEExprOpts_DefaultAtoms,bondmatch=OEExprOpts_DefaultBonds,
#                   diffnumatoms=2,outfile='/tmp/DB2Smi/CliqueSearch.txt')
#
#    ## Lipinski Rule of Five Search ###
#    B.RO5Search(outfile='/tmp/DB2Smi/RO5Search.txt',MolWT=500,HBA=10,HBD=5,LogP=5)