Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package Data2DB :: Module SmiFile
[hide private]
[frames | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.Data2DB.SmiFile

  1  #!/usr/bin/env python 
  2  ##################### 
  3  # SmiFile.py        # 
  4  # Parse *.smi file  # 
  5  ##################### 
  6   
  7  ###################################################### 
  8  # Copyright (c) 2007-2008 Andrzej Bak                # 
  9  # ARC Seibersdorf & University of Silesia            # 
 10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
 11  # License: GNU General Public License, version: 3    # 
 12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
 13  # Version: 1, 06.01.2010                             # 
 14  ###################################################### 
 15   
 16  try: 
 17      import sys 
 18      import os 
 19      import anydbm 
 20      from openeye.oechem import * 
 21      from MoSTBioDat.DataBase.ImportData.Data2DB.DBFile import DBFile,InputDB 
 22      from MoSTBioDat.Log.MoSTBioDatLog import  MoSTBioDatLog 
 23  except ImportError,e: 
 24      print 'Error: %s' %e 
 25      sys.exit(1) 
 26   
 27  ######## SMILE file class ############################ 
class SmiFile(DBFile):
    """
    Simple SMI file parser
    INPUT:
        smifile      - str - path to the *.smi file; handed to DBFile as 'dbfile'
        All remaining keyword arguments are forwarded unchanged to both
        DBFile.__init__ and MoSTBioDatLog:
        dictpath     - str - path to dictionary
        dictfilename - str - dictionary filename
        temporary    - str - path to temporary dictionary file

        format       - string format for log handler
        filter       - filter object from logger object
        datefmt      - date/time format
        path         - directory path to log file
        filename     - log file name, default log
        filemode     - mode to open log file, default='a'
        level        - set root logger level to specified level
        logfilelevel - set level to log file
    OUTPUT:
        dictionary object
    """

    def __init__(self, smifile, **kwargs):
        """
        Initialise the DBFile base class and create the logging objects
        INPUT:
            smifile - str - path to the *.smi file
            kwargs  - see class documentation
        OUTPUT:
            Prints the error and exits the process with status 1 when the
            log object raises IOError.
        """
        DBFile.__init__(self, dbfile=smifile, **kwargs)
        try:
            self.logobj = MoSTBioDatLog(**kwargs)  # create logging object
            self.log = self.logobj.getLogHandler()  # create logging handler
        except IOError:
            # sys.exc_info() instead of 'except IOError, e' keeps this valid
            # on every Python 2.x release.
            err = sys.exc_info()[1]
            if len(err.args) >= 2:
                print('Error: %s, %s' % (err.args[0], err.args[1]))
            else:
                # An IOError built from a single message has no second
                # argument; indexing it would hide the real error.
                print('Error: %s' % err)
            sys.exit(1)

    def parseSmi(self):
        """
        Parse smi files
        INPUT:
            class object; lines are read from self.file with readline().
            Every non-blank line is split on whitespace: the first field is
            the isomeric SMILES, the second field is the record name
            ('filename').
        OUTPUT:
            None. Each valid record whose name is not yet in the dictionary
            is stored under two keys: isomeric SMILES -> InputDB object and
            name -> isomeric SMILES. Invalid SMILES, lines without a name
            and names already present are counted as refused. A summary is
            printed and logged, then the log handler is removed.
            Exits the process with status 1 when the dictionary can not be
            opened or an insert raises anydbm.error.
        """
        nmol = 0  # molecule counter
        nmolins = 0  # molecules inserted into dictionary
        notmolins = 0  # molecules not inserted into dictionary
        mol = OEGraphMol()  # molecule object reused for every line
        sw = OEStopwatch()  # time counter
        sw.Start()
        dots = OEDots(10000, 500, '>> SMILES')  # dots progress indicator
        # Open the dictionary exactly once; the result tells whether it worked.
        if not self.openDict():  # can not open shelve dictionary
            sys.exit(1)
        try:
            self.log.info('Parsing SMI file: %s', self.kwargs['dbfile'])
            print('Parsing SMI file, please wait ...')
            while True:
                line = self.file.readline()
                if not line:
                    break
                fields = line.split()  # any whitespace: spaces, tabs, CR/LF
                if not fields:
                    continue  # blank line, not a molecule record
                dots.Update()
                nmol += 1
                if len(fields) < 2:
                    # Without a name there is no key to store the record under.
                    print('Malformed SMI line %s: %r' % (nmol, line))
                    self.log.error('Malformed SMI line %s: %r', nmol, line)
                    notmolins += 1
                    continue
                isosmi = fields[0]  # isomeric smile
                filename = fields[1]
                mol.Clear()  # clear molecule
                if not OEParseSmiles(mol, isosmi):  # check if valid smile
                    print("Invalid SMILE code: %s\n!" % isosmi)
                    # Plain error(): there is no active exception here, so
                    # exception() would have no traceback to attach.
                    self.log.error('Invalid SMILE code: %s (%s)',
                                   isosmi, filename)
                    notmolins += 1
                    continue
                OEFindRingAtomsAndBonds(mol)
                OEAssignAromaticFlags(mol)  # normalize aromaticity
                cansmi = OECreateCanSmiString(mol)  # create canonical smile
                # create input DataBase generic object
                inobj = InputDB(isosmi=isosmi, cansmi=cansmi,
                                filename=filename)
                if self.getDict(filename):
                    notmolins += 1  # object already in dictionary
                    continue
                try:
                    # keys: isomeric smile -> object, filename -> isomeric smile
                    # NOTE(review): the previous comment here said the keys
                    # were the *canonical* smile; confirm which is intended.
                    self.insertDict(isosmi, inobj)
                    self.insertDict(filename, isosmi)
                except anydbm.error:
                    error = sys.exc_info()[1]
                    print('Error: %s' % error)
                    self.log.exception('Error: %s', error)
                    sys.exit(1)  # dictionary is closed by the finally below
                nmolins += 1
        finally:
            # Close the dictionary on every exit path, including errors.
            self.closeDict()
        dots.Total()
        elapsed = sw.Elapsed()  # sample once so console and log agree
        print("SMILE time reading: %.2f s" % elapsed)
        self.log.info('Read %s SMILES in %.2f s, inserted %s, refused %s',
                      nmol, elapsed, nmolins, notmolins)
        print("%s molecules inserted, %s refused!" % (nmolins, notmolins))
        self.logobj.rmLogHandler()
119 ################## End of class ##################################### 120 ################### MAIN ############################################ 121 ################# example of usage ################################## 122 if __name__=='__main__': 123 pass 124 # A=SmiFile('/tmp/testFile/ZINC.smi',dictpath='/tmp/SDict',dictfilename='SDict2DBStdInp',path='/tmp/Log',filename='smifile') 125 # A.parseSmi() 126 # A.showDict() 127