Package install :: Package MoSTBioDat :: Package DataBase :: Package SubStructSearch :: Module DB2SmiDict
[hide private]
[frames] | no frames]

Source Code for Module install.MoSTBioDat.DataBase.SubStructSearch.DB2SmiDict

  1  #!/usr/bin/env python 
  2  ############################################################### 
  3  # import Ligand ChemComp/ProtStat data to id:smile dictionary # 
  4  # or read smile file to id:smile dictionary                   # 
  5  ############################################################### 
  6   
  7  ###################################################### 
  8  # Copyright (c) 2007-2008 Andrzej Bak                # 
  9  # ARC Seibersdorf & University of Silesia            # 
 10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
 11  # License: GNU General Public License, version: 3    # 
 12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
 13  # Version: 1, 06.10.2010                             # 
 14  ###################################################### 
 15   
 16  try: 
 17      import sys 
 18      import os 
 19      import gzip 
 20      from openeye.oechem import * 
 21      from MoSTBioDat.DataBase.ImportData.Data2DB.TaBuilder import TaBuilder 
 22      from MoSTBioDat.DataBase.ImportData.Data2DB.InserTables import InserTables 
 23      from MoSTBioDat.DataBase.Connect.MoSTBioDatErrors import Error 
 24      from MoSTBioDat.Log.MoSTBioDatLog import  MoSTBioDatLog 
 25  except ImportError,e: 
 26      print 'Error: %s' %e 
 27      sys.exit(1) 
 28  ########## get DB2SmiFile class ###################### 
class DB2SmiDict(TaBuilder, InserTables):
    """
    get Ligand.ChemComp table data to id:smile dictionary
    INPUT:
        host - string, host to connect
        user - string, user to connect as
        passwd - string, password to use
        db - string, database to use
        port - integer, TCP/IP port to connect
        log - boolean, logging flag, default=False
        unix_socket - string, location of unix_socket to use
        conv - conversion dictionary, see MySQLdb.converters
        connect_timeout - number of seconds to wait before the connection
                          attempt fails.
        compress - if set, compression is enabled
        named_pipe - if set, a named pipe is used to connect (Windows only)
        init_command - command which is run once the connection is created
        read_default_file - file from which default client values are read
        read_default_group - configuration group to use from the default file
        cursorclass - class object, used to create cursors (keyword only)
        use_unicode - if True, text-like columns are returned as unicode
                      objects using the connection's character set.
                      Otherwise, text-like columns are returned as normal
                      strings. Unicode objects will always be encoded to the
                      connection's character set regardless of this setting.
        charset - if supplied, the connection character set will be changed
                  to this character set (MySQL-4.1 and newer). This implies
                  use_unicode=True.
        sql_mode - if supplied, the session SQL mode will be changed to this
                   setting (MySQL-4.1 and newer). For more details and legal
                   values, see the MySQL documentation.
        client_flag - integer, flags to use or 0
                      (see MySQL docs or constants/CLIENTS.py)
        ssl - dictionary or mapping, contains SSL connection parameters;
              see the MySQL documentation for more details
              (mysql_ssl_set()). If this is set, and the client does not
              support SSL, NotSupportedError will be raised.
        local_infile - integer, non-zero enables LOAD LOCAL INFILE;
                       zero disables
        format - string format for log handler
        filter - filter object from logger object
        datefmt - date/time format
        path - directory path to log file
        filename - log filename, default log
        filemode - mode to open log file, default='a'
        level - set root logger level to specified level
        logfilelevel - set level to log file
        cache - create cache for query, default=True
        scheme2file - boolean, save database scheme to shelve file
    OUTPUT:
        class object
    """

    def __init__(self, host='localhost', db='Ligand', user=None, passwd=None,
                 port=3306, log=False, **kwargs):
        """
        initialise both base classes
        INPUT:
            see the class description; all extra keyword arguments are
            forwarded unchanged to TaBuilder.__init__
        OUTPUT:
            class object
        """
        TaBuilder.__init__(self, host, db, user, passwd, port, log, **kwargs)
        InserTables.__init__(self)

    def readb(self, logdebug=False, lowercasetablenames=True, **kwarg):
        """
        create id:smile dictionary from ChemComp table
        INPUT:
            logdebug - log debug in log file
            lowercasetablenames - boolean, lower case table names MySQL
                                  engine settings, default True
            kwarg - extra keyword arguments forwarded to getChemCompIdIsoSmi
        OUTPUT:
            dictionary, the value returned by getChemCompIdIsoSmi
        """
        # closeDB() sits in the finally clause so that the connection is
        # released even when the table description or the data import fails;
        # previously it was only closed on the success path.
        try:
            # NOTE(review): self.kwargs and self.log are not set in this
            # class - presumably provided by TaBuilder; confirm.
            print('Importing %s table description.' % self.kwargs['db'])
            self.tabdesc = self.genTables()
            print('Database ChemComp table data importing, please wait ....')
            self.log.info('Database ChemComp table importing')
            ### get ChemComp dictionary {id:smicode} ###
            return self.getChemCompIdIsoSmi(
                logdebug=logdebug,
                lowercasetablenames=lowercasetablenames,
                **kwarg)
        finally:
            self.closeDB()  # close database connection

    def isTabDesCorr(self, tablename, tabcol):
        """
        check table description with table name
        INPUT:
            tablename - name of table
            tabcol - list of column names
        OUTPUT:
            boolean, True when every name in tabcol is a column of the
            described table, False otherwise (the reason is printed and
            logged)
        """
        try:
            colname = self.tabdesc[tablename].getColName()  # list of columns
        except KeyError as error:
            print('Error, %s: Missing description for %s!' % (tablename, error))
            self.log.exception('%s: Missing description for %s', tablename, error)
            return False
        # more requested columns than the table has cannot be correct
        if len(colname) < len(tabcol):
            print('Error, %s: Too many arguments in table column list!!' % tablename)
            self.log.error('%s: Too many arguments in table column list', tablename)
            return False
        for name in tabcol:
            if name not in colname:  # unknown column, stop at the first one
                print('Error, %s: Incorrect column name for %s!' % (tablename, name))
                self.log.error('%s: Incorrect column name for %s', tablename, name)
                return False
        return True
130
class Smi2SmiDict(object):
    """
    get id:smile dictionary from smi file
    INPUT:
        smifilepath - str, smile file path
        kwargs - keyword arguments forwarded to MoSTBioDatLog
    OUTPUT:
        class object
    """

    def __init__(self, smifilepath='db2smi', **kwargs):
        """
        create the logger and open the smi file
        INPUT:
            smifilepath - str, smile file path
            kwargs - keyword arguments forwarded to MoSTBioDatLog
        OUTPUT:
            class object; the process exits with status 1 when the logger
            or the smi file cannot be opened
        """
        try:
            self.logobj = MoSTBioDatLog(**kwargs)  # create logging object
            self.log = self.logobj.getLogHandler()  # create logging handler
        except IOError as e:
            # errno/strerror instead of e[0]/e[1]: indexing fails for an
            # IOError that carries a single argument
            print('Error: %s, %s' % (e.errno, e.strerror))
            sys.exit(1)
        self.kwargs = {}
        try:
            self.smifilepath = str(smifilepath)
        except ValueError as e:
            print('Error: %s' % e)
            sys.exit(1)
        self.findfile()

    def findfile(self):
        """
        find and open smi file
        INPUT:
            class object (reads self.smifilepath)
        OUTPUT:
            None; the opened file object is stored in self.file
            (gzip.open is used for names ending with '.gz').
            The process exits with status 1 when the file is missing,
            not readable or cannot be opened.
        """
        fullfilename = os.path.abspath(self.smifilepath)
        if not os.path.isfile(fullfilename):
            print('Unable to find %s!' % fullfilename)
            sys.exit(1)
        if not os.access(fullfilename, os.R_OK):
            print('Permission denied: %s' % fullfilename)
            sys.exit(1)
        if fullfilename.endswith('.gz'):
            opener = gzip.open
        else:
            opener = open
        try:
            self.file = opener(fullfilename, 'r')
        except IOError as error:
            print('Error: %s for %s' % (error, fullfilename))
            # exit like the other failure branches; carrying on would leave
            # self.file unset and break readfile() with an AttributeError
            sys.exit(1)

    @staticmethod
    def _splitline(line, delimiter):
        """
        split one smi line into (isomeric smile, id)
        INPUT:
            line - str, raw line read from the smi file
            delimiter - str, field delimiter
        OUTPUT:
            (isosmi, id) tuple or None when the line has fewer than two fields
        """
        fields = line.split(delimiter)
        if len(fields) < 2:
            return None
        # the id is the last thing on a two-column line: drop the line ending
        return fields[0], fields[1].rstrip('\r\n')

    def readfile(self, delimiter=' '):
        """
        create id:smile dictionary from smi file
        INPUT:
            delimiter - str, field delimiter, default ' ';
                        the first field is the isomeric smile, the second the id
        OUTPUT:
            dictionary {id: isomeric smile}; lines that are malformed, have an
            id already in the dictionary or a smile refused by OEParseSmiles
            are reported, counted as refused and skipped
        """
        nmol = 0  # molecule counter
        nmolins = 0  # molecules inserted into dictionary
        notmolins = 0  # molecules not inserted into dictionary
        sw = OEStopwatch()  # time counter
        sw.Start()
        dots = OEDots(10000, 500, '>> SMILES')  # dots progress indicator
        print('Parsing SMI file, please wait ...')
        self.log.info('Parsing SMI file: %s', self.smifilepath)
        smidict = {}
        try:
            for line in self.file:
                dots.Update()
                nmol += 1
                fields = self._splitline(line, delimiter)
                if fields is None:
                    # e.g. blank line or smile without id: refuse, don't crash
                    print('Warning: malformed line %r skipped!' % line)
                    self.log.warning('malformed line %r skipped', line)
                    notmolins += 1
                    continue
                isosmi, molid = fields
                if molid in smidict:
                    print('Waring %s: %s already in dictionary!' % (isosmi, molid))
                    self.log.warning('%s: %s already in dictionary', isosmi, molid)
                    notmolins += 1
                    continue
                mol = OEGraphMol()
                if not (OEParseSmiles(mol, isosmi) == 1):
                    print('Warning %s: %s is invalid!' % (isosmi, molid))
                    self.log.warning('%s: %s is invalid', isosmi, molid)
                    notmolins += 1
                    continue
                nmolins += 1
                smidict[molid] = isosmi
        finally:
            self.file.close()  # release the file even when parsing fails
        dots.Total()
        print("SMILE time writing: %.2f s" % (sw.Elapsed()))
        self.log.info('Read %s SMILES in %.2f s, inserted %s, refused %s',
                      nmol, sw.Elapsed(), nmolins, notmolins)
        print("%s molecules inserted, %s refused!" % (nmolins, notmolins))
        return smidict

################## End of class ########################################################
############## MAIN ################################################################
############ Example of usage ##########################################################
# The module does nothing when executed directly; the commented lines below
# only show how the two classes are meant to be used.
if __name__ == '__main__':
    # print '*** Get Id:Smile data dictionary from database or smi file dictionary *** '
    # A=DB2SmiDict(host='localhost',db='',user='',passwd='',path='/tmp/Log',filename='db2smi')
    # db2smidict=A.readb(logdebug=False,lowercasetablenames=True)
    # A=Smi2SmiDict(smifilepath='/tmp/DB2Smi/db2smi.smi',path='/tmp/Log',filename='smi2smi')
    # smi2smidict=A.readfile()
    pass