Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package DB2Data :: Module DB2SDF
[hide private]
[frames | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.DB2Data.DB2SDF

  1  #!/usr/bin/env python 
  2  ############################## 
  3  # DB2SDF.py                  # 
  4  # Import data from database  # 
  5  ############################## 
  6   
  7  ###################################################### 
  8  # Copyright (c) 2007-2008 Andrzej Bak                # 
  9  # ARC Seibersdorf & University of Silesia            # 
 10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
 11  # License: GNU General Public License, version: 3    # 
 12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
 13  # Version: 1, 05.05.2010                             # 
 14  ###################################################### 
 15   
 16  try: 
 17      import sys 
 18      import os 
 19      from openeye.oechem import * 
 20      from MoSTBioDat.DataBase.ImportData.Data2DB.TaBuilder import TaBuilder 
 21      from MoSTBioDat.DataBase.ImportData.Data2DB.InserTables import InserTables, all 
 22       
 23      from MoSTBioDat.DataBase.ImportData.Data2DB.Smile import Smile 
 24  except ImportError,e: 
 25      print 'Error: %s' %e 
 26      sys.exit(1) 
 27  ########## DB2SDF class ######################### 
class DB2SDF(TaBuilder, InserTables):
    """
    Import data from Ligand database to SDF file
    INPUT:
        id - int, compound Id in ChemComp table, default False
        isosmi - str, isomeric SMILES code, default False
        zincode - str, compound ZINC code, default False
        dirpath - str, directory path
        sdfile - str, sdf file path
        confid - int or list, conformer table id(s)
        atcharge - str, atom charge property name, default 'Charge'
        lowercasetablenames - boolean, MySQL lower case table setting, default True
        host - string, host to connect
        user - string, user to connect as
        passwd - string, password to use
        db - string, database to use
        port - integer, TCP/IP port to connect
        logdebug - boolean, debugging flag, default False
        fileformat - str, name of the openeye output file format constant,
                     default 'OEFormat_SDF'
        tag - boolean, str or list of tag names
        log - boolean, logging flag
        temporary - str, fallback directory used when dirpath cannot be
                    created or written, default '/tmp/SDFile'
        unix_socket - string, location of unix_socket to use
        conv - conversion dictionary, see MySQLdb.converters
        connect_timeout - number of seconds to wait before the connection attempt fails.
        compress - if set, compression is enabled
        named_pipe - if set, a named pipe is used to connect (Windows only)
        init_command - command which is run once the connection is created
        read_default_file - file from which default client values are read
        read_default_group - configuration group to use from the default file
        cursorclass - class object, used to create cursors (keyword only)
        use_unicode - if True, text-like columns are returned as unicode objects
                      using the connection's character set. Otherwise, text-like
                      columns are returned as normal strings. Unicode objects will
                      always be encoded to the connection's character set
                      regardless of this setting.
        charset - if supplied, the connection character set will be changed
                  to this character set (MySQL-4.1 and newer). This implies
                  use_unicode=True.
        sql_mode - if supplied, the session SQL mode will be changed to this
                   setting (MySQL-4.1 and newer). For more details and legal
                   values, see the MySQL documentation.
        client_flag - integer, flags to use or 0
                      (see MySQL docs or constants/CLIENTS.py)
        ssl - dictionary or mapping, contains SSL connection parameters;
              see the MySQL documentation for more details
              (mysql_ssl_set()). If this is set, and the client does not
              support SSL, NotSupportedError will be raised.
        local_infile - integer, non-zero enables LOAD LOCAL INFILE; zero disables
        format - string format for log handler
        filter - filter object from logger object
        datefmt - date/time format
        path - directory path to log file
        filename - log filename, default log
        filemode - mode to open log file, default='a'
        level - set root logger level to specified level
        logfilelevel - set level to log file
        cache - create cache for query, default=True
        scheme2file - boolean, save database scheme to shelve file
        addH - boolean, add hydrogens, default False
        protstat - boolean, protonation state to physiological condition, default False
    OUTPUT:
        sdf file
    """

    def __init__(self, dirpath='', sdfile='', host='localhost', db='Ligand',
                 user=None, passwd=None, port=3306, log=False, **kwargs):
        """
        connect to the database, read one compound and write it to SDF file
        INPUT:
            see class description
        OUTPUT:
            sdf file written to dirpath/sdfile (process exits when the
            target file already exists or no writable directory is found)
        """
        TaBuilder.__init__(self, host, db, user, passwd, port, log, **kwargs)
        self.tabdesc = self.genTables()
        InserTables.__init__(self)
        try:
            self.kwargs['sdfile'] = str(sdfile)
        except ValueError as e:
            print('Error: %s' % e)
            sys.exit(1)
        self.kwargs['dirpath'] = dirpath
        # Merge the caller options BEFORE resolving the output path so that a
        # caller-supplied 'temporary' directory is honoured.  'dirpath' and
        # 'sdfile' are named parameters, hence they can never be in kwargs
        # and are not clobbered by this update.
        self.kwargs.update(kwargs)
        self.kwargs.setdefault('temporary', '/tmp/SDFile')
        self.createsdfilepath()

        # Built per instance so that the mutable 'confid' default is not shared.
        defaults = {
            'isosmi': False,
            'zincode': False,
            'id': False,
            'iso': True,
            'confid': [],
            'logdebug': False,
            'fileformat': 'OEFormat_SDF',  # was misspelled 'fileforamt'
            'tag': False,
            'atcharge': 'Charge',
            'lowercasetablenames': True,
            'addH': False,  # was stored under the wrong key 'add'
            'protstat': False,
        }
        for key, value in defaults.items():
            self.kwargs.setdefault(key, value)

        try:
            moldict = self.db2mol(
                tabcolvaldict={'Id': self.kwargs['id'],
                               'IsoSmi': self.kwargs['isosmi'],
                               'ZINCode': self.kwargs['zincode']},
                iso=self.kwargs['iso'],
                confid=self.kwargs['confid'],
                logdebug=self.kwargs['logdebug'],
                lowercasetablenames=self.kwargs['lowercasetablenames'])
            if moldict:
                self.createsdfile(moldict=moldict,
                                  format=self.kwargs['fileformat'])
        finally:
            # release the connection even when the export fails half-way
            self.closeDB()

    def createsdfilepath(self):
        """
        resolve the output directory and the absolute SDF file path
        INPUT:
            class object (reads kwargs 'dirpath', 'temporary', 'sdfile')
        OUTPUT:
            self.sdfullfilename - sdf filename absolute path
            Falls back to the 'temporary' directory when 'dirpath' cannot be
            created or is not writable; exits when the temporary directory is
            not writable either or when the target file already exists.
        """
        if self.kwargs['dirpath'] == '':
            self.kwargs['dirpath'] = os.path.abspath(os.curdir)

        if not os.path.isdir(self.kwargs['dirpath']):
            try:
                os.makedirs(self.kwargs['dirpath'])
            except EnvironmentError as error:
                # errno/strerror instead of error[0]/error[1]: indexing is not
                # guaranteed to work and would raise inside the handler
                print('Error: %s, %s for %s' % (error.errno, error.strerror,
                                                self.kwargs['dirpath']))
                print('SDF files in %s' % self.kwargs['temporary'])
                # create directory in temporary path
                self.kwargs['dirpath'] = self.kwargs['temporary']
                if not os.path.exists(self.kwargs['dirpath']):
                    os.makedirs(self.kwargs['dirpath'])
        elif not os.access(self.kwargs['dirpath'], os.W_OK):
            print('Error: Permission denied! Unable to write in %s'
                  % self.kwargs['dirpath'])
            self.kwargs['dirpath'] = self.kwargs['temporary']
            if not os.path.exists(self.kwargs['dirpath']):
                os.makedirs(self.kwargs['dirpath'])
            elif not os.access(self.kwargs['dirpath'], os.W_OK):
                print('Error: Permission denied! Unable to write in %s'
                      % self.kwargs['dirpath'])
                sys.exit(1)

        self.sdfullfilename = os.path.join(self.kwargs['dirpath'],
                                           self.kwargs['sdfile'])
        if os.path.isfile(self.sdfullfilename):
            # never overwrite an existing file
            print('Error: Specified file: %s exists in %s'
                  % (self.kwargs['sdfile'], self.kwargs['dirpath']))
            sys.exit(1)
        print('SDF file path: %s' % self.sdfullfilename)

    def _nodata(self, tablename):
        """report that a lookup table (PropDef, ElemTab) returned no data"""
        print('Data in %s not available!' % tablename)
        self.log.error('Data in %s not available', tablename)

    def _noinput(self, tablename, loglevel='info', prefix=''):
        """report that a query on tablename matched nothing"""
        print('%sThere is no specified input in %s table!' % (prefix, tablename))
        getattr(self.log, loglevel)('There is no specified input in %s',
                                    tablename)

    def _issingleid(self, ids, tablename):
        """return True when ids is a one-element list holding an int/long,
        otherwise report the problem for tablename and return False"""
        if (isinstance(ids, list) and len(ids) == 1
                and isinstance(ids[0], (int, long))):
            return True
        print('Error, %s: Incorrect ID specification!' % tablename)
        self.log.error('%s: Incorrect ID specification', tablename)
        return False

    def _gatherprops(self, prefix, fkcolumn, wanted, ids, logdebug,
                     lowercasetablenames, **extra):
        """merge the rows of the <prefix>Int, <prefix>Float and <prefix>Text
        property tables for the given foreign keys into one dictionary"""
        gathered = {}
        for suffix in ('Int', 'Float', 'Text'):
            found = self.getProponIdFk(
                tablename=prefix + suffix,
                tabcol=['Id', 'PropVal', 'PropDefIdFk', fkcolumn],
                tabcolvaldict=wanted,
                logdebug=logdebug,
                lowercasetablenames=lowercasetablenames,
                ID=ids, **extra)
            if found:
                gathered.update(found)
        return gathered

    def db2mol(self, logdebug=False, tabcolvaldict=None,
               lowercasetablenames=True, **kwarg):
        """
        get molecule dictionary from database
        INPUT:
            logdebug - boolean, debug logging
            tabcolvaldict - dict, table column/value dictionary
            lowercasetablenames - boolean, MySQL lower case table names setting
            kwarg - forwarded to the ChemComp query; 'confid' (int, long or
                    list) additionally restricts the exported conformers
        OUTPUT:
            moldict - dict, molecule dictionary with keys 'filename',
                      'isosmi', 'nbonds', 'natoms', 'conntab', 'coords',
                      'setmolcharge' and, when tags are requested, 'property'
                      and 'confproperty'; None when any required data is
                      missing or malformed
        """
        if tabcolvaldict is None:  # avoid a shared mutable default
            tabcolvaldict = {}

        def shown(name):
            # table name as it appears in messages
            if lowercasetablenames:
                return name.lower()
            return name

        usetags = self.kwargs['tag']

        if not isinstance(self.kwargs['atcharge'], str):
            print('Error: Incorrect atom charge definition! Must be string. Please correct it ...')
            self.log.error('Incorrect atom charge definition. Must be string')
            return

        ### get PropDef table ###
        propdefdict = self.getPropDef(lowercasetablenames=lowercasetablenames)
        if not propdefdict:
            self._nodata('PropDef')
            return
        atcharge, refatcharge = self.checkPropId([self.kwargs['atcharge']],
                                                 propdefdict)
        if refatcharge:
            print('Warnings: Specified tag: %s not available in database!' % refatcharge)
            self.log.info('Specified tag: %s not available in database',
                          refatcharge)

        tags = None
        molprops = {}  # molecule level properties ('property' in moldict)
        if usetags:
            if isinstance(usetags, bool):  # True: export every defined property
                tags = list(propdefdict.values())
            elif isinstance(usetags, str):
                tags = [usetags]
            elif isinstance(usetags, list):
                tags = usetags
            else:
                print('Error: Incorrect tag specification! Must be string, boolean or list! Please correct it ...')
                self.log.error('Incorrect tag specification. Must be string, boolean or list')
                return
            tags = [item for item in tags if isinstance(item, str)]
            ### check if specified tags exist in database ###
            tags, reftags = self.checkPropId(tags, propdefdict)
            if reftags:
                print('Warnings: Specified tag: %s not available in database!' % reftags)
                self.log.info('Specified tag: %s not available in database',
                              reftags)

        ### search ChemComp table ###
        tablename = shown('ChemComp')
        ChemCompIDict = self.getChemComponIdIsoSmiZINCode(
            logdebug=logdebug, tabcolvaldict=tabcolvaldict,
            lowercasetablenames=lowercasetablenames, **kwarg)
        if not ChemCompIDict:
            self._noinput(tablename)
            return
        chemcompidfk = list(ChemCompIDict.keys())
        chemcompisosmi = list(ChemCompIDict.values())[0]['IsoSmi']
        if not self._issingleid(chemcompidfk, tablename):
            return
        if usetags:
            molprops.update(self._gatherprops(
                'PropChemComp', 'ChemCompIdFk', tags, chemcompidfk,
                logdebug, lowercasetablenames))

        ### search ProtStat table ###
        tablename = shown('ProtStat')
        protstatmap = self.getProtStatonIdFk(
            logdebug=logdebug, tabcolvaldict={'ChemCompIdFk': chemcompidfk},
            lowercasetablenames=lowercasetablenames)
        # guard: an empty/None result previously crashed on .get()
        protstatid = None
        if protstatmap:
            protstatid = protstatmap.get(chemcompisosmi, None)
        if not protstatid:
            self._noinput(tablename)
            return
        if not self._issingleid(protstatid, tablename):
            return
        protstatid = [long(protstatid[0])]
        if usetags:
            molprops.update(self._gatherprops(
                'PropProtStat', 'ProtStatIdFk', tags, protstatid,
                logdebug, lowercasetablenames))

        ### search Atoms table ###
        atomsid = self.getAtomsonIdFk(
            logdebug=logdebug, tabcolvaldict={'ProtStatIdFk': protstatid},
            lowercasetablenames=lowercasetablenames)
        if not atomsid:
            self._noinput(shown('Atoms'))
            return

        ### search ConnTab table ###
        conntabid = self.getConnTabonIdFk(
            logdebug=logdebug, tabcolvaldict={'ProtStatIdFk': protstatid},
            lowercasetablenames=lowercasetablenames)
        if not conntabid:
            # missing bonds are tolerated: export atoms only
            self._noinput(shown('ConnTab'), prefix='Warning: ')
            conntabid = {}

        ### search ConfStat table ###
        confstatid = self.getConfStatonIdFk(
            logdebug=logdebug, tabcolvaldict={'ProtStatIdFk': protstatid},
            lowercasetablenames=lowercasetablenames)
        if not confstatid:
            self._noinput(shown('ConfStat'))
            return

        getconfid = kwarg.get('confid', None)
        if getconfid:  # restrict to the requested conformers
            if isinstance(getconfid, (int, long)):
                getconfid = [getconfid]
            if not isinstance(getconfid, list):
                print('Error: Incorrect conformer Id! Must be integer, long or list! Please correct it ...')
                self.log.error('Incorrect conformer id. Must be integer, long or list')
                return
            ### get only integer or long values ###
            getconfid = [item for item in getconfid
                         if isinstance(item, (int, long))]
            ### check if getconfid exist in database ###
            confstatid, refuseid = self.checkConfId(getconfid, confstatid)
            if refuseid:
                print('Warnings: Specified Id: %s not available in database!' % refuseid)
                self.log.info('Specified Id: %s not available in database',
                              refuseid)

        allconfproperty = {}  # conformer id -> property dictionary
        if usetags:
            for confid in confstatid:
                allconfproperty[confid] = self._gatherprops(
                    'PropConfStat', 'ConfStatIdFk', tags, [confid],
                    logdebug, lowercasetablenames)

        ### search AtomStat table ###
        atomstatid = self.getAtomStatonIdFk(
            logdebug=logdebug, tabcolvaldict={'ConfStatIdFk': confstatid},
            lowercasetablenames=lowercasetablenames)
        if not atomstatid:
            self._noinput(shown('AtomStat'), loglevel='error')
            return
        # Atoms id -> [1-based position in the Atoms result, second Atoms column]
        atomsidict = dict((atom[0], [position + 1, atom[1]])
                          for position, atom in enumerate(atomsid))

        ### get ElemTab table ###
        elemtabdict = self.getElemTab(lowercasetablenames=lowercasetablenames)
        if not elemtabdict:
            self._nodata('ElemTab')
            return

        confmolcharge = {}  # conformer id -> "let OpenEye assign charges" flag
        for conformer, coords in atomstatid.items():
            if len(coords) != len(atomsidict):
                print('Error: Different length for coordinate lists!')
                self.log.error('Different length for coordinate list')
                return
            corrlist = []  # per atom: was exactly one charge found in the db?
            confpropdict = {}  # atom level properties of this conformer
            for coord in coords:
                ### join AtomStat with Atoms table ###
                if coord[0] not in atomsidict:
                    print('Error: Specified Atoms foreign key not available!')
                    self.log.error('Specified Atoms foreign key not available')
                    return
                position, element = atomsidict[coord[0]]
                coord[0] = position
                coord.insert(4, element)
                ### join AtomStat with ElemTab table ###
                if coord[4] not in elemtabdict:
                    print('Error: Atom name not available in ElemTab table!')
                    self.log.error('Atom name not available in ElemTab table')
                    return
                coord[4] = elemtabdict[coord[4]]

                # coord[5] is the key used for the PropAtomStat* lookups; it
                # is overwritten by the charge below, so remember it first
                atomstatkey = coord[5]
                if usetags:
                    confpropdict.update(self._gatherprops(
                        'PropAtomStat', 'AtomStatIdFk', tags, [atomstatkey],
                        logdebug, lowercasetablenames))

                if atcharge:
                    ### search atom charges in PropAtomStatInt ###
                    atchargeint = self.getProponIdFk(
                        tablename='PropAtomStatInt',
                        tabcol=['Id', 'PropVal', 'PropDefIdFk', 'AtomStatIdFk'],
                        tabcolvaldict=atcharge, logdebug=logdebug,
                        lowercasetablenames=lowercasetablenames,
                        idflag=True, ID=[atomstatkey])
                    if atchargeint and len(atchargeint) == 1:
                        coord[5] = list(atchargeint.values())[0][0]
                        corrlist.append(True)
                    else:  # no or more than one charge value for this atom
                        corrlist.append(False)
            if usetags:
                allconfproperty[conformer].update(confpropdict)
            # Database charges are trusted only when every atom got one.  An
            # empty corrlist (charge property unknown) must NOT count as
            # complete: all([]) is true and coord[5] would then be used as a
            # formal charge although it still holds the lookup key.
            confmolcharge[conformer] = not (corrlist and all(corrlist))

        moldict = {}
        moldict['filename'] = '%s' % chemcompidfk[0]
        moldict['isosmi'] = chemcompisosmi
        moldict['nbonds'] = len(conntabid)
        moldict['natoms'] = len(atomsid)
        moldict['conntab'] = conntabid
        moldict['coords'] = atomstatid
        if usetags:
            moldict['property'] = molprops
            moldict['confproperty'] = allconfproperty
        moldict['setmolcharge'] = confmolcharge
        return moldict

    def convertDict(self, propdict):
        """
        convert property dictionary
        INPUT:
            propdict - dict, property dictionary; each value is a sequence
                       whose item 0 is the property value and item 1 the
                       property name
        OUTPUT:
            convdict - dict, converted property dictionary {propName:[propValue]}
        """
        convdict = {}
        for propval in propdict.values():
            convdict.setdefault(propval[1], []).append(propval[0])
        return convdict

    def checkPropId(self, propname, propdefdict):
        """
        check whether specified value exists in database
        INPUT:
            propname - list, property name list
            propdefdict - dict, property definition dictionary {id: name}
        OUTPUT:
            tags - dict, property id: property name dictionary
            reftags - list, refused tag name list
        """
        reftags = []
        tags = {}
        ### invert dictionary ###
        revpropdefdict = dict((propval, propkey)
                              for propkey, propval in propdefdict.items())
        for propitem in propname:
            if propitem in revpropdefdict:
                tags[revpropdefdict[propitem]] = propitem
            else:
                reftags.append(propitem)
        return tags, reftags

    def checkConfId(self, getconfid, confstatid):
        """
        check whether specified value exists in database
        INPUT:
            getconfid - list, requested conformation Id
            confstatid - container of conformation Id taken from database
        OUTPUT:
            getconf - list, list of correct id
            refuseid - list, list of refused id
        """
        getconf = []
        refuseid = []
        for confid in getconfid:
            if confid in confstatid:
                getconf.append(confid)
            else:
                refuseid.append(confid)
        return getconf, refuseid

    def createsdfile(self, moldict=None, format='OEFormat_SDF'):
        """
        create SDF file
        INPUT:
            moldict - dict, molecule dictionary
            format - str, name of the oe file format constant
        OUTPUT:
            file written to self.sdfullfilename, one record per conformer
        """
        conflist = self.dict2OEmol(moldict)  # get molecule conformer list
        if not conflist:
            return
        ofs = oemolostream()
        if self.sdfullfilename and not ofs.open(self.sdfullfilename):
            OEThrow.Fatal('Cannot open: %s' % self.sdfullfilename)
        try:
            # NOTE(review): eval() of a caller-supplied string; only pass
            # trusted OEFormat_* constant names here.
            ofs.SetFormat(eval(format))
            print('Number of conformers: %s' % len(conflist))
            for count, conf in enumerate(conflist, 1):
                print('Writing conformer number: %s, id: %s'
                      % (count, conf.GetIntData('ConfId')))
                OEWriteMolecule(ofs, conf)
        finally:
            ofs.close()  # close file even when writing fails

    def dict2OEmol(self, moldict):
        """
        create oe molecule
        INPUT:
            moldict - dict, molecule dictionary
        OUTPUT:
            openeye molecule object list, one molecule per conformer;
            None when the dictionary or the coordinates are missing
        """
        if not moldict:
            print('Error: Molecule dictionary not available!')
            self.log.error('Molecule dictionary not available')
            return

        coords = moldict.get('coords', None)  # conformer id -> atom rows
        if not coords:
            print('Error: Atom coordinate not available!')
            self.log.error('Atom coordinate not available')
            return
        conntab = moldict.get('conntab', None)
        if not conntab:
            print('Warning: Bonds not available!')
            self.log.warning('Bonds not available')
        # tolerate a missing connection table instead of failing on .values()
        if conntab:
            bonds = list(conntab.values())
        else:
            bonds = []

        usetags = self.kwargs['tag']
        if usetags:
            molprops = moldict['property']
            confproperty = moldict['confproperty']
        addhydrogens = self.kwargs.get('addH', False)
        protstat = self.kwargs.get('protstat', False)

        setmolcharge = moldict['setmolcharge']
        molist = []  # molecule list
        for confid, molcoords in coords.items():  # iterate conformers
            mol = OEMol()
            atlist = []
            molcharge = setmolcharge[confid]  # True: let OpenEye assign charges
            ### set coordinates to conformation molecule ###
            for coord in molcoords:
                # NOTE(review): eval() on a value read from the database;
                # resolves the OEElemNo_<symbol> constant.
                newat = mol.NewAtom(eval('OEElemNo_%s' % coord[4]))
                if not molcharge:
                    newat.SetFormalCharge(coord[5])  # charge taken from db
                atlist.append(newat)
                mol.SetCoords(newat, coord[1:4])  # x, y, z

            ### set bonds to conformation molecule ###
            for conntabitem in bonds:
                bondtype = conntabitem[2]
                # connection table atom numbers are 1-based
                bond = mol.NewBond(atlist[conntabitem[0] - 1],
                                   atlist[conntabitem[1] - 1], bondtype)
                bond.SetIntType(bondtype)

            mol.SetIntData('ConfId', confid)  # conformer id, read back on write
            mol.SetTitle('%s_%s' % (moldict['filename'], confid))
            OESetDimensionFromCoords(mol)
            OEFindRingAtomsAndBonds(mol)

            if usetags:
                if molprops:
                    for tagname, tagvalue in molprops.items():
                        OESetSDData(mol, tagname, str(tagvalue))
                if confid in confproperty:
                    for tagname, tagvalue in confproperty[confid].items():
                        OESetSDData(mol, tagname, str(tagvalue))
            molist.append(mol)

            ### Assign aromatic flags ###
            OEClearAromaticFlags(mol)
            OEKekulize(mol)
            OEAssignAromaticFlags(mol)

            ### add hydrogens ###
            # A molecule that already carries explicit hydrogens is left as it
            # is.  This used to be a 'break', which silently dropped every
            # remaining conformer.
            # NOTE(review): nesting below was reconstructed from a flattened
            # listing - confirm against the original module.
            if addhydrogens and not OEHasExplicitHydrogens(mol):
                if not OEHasImplicitHydrogens(mol):
                    if protstat:
                        OEAssignMDLHydrogens(mol)
                    else:
                        OEAssignImplicitHydrogens(mol)
                        OEAssignFormalCharges(mol)
                OEAddExplicitHydrogens(mol)  # change implicit H into explicit
                OESet3DHydrogenGeom(mol)  # add 3D coordinates
            if molcharge:
                OEAssignFormalCharges(mol)
        return molist

    def isTabDesCorr(self, tablename, tabcol):
        """
        check table description with table name
        INPUT:
            tablename - str, name of table
            tabcol - list, list of column names
        OUTPUT:
            boolean, True when every name in tabcol is a column of tablename
        """
        try:
            colname = self.tabdesc[tablename].getColName()  # list of columns
        except KeyError as error:
            print('Error: Missing description for %s!' % error)
            self.log.exception('%s: Missing description for %s',
                               tablename, error)
            return False
        if len(colname) < len(tabcol):  # more names than the table has columns
            print('Error: Too many arguments in table column list!!')
            self.log.error('%s: Too many arguments in table column list',
                           tablename)
            return False
        for name in tabcol:
            if name not in colname:
                print('Error: Incorrect column name for %s!' % name)
                self.log.error('%s: Incorrect column name for %s',
                               tablename, name)
                return False
        return True
668 ################## End of class ######################################################## 669 ############## MAIN ################################################################ 670 ############ Example of usage ########################################################## 671 if __name__=='__main__': 672 pass 673 # ### search on basis of Id ### 674 # isosmi='C1=CC(C(C(=C1)C(=O)O)O)O' 675 # conflist=[1,9] 676 # isosmi='CC[C@@]12CCC[N@H+]3[C@@H]1c4c(c5ccccc5n4C(=C2)C(=O)OCC)CC3' 677 # conflist=[3,5] 678 # A=DB2SDF(isosmi=isosmi,id='3',zincode='ZINC03630778',iso=True,confid=conflist, 679 # dirpath='/tmp/SDFtest',sdfile='mol1.sdf', 680 # host='',db='',user='',passwd='',path='/tmp/Log',filename='db2mol', 681 # fileformat='OEFormat_SDF',atcharge='Charge',addH=False,protstat=False) 682 # ## search on basis of ZINCode ### 683 # conflist=[4] 684 # A=DB2SDF(isosmi='C1=CC(C(C(=C1)C(=O)O)O)O',zincode='ZINC03814434',iso=True,confid=conflist, 685 # dirpath='/tmp/SDFtest',sdfile='mol1.sdf', 686 # host='',db='',user='',passwd='',path='/tmp/Log',filename='db2mol', 687 # fileformat='OEFormat_SDF',atcharge='Charge') 688 # 689 # ## search on basis of IsoSmi and specified conformer Id ### 690 # confidlist=[1,9] 691 # isosmi='C1=CC(C(C(=C1)C(=O)O)O)O' 692 # # tag specified as boolean ### 693 # A=DB2SDF(isosmi=isosmi,iso=True, 694 # dirpath='/tmp/SDFtest',sdfile='mol1.sdf',confid=confidlist,tag=False, 695 # host='',db='',user='',passwd='',path='/tmp/Log',filename='db2mol', 696 # fileformat='OEFormat_SDF',atcharge='Charge') 697 # 698 # ### tag specified as string ### 699 # A=DB2SDF(isosmi=isosmi,iso=True, 700 # dirpath='/tmp/SDFtest',sdfile='mol1.sdf',confid=confidlist,tag='AtSeqNum', 701 # host='',db='',user='',passwd='',path='/tmp/Log',filename='db2mol', 702 # fileformat='OEFormat_SDF',atcharge='Charge') 703 # ### tag specified as list of strings ### 704 # A=DB2SDF(isosmi=isosmi,iso=True, 705 # dirpath='/tmp/SDFtest',sdfile='mol1.sdf',confid=confidlist,tag=['AtSeqNum', 'AtPDBSymbol'], 
706 # host='',db='',user='',passwd='',path='/tmp/Log',filename='db2mol', 707 # fileformat='OEFormat_SDF',atcharge='Charge') 708