Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package Data2DB :: Module PropZINC
[hide private]
[frames | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.Data2DB.PropZINC

  1  #!/usr/bin/env python 
  2  ###################################### 
  3  # PropZINC.py                        # 
  4  # Parse Property ZINC database file  # 
  5  ###################################### 
  6   
  7  ###################################################### 
  8  # Copyright (c) 2007-2008 Andrzej Bak                # 
  9  # ARC Seibersdorf & University of Silesia            # 
 10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
 11  # License: GNU General Public License, version: 3    # 
 12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
 13  # Version: 1, 06.01.2010                             # 
 14  ###################################################### 
 15   
 16  try: 
 17      import sys 
 18      import os 
 19      import anydbm 
 20      from openeye.oechem import * 
 21      from MoSTBioDat.DataBase.ImportData.Data2DB.PropFile import PropFile 
 22      from MoSTBioDat.DataBase.ImportData.Data2DB.Smile import Smile 
 23  except ImportError,e: 
 24      print 'Error: %s' %e 
 25      sys.exit(1) 
 26  ########## Property ZINC file class ################### 
class PropZINC(PropFile):
    """
    Property ZINC File parser

    Reads a delimited property text file whose first line names the
    properties, and merges every record into the object that is already
    stored in the dictionary under the record's 'SMILES' value.

    INPUT:
    propfile - property file path, str, default=None
    dictpath - str - path to dictionary
    dictfilename - str - dictionary filename
    dbfile - str - database file path
    format - str - logging file format, default "%(levelname)s %(asctime)s %(ip)s %(username)s %(filename)s %(module)s %(funcName)s %(lineno)d >> %(message)s "
    filter - specified record to be logged
    datefmt - str - date format
    path - str - path to log files
    filename - str - log filename, default 'log'
    filemode - str - log file mode, default 'wa'
    level - str - logging level, default logging.DEBUG
    logfilelevel - log file level
    temporary - str - temporary directory
    OUTPUT:
    dictionary object
    """

    def __init__(self, propfile=None, **kwargs):
        """
        Forward all arguments unchanged to PropFile.__init__
        INPUT:
        propfile - property file path, str, default=None
        kwargs - keyword options listed in the class docstring
        """
        PropFile.__init__(self, propfile=propfile, **kwargs)

    def proParse(self, spacedelim='\t'):
        """
        Property text file parser, first line - property definition
        INPUT:
        class object
        spacedelim - space delimiter specificator, default = '\t'
        OUTPUT:
        None; counters are reported on stdout and in the log.
        The process is terminated with sys.exit(1) when the dictionary can
        not be opened or when storing an updated object raises anydbm.error.
        """
        nmol = 0  # records read (header line excluded)
        nmolins = 0  # records merged into the dictionary
        notmolins = 0  # records refused
        sw = OEStopwatch()  # time counter
        sw.Start()
        dots = OEDots(30000, 1000, '>> Compounds')  # dots progress indicator
        # Open the dictionary exactly once: the former code called openDict()
        # twice, throwing away the first result and opening it again.
        if not self.openDict():
            sys.exit(1)
        self.log.info('Parsing Property text file: %s', self.kwargs['dbfile'])
        # Single-argument print(...) behaves the same as the print statement.
        print('Parsing %s, please wait ...' % self.kwargs['dbfile'])
        countline = 0
        attriblist = []
        try:
            while True:
                line = self.file.readline()
                if not line:
                    break
                countline += 1
                # Strip the line terminator only; '\r' is removed as well so
                # CRLF files do not leave it glued to the last column, while
                # trailing delimiters (empty columns) are preserved.
                line = line.rstrip('\r\n')
                if self.isfirstline(countline):
                    attriblist = line.split(spacedelim)  # property names
                    continue
                nmol += 1
                dots.Update()
                attrvalist = line.split(spacedelim)  # property values
                if len(attriblist) != len(attrvalist):
                    notmolins += 1  # column count must agree with header
                    continue
                if self._mergeProperties(dict(zip(attriblist, attrvalist))):
                    nmolins += 1
                else:
                    notmolins += 1
        finally:
            # Runs on normal completion, on sys.exit() raised by the helper
            # and on any unexpected error, so the dictionary is never left
            # open and is closed exactly once.
            self.closeDict()
        dots.Total()
        # NOTE(review): the message says "SDF" although a property text file
        # is read; the text is kept unchanged for output compatibility.
        print("SDF time reading: %.2f s" % (sw.Elapsed()))
        self.log.info('Read %s SMILES in %.2f s, %s: inserted %s, refused %s', nmol, sw.Elapsed(), self.kwargs['dictfilename'], nmolins, notmolins)
        print("%s: %s molecules inserted, %s refused!" % (self.kwargs['dictfilename'], nmolins, notmolins))
        self.logobj.rmLogHandler()

    def _mergeProperties(self, propdict):
        """
        Merge one record into the dictionary object stored under its SMILES
        INPUT:
        propdict - dict - property name:value pairs of a single record
        OUTPUT:
        boolean - True when the updated object was stored, False when the
        record has no 'SMILES' value or no object is stored for it.
        Terminates the process with sys.exit(1) on anydbm.error.
        """
        isosmi = propdict.get('SMILES', None)  # isomeric SMILE code
        if not isosmi:
            print('No SMILE attribute!')
            return False
        dictobj = self.getDict(isosmi)
        if not dictobj:
            return False  # no object in dictionary for this SMILE
        if 'property' in dictobj.kwargs:
            dictobj.kwargs['property'].update(propdict)
        else:
            dictobj.kwargs['property'] = propdict
        try:
            self.insertDict(isosmi, dictobj)
        except anydbm.error:
            # The exception is fetched through sys.exc_info() so that this
            # clause does not depend on a version-specific binding syntax.
            error = sys.exc_info()[1]
            print('Error: %s' % error)
            self.log.exception('Error: %s', error)
            # The caller's finally clause closes the dictionary.
            sys.exit(1)
        return True

    def isfirstline(self, counter):
        """
        Check if line is first
        INPUT:
        counter - int - 1-based number of the line just read
        OUTPUT:
        boolean
        """
        return counter == 1
127 ############### End of class ########################################## 128 ############ MAIN ##################################################### 129 ############ example of usage ######################################### 130 if __name__=='__main__': 131 pass 132 # A=PropZINC('/tmp/testFile/prop.txt',dictpath='/home/baka/SDict',dictfilename='SDict2DBStdInp0',path='/tmp/Log',filename='propZINC') 133 # A.proParse() 134