Package install :: Package MoSTBioDat :: Package DataBase :: Package ImportData :: Package Data2DB :: Module ZINCI2DB_version_8
[hide private]
[frames | no frames]

Source Code for Module install.MoSTBioDat.DataBase.ImportData.Data2DB.ZINCI2DB_version_8

  1  #!/usr/bin/env python 
  2  #################################################################### 
  3  # ZINC2DB_version8.py                                              # 
  4  # SDF parser and ZINC Import module for ZINC version 8 (2008) # 
  5  #################################################################### 
  6   
  7  ###################################################### 
  8  # Copyright (c) 2007-2008 Andrzej Bak                # 
  9  # ARC Seibersdorf & University of Silesia            # 
 10  # Author: Andrzej Bak <Andrzej.Bak@us.edu.pl>        # 
 11  # License: GNU General Public License, version: 3    # 
 12  # URL: http://chemoinformatyka.us.edu.pl/mostbiodat/ # 
 13  # Version: 1, 06.01.2010                             # 
 14  ###################################################### 
 15   
 16  try: 
 17      import sys 
 18      from MoSTBioDat.DataBase.ImportData.Data2DB.SDFile import SDFile 
 19      from MoSTBioDat.DataBase.ImportData.Data2DB.PropZINC import PropZINC 
 20      from MoSTBioDat.DataBase.ImportData.Data2DB.SDict2DB import SDict2DB 
 21      from MoSTBioDat.DataBase.ImportData.Data2DB.PropDrugBank import PropDrugBank 
 22      import os 
 23  except ImportError,e: 
 24      print 'Error: %s' %e 
 25      sys.exit(1) 
 26   
def checkacess(filename):
    """
    Tell whether filename is an existing regular file that can be read.

    A diagnostic message is printed whenever the check fails.

    INPUT:
        filename - path to test, str
    OUTPUT:
        boolean - True if the file exists and is readable, False otherwise
    """
    # Guard clause: missing paths and non-regular files (e.g. directories).
    if not os.path.exists(filename) or not os.path.isfile(filename):
        print('Error, Unable to find %s!' % filename)
        return False
    # The file is there; make sure it may be read.
    if not os.access(filename, os.R_OK):
        print('Error, Permission denied!')
        return False
    return True
44
def genitem(item,basedir):
    """
    Resolve one ZINC setup entry into the files that pass checkacess.

    INPUT:
        item - setup entry [dirname, stdnum, extnum, propfilename], list
        basedir - base directory, str
    OUTPUT:
        tuple (stdfile, extfile, propfile)
        stdfile - accessible '<dirname>_p0.<num>.sdf.gz' paths, list
        extfile - accessible '<dirname>_p1.<num>.sdf.gz' paths, list
        propfile - property file path, or None if not given / not accessible
    """
    def _inside(name):
        # Absolute path of a file living in basedir/<dirname>.
        return os.path.join(basedir, os.path.join(item[0], name))

    def _accessible(template, numbers):
        # Build every candidate path first, then keep those passing the check.
        candidates = [_inside(template % (item[0], num)) for num in numbers]
        return [path for path in candidates if checkacess(path)]

    stdfile = _accessible('%s_p0.%s.sdf.gz', item[1]) if item[1] else []
    extfile = _accessible('%s_p1.%s.sdf.gz', item[2]) if item[2] else []

    propfile = None
    if item[3]:
        candidate = _inside(item[3])
        if checkacess(candidate):
            propfile = candidate
    return stdfile, extfile, propfile
68
def genGO(filelist,propfilename,dictpath=None):
    """
    Pair every SD file with the path of its input dictionary object.

    The dictionary file name is the base name of the SD file with a trailing
    '.sdf.gz' or '.sdf' (matched case-insensitively) removed. Any other
    base name is used unchanged instead of failing on an unbound variable.

    INPUT:
        filelist - list of files, list
        propfilename - property file name, str (passed through unchanged)
        dictpath - directory of the dictionary objects, str; when None the
                   module-level name 'dictionarypath' is used, which the
                   calling script must have defined
    OUTPUT:
        list of tuples (filename, indictfilename, propfilename)
    """
    result=[]
    for filename in filelist:
        if dictpath is None:
            # Resolved lazily so that an empty filelist never touches the
            # global, exactly as before.
            dictpath=dictionarypath
        objname=os.path.basename(filename)
        # Longest suffix first, so '.sdf.gz' is not mistaken for '.sdf'.
        for suffix in ('.sdf.gz','.sdf'):
            if objname.lower().endswith(suffix):
                objname=objname[:-len(suffix)]
                break
        result.append((filename,os.path.join(dictpath,objname),propfilename))
    return result
######### MAIN ##############################################################
if __name__ == '__main__':
    print('*** Testing Input Object creation and DataBase import ***')

    # Everything below is a commented-out template; uncomment the section
    # that is needed.
    # NOTE: range(n) below denotes the Python 2 list [0, 1, ..., n-1].
    #
    # ## GENERAL SETTINGS ###
    # dictionarypath='/tmp/InputDB_v8_vendors' #path to shelve dictionary object
    # ## ZINC settings for particular directories
    # ## zinc setup item = [dirname= ' ',stdnum= [ ], extnum = [ ], propfilename= ' ']
    # ## dirname - directory name, str
    # ## stdnum - standard filenames numbers, list
    # ## extnum - extended filename numbers, list
    # ## propfilename - property filename
    # ## ZINC subsets by VENDORS
    # zincsetuplist=[
    #     ['acb',[0],[0],'acb_prop.xls'],
    #     ['acdisc',[0],[0],'acdisc_prop.xls'],
    #     ['acros',[0],[0],'acros_prop.xls'],
    #     ['adesis',[0],[0],'adesis_prop.xls'],
    #     ['alfa',[0],[0],'alfa_prop.xls'],
    #     ['apollo',[0],[0],'apollo_prop.xls'],
    #     ['aronis',[0],[0],'aronis_prop.xls'],
    #     ['arvi',[0],[0],'arvi_prop.xls'],
    #     ['asin',range(18),[0,1,2],'asin_prop.xls'],
    #     ['astatech',[0],[0],'astatech_prop.xls'],
    #     ['asymchem',[0],[0],'asymchem_prop.xls'],
    #     ['aurora',[0],[0],'aurora_prop.xls'],
    #     ['bachem',[0],[0],'bachem_prop.xls'],
    #     ['biosynth',[0],[0],'biosynth_prop.xls'],
    #     ['cdiv',range(23),[0,1,2,3],'cdiv_prop.xls'],
    #     ['chbr',range(19),[0,1],'chbr_prop.xls'],
    #     ['chemblock',[0,1,2,3,4],[0],'chemblock_prop.xls'],
    #     ['combiblocks',[0],[0],'combiblocks_prop.xls'],
    #     ['cvchem',[0],[0],'cvchem_prop.xls'],
    #     ['dbex',[0],[0],'dbex_prop.xls'],
    #     ['dbnu',[0],[0],'dbnu_prop.xls'],
    #     ['dbsm',[0],[0],'dbsm_prop.xls'],
    #     ['enamine',range(49),[0,1,2,3,4,5],'enamine_prop.xls'],
    #     ['fda',[0],[0],'fda_prop.xls'],
    #     ['frinton',[0],[0],'frinton_prop.xls'],
    #     ['frontier',[0],[0],'frontier_prop.xls'],
    #     ['hmdb',[0],[0],'hmdb_prop.xls'],
    #     ['ibs',range(25),[0,1,2,3,4],'ibs_prop.xls'],
    #     ['iflab',range(27),[0,1,2],'iflab_prop.xls'],
    #     ['indofine',[0],[0],'indofine_prop.xls'],
    #     ['indsup',[0],[0],'indsup_prop.xls'],
    #     ['infarmatik',[0],[0],'infarmatik_prop.xls'],
    #     ['keyo',[0],[0],'keyo_prop.xls'],
    #     ['lopac',[0],[0],'lopac_prop.xls'],
    #     ['mayb',[0,1,2],[0],'mayb_prop.xls'],
    #     ['mdpi',[0],[0],'mdpi_prop.xls'],
    #     ['micros',[0],[0],'micros_prop.xls'],
    #     ['nano',[0,1,2],[0],'nano_prop.xls'],
    #     ['npd',[0,1,2,3,4],[0],'npd_prop.xls'],
    #     ['oakwood',[0],[0],'oakwood_prop.xls'],
    #     ['otava',range(14),[0,1],'otava_prop.xls'],
    #     ['peak',[0],[0],'peak_prop.xls'],
    #     ['peptech',[0],None,'peptech_prop.xls'],
    #     ['pharmek',range(8),[0,1,2],'pharmek_prop.xls'],
    #     ['pubchem',range(113),range(11),'pubchem_prop.xls'],
    #     ['sial',[0],[0],'sial_prop.xls'],
    #     ['specs',range(12),[0,1],'specs_prop.xls'],
    #     ['spectrum',[0],[0],'spectrum_prop.xls'],
    #     ['synchem',[0],[0],'synchem_prop.xls'],
    #     ['synquest',[0],[0],'synquest_prop.xls'],
    #     ['synthonix',[0],[0],'synthonix_prop.xls'],
    #     ['timtec',range(16),[0,1,2],'timtec_prop.xls'],
    #     ['tocris',[0],[0],'tocris_prop.xls'],
    #     ['toslab',[0],[0],'toslab_prop.xls'],
    #     ['zannan',[0],[0],'zannan_prop.xls']
    # ]
    #
    #
    # basedir='/tmp/ZINC/ZINC_270608'
    # ###### parse standard ZINC SDF file ##########################
    #
    # print '*** DataBase STANDART shelve dictionary creation ***'
    # stdall=[] ## list of lists for all standard files to import
    # extall=[] ## list of list for all extended files to import
    # propall=[]## list of property files
    # for item in zincsetuplist:
    #     stdfile,extfile,propfile=genitem(item,basedir)
    #     stdall.append(stdfile)
    #     extall.append(extfile)
    #     propall.append(propfile)
    #
    # ########## END OF GENERAL SETTINGS ######################
    #
    # ## parse standard SDF file ###########################
    # for stdfilelist, propfilename in zip(stdall,propall):
    #     sdfilenamelist=genGO(stdfilelist,propfilename)
    #     for sdfitem in sdfilenamelist:
    #         sdfilename=sdfitem[0]
    #         indictfilename=sdfitem[1]
    #         propfilename=sdfitem[2]
    #         A=SDFile(sdfile=sdfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='StdZINC')
    #         A.parseSDFOE()
    #         if propfilename:
    #             A=PropZINC(propfile=propfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='StdZINC')
    #             A.proParse()
    # print 'Done!'
    #
    # ###### parse extended SDF file ##############################
    # for extfilelist, propfilename in zip(extall,propall):
    #     extfilenamelist=genGO(extfilelist,propfilename)
    #     for sdfitem in extfilenamelist:
    #         extfilename=sdfitem[0]
    #         indictfilename=sdfitem[1]
    #         propfilename=sdfitem[2]
    #         A=SDFile(sdfile=extfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='ExtZINC')
    #         A.parseSDFOE()
    #         if propfilename:
    #             A=PropZINC(propfile=propfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='ExtZINC')
    #             A.proParse()
    # print 'Done!'
    # ####### END OF IMPORT OBJECT CREATION ###########################
    #
    # ### ZINC ALL subsets
    # zincsetuplist=[
    #     ['10',range(60),range(37),'10_prop.xls']
    # ]
    #
    # basedir='/tmp/ZINC/ZINC_010708'
    # ####### parse standard ZINC SDF file ##########################
    #
    # print '*** DataBase STANDART shelve dictionary creation ***'
    # stdall=[] ## list of lists for all standard files to import
    # extall=[] ## list of list for all extended files to import
    # propall=[]## list of property files
    # for item in zincsetuplist:
    #     stdfile,extfile,propfile=genitem(item,basedir)
    #     stdall.append(stdfile)
    #     extall.append(extfile)
    #     propall.append(propfile)
    #
    # ############## END OF GENERAL SETTINGS ######################
    #
    # #### parse standard SDF file ###########################
    # for stdfilelist, propfilename in zip(stdall,propall):
    #     sdfilenamelist=genGO(stdfilelist,propfilename)
    #     for sdfitem in sdfilenamelist:
    #         sdfilename=sdfitem[0]
    #         indictfilename=sdfitem[1]
    #         propfilename=sdfitem[2]
    #         A=SDFile(sdfile=sdfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='StdZINC')
    #         A.parseSDFOE()
    #         if propfilename:
    #             A=PropZINC(propfile=propfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='StdZINC')
    #             A.proParse()
    # print 'Done!'
    #
    # ###### parse extended SDF file ##############################
    # for extfilelist, propfilename in zip(extall,propall):
    #     extfilenamelist=genGO(extfilelist,propfilename)
    #     for sdfitem in extfilenamelist:
    #         extfilename=sdfitem[0]
    #         indictfilename=sdfitem[1]
    #         propfilename=sdfitem[2]
    #         A=SDFile(sdfile=extfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='ExtZINC')
    #         A.parseSDFOE()
    #         if propfilename:
    #             A=PropZINC(propfile=propfilename,dictpath=dictionarypath,dictfilename=indictfilename,path='/tmp/Log',filename='ExtZINC')
    #             A.proParse()
    # print 'Done!'
    # ####### END OF IMPORT OBJECT CREATION ###########################
    #
    # ############# DATABASE IMPORT ############################
    # print '*** Import data from ZINC shelve dictionary *** '
    # ## SETTINGS ###
    # basedir='/tmp/ZINC/ZINC_270608'
    # ###### parse standard ZINC SDF file ##########################
    # dictionarypath='/tmp/InputDB_v8_vendors' #path to shelve dictionary object
    #
    # zincsetuplist=[
    #     ['adesis',[0],[0],'adesis_prop.xls']
    # ]
    # zincsetuplist=zincsetuplist
    # stdall=[] ## list of lists for all standard files to import
    # extall=[] ## list of list for all extended files to import
    # propall=[]## list of property files
    # for item in zincsetuplist:
    #     stdfile,extfile,propfile=genitem(item,basedir)
    #     stdall.append(stdfile)
    #     extall.append(extfile)
    #     propall.append(propfile)
    # ########### BASE IMPORT #################################
    # print '*** DataBase STANDART shelve dictionary DataBase IMPORT ***'
    # ## parse standard generic shelve dictionary #############
    # for stdfilelist, propfilename in zip(stdall,propall):
    #     sdfilenamelist=genGO(stdfilelist,propfilename)
    #     for sdfitem in sdfilenamelist:
    #         sdfilename=sdfitem[0]
    #         indictfilename=sdfitem[1]
    #         propfilename=sdfitem[2]
    #         ## standard import ###
    #         A=SDict2DB(dbfilepath=indictfilename,
    #                    path='/tmp/Log',filename='ZINCImportStd',
    #                    host='localhost',db='',user='',passwd='',log=False)
    #         A.SDict2Tab(baseImport=True,lowercasetablenames=True)
    #
    # print '*** DataBase EXTENDED shelve dictionary DataBase IMPORT ***'
    # for extfilelist, propfilename in zip(extall,propall):
    #     extfilenamelist=genGO(extfilelist,propfilename)
    #     for sdfitem in extfilenamelist:
    #         extfilename=sdfitem[0]
    #         indictfilename=sdfitem[1]
    #         propfilename=sdfitem[2]
    #         ## extended import ###
    #         A=SDict2DB(dbfilepath=indictfilename,
    #                    path='/tmp/Log',filename='ZINCImportExt',
    #                    host='localhost',db='',user='',passwd='',log=False)
    #         A.SDict2Tab(baseImport=False,lowercasetablenames=True)
    # print 'Done!'