# Python: batch-process Excel files to extract data

 
 
import  re
import  xlrd
# Output files, one per gene-set category, opened for writing when the module
# is loaded; message() writes the matching records into them.
# NOTE(review): "a9_not_c8a3a16.txt" looks like a typo for "v9_not_..." -
# the name is kept as-is because it is the file the script actually produces.
f1, f2, f3, f4, f5 = (
    open(output_name, "w")
    for output_name in (
        "v9_c8_a3_a16.txt",
        "a9_not_c8a3a16.txt",
        "c8_not_v9a3a16.txt",
        "a3_not_v9c8a16.txt",
        "a16_not_v9c8a3.txt",
    )
)
def read(file, sheet_index=0):
    """Load every row of one worksheet from an Excel workbook.

    Args:
        file: Path of the workbook, passed to ``xlrd.open_workbook``.
        sheet_index: Zero-based index of the worksheet to read.

    Returns:
        A list with one entry per row; each entry is the list of cell
        values returned by ``sheet.row_values`` for that row.
    """
    workbook = xlrd.open_workbook(file)
    sheet = workbook.sheet_by_index(sheet_index)
    # Progress line: sheet name, row count, column count.
    print("工作表名称:", sheet.name, "行数:", sheet.nrows, "列数:", sheet.ncols)
    # The loop header had lost its loop variable, which was a syntax error;
    # iterate over every row index explicitly.
    return [sheet.row_values(row) for row in range(sheet.nrows)]
 
def red(text):
    """Collect the distinct MGG gene identifiers found in a text file.

    Args:
        text: Path of the file to scan; it is opened in text mode with the
            platform default encoding.

    Returns:
        A set of every substring matching ``MGG_`` followed by five digits.
    """
    with open(text, 'r') as f:
        content = f.read()
    # The raw-string prefix must touch the literal (r'...'); with a space
    # between them the statement does not parse.
    return set(re.findall(r'MGG_\d{5}', content))
# Load the four inputs when the module is executed: two Excel workbooks
# (lists of row values) and two CSV files (sets of MGG identifiers).
# The raw-string prefix must touch the literal; "r '...'" does not parse.
v9 = read(r'zhu.xlsx')
c8 = read(r'liu.xlsx')
a3 = red(r'ATG3.csv')
a16 = red(r'ATG16.csv')
def reg(data):
    """Extract the distinct MGG gene identifiers from any object.

    Args:
        data: Any object; it is converted with ``str()`` before matching,
            so nested lists or sets of cell values are searched through
            their textual representation.

    Returns:
        A set of every substring matching ``MGG_`` followed by five digits.
    """
    # re.findall needs a string, hence the str() conversion. The raw-string
    # prefix must touch the literal; "r '...'" does not parse.
    return set(re.findall(r'MGG_\d{5}', str(data)))
def vps9():
    """Return the distinct MGG identifiers found in the module-level ``v9`` data."""
    identifiers = reg(v9)
    return identifiers
def cdk8():
    """Return the distinct MGG identifiers found in the module-level ``c8`` data."""
    identifiers = reg(c8)
    return identifiers
def Atg3():
    """Return the distinct MGG identifiers found in the module-level ``a3`` data."""
    identifiers = reg(a3)
    return identifiers
def Atg16():
    """Return the distinct MGG identifiers found in the module-level ``a16`` data."""
    identifiers = reg(a16)
    return identifiers
def Mgg1_Mgg2():
    """Partition the four identifier sets into shared and exclusive groups.

    Returns:
        A 5-tuple of sets, in this order: identifiers present in all four
        sources, then those found only in vps9, only in cdk8, only in Atg3
        and only in Atg16.
    """
    vps9_ids, cdk8_ids, atg3_ids, atg16_ids = vps9(), cdk8(), Atg3(), Atg16()

    shared_by_all = vps9_ids.intersection(cdk8_ids, atg3_ids, atg16_ids)
    only_vps9 = vps9_ids.difference(cdk8_ids, atg3_ids, atg16_ids)
    only_cdk8 = cdk8_ids.difference(vps9_ids, atg3_ids, atg16_ids)
    only_atg3 = atg3_ids.difference(vps9_ids, cdk8_ids, atg16_ids)
    only_atg16 = atg16_ids.difference(vps9_ids, atg3_ids, cdk8_ids)

    return shared_by_all, only_vps9, only_cdk8, only_atg3, only_atg16
def message():
    """Write each matching record of 'magnaporthe.txt' to its category file.

    The file is read whole and split on '>' into records. For every record
    and every identifier of a category returned by ``Mgg1_Mgg2()``, if the
    identifier occurs in the record, the line ``identifier + ' ' + record``
    is written to that category's output handle (``f1`` .. ``f5``, in the
    same order as the tuple returned by ``Mgg1_Mgg2()``).
    """
    gene_sets = Mgg1_Mgg2()
    outputs = (f1, f2, f3, f4, f5)

    with open('magnaporthe.txt', 'r') as f:
        records = f.read().split('>')

    # The loop and condition variables had been lost, which made the
    # function a syntax error; they are restored from their uses in the
    # write calls, and the five parallel loops are folded into one.
    for m in records:
        for genes, out in zip(gene_sets, outputs):
            for gene in genes:
                if gene in m:
                    out.write(gene + ' ' + m)

    # The output handles are module-level and never closed by the script;
    # flush so the results are on disk once this function returns.
    for out in outputs:
        out.flush()
# Script entry point: run the extraction as soon as the module is executed.
message()

# You may also like
#
# Reposted from blog.csdn.net/yaoxy/article/details/79338754