[Python] 抽取文件中指定字符串后面的字符串-2

基于上一个版本的改进:https://www.cnblogs.com/FiaFia/p/9361225.html

这样就能把文件每一行中需要的信息逐个提取出来,写成 CSV,没有的值设为空

还没解决的问题是 newsitem = [tradableid[0],exchange[0],symbol[0],name[0],isin[0],currency,instrumentSubType,country,securityType[0],lotSize[0]]  

没有[0]的,在 csv 里面就显示为 ['309'](整个列表的字符串形式);有[0]的,在 csv 里面就是值本身,会好看一些

如果都设置为[0],如果遇到这一行中没有这个数值,就会报 'IndexError: list index out of range'

之后需要看看能不能加个 lambda 函数判断匹配结果是否为空,为空就设置为空字符串 ''

 

import csv
import re


# Compiled regexes for the columns of one output row, in CSV column order.
# Each one captures the text between a field tag (e.g. ";Ex") and the next ";".
# NOTE: ".+?" needs at least one character, so an empty field such as ";Ex;"
# is not captured as an empty value.
_FIELD_PATTERNS = tuple(re.compile(pattern) for pattern in (
    r";i(.+?);",    # tradable id
    r";Ex(.+?);",   # exchange
    r";SYm(.+?);",  # symbol
    r";NAm(.+?);",  # name
    r";ISn(.+?);",  # isin
    r";CUt(.+?);",  # currency
    r";INt(.+?);",  # instrument sub type
    r";CNy(.+?);",  # country
    r";STy(.+?);",  # security type
    r";LSz(.+?);",  # lot size
))


def _first_match(pattern, line):
    """Return the first value captured by *pattern* in *line*, or ''."""
    found = pattern.search(line)
    return found.group(1) if found else ''


def extract_all(filename):
    """Extract the tagged fields from every line of *filename* containing "BDt;".

    Args:
        filename: Path of the text file to scan. It is read as UTF-8 and
            undecodable bytes are ignored.

    Returns:
        A list with one row per matching line. Each row is a list of ten
        strings (tradable id, exchange, symbol, name, isin, currency,
        instrument sub type, country, security type, lot size). A field
        that does not occur in the line is represented by '' instead of
        raising IndexError, and every column is a plain string rather than
        a list, so the rows can be written to CSV as they are.
    """
    result = []

    print('Extracting data----')

    # Stream the file line by line; the context manager closes the handle.
    with open(filename, 'r', encoding='utf8', errors='ignore') as source:
        for line in source:
            if "BDt;" not in line:
                continue
            result.append([_first_match(p, line) for p in _FIELD_PATTERNS])

    return result


def writetocsv(newsitems, reportfile):
    """Write the extracted rows to *reportfile* as CSV, header row first.

    Args:
        newsitems: List of rows; each row is a sequence of column values.
        reportfile: Path of the CSV file to (over)write.

    When *newsitems* is empty the file is not opened at all. The progress
    messages are printed in either case.
    """
    print('Start writing to csv')

    header = ['TradableID', 'Exchange', 'symbol', 'Name', 'isin', 'currenty',
              'instrumentSubType', 'country', 'securityType', 'lotSize']

    if newsitems:
        with open(reportfile, mode='w+', encoding='utf8', errors='ignore') as out:
            csv_out = csv.writer(out, lineterminator='\n')
            csv_out.writerow(header)
            # One CSV line per extracted row, in the order received.
            csv_out.writerows(newsitems)

    print('csv written done')
    return


if __name__ == '__main__':
    # Base name shared by the input dump and the generated report.
    filename = 'XSTO_Stock_BasicData0725'
    data_file = '{}.tip'.format(filename)
    reportcsv = 'BDt_{}.csv'.format(filename)

    # Extract the rows from the dump and write them straight to the report.
    writetocsv(extract_all(data_file), reportcsv)

猜你喜欢

转载自www.cnblogs.com/FiaFia/p/9379007.html