Save the scraped data as a txt file


import urllib.request
from bs4 import BeautifulSoup
import os
import time


def getDatas():
    # the URL to fetch
    url = "https://movie.douban.com/top250"
    # url = "file:///E:/scrapy/2018-04-27/movie/movie.html"
    # open the web page
    res = urllib.request.urlopen(url)
    # parse the response
    response = BeautifulSoup(res, 'html.parser')
    # parent elements that contain the data we want
    datas = response.find_all('div', {'class': 'item'})
    # print(datas)
    # create the folder that will hold the data file
    folder_name = "output"
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)
    # define the file name from the current date
    current_time = time.strftime('%Y-%m-%d', time.localtime())
    file_name = "move" + current_time + ".txt"
    # file path
    file_path = folder_name + "/" + file_name

    for item in datas:
        # print(item)
        rank=item.find('div',{'class':'pic'}).find('em').get_text()
        title=item.find('div',{'class':'info'}).find('div',{'class':'hd'}).find('a').find('span',{'class':'title'}).get_text()
        picUrl=item.find('div',{'class':'pic'}).find('a').find('img').get('src')
        # print(picUrl)
        # save the data in txt format
        try:
            with open(file_path,'a',encoding="utf-8") as fp:
                fp.write("排名:"+rank+'\n')
                fp.write("标题:"+title+'\n')
                fp.write("图片路径:"+picUrl+'\n\n')
        except IOError as err:
            print('error: ' + str(err))
        # no explicit fp.close() is needed: the with statement closes the file,
        # and calling fp.close() in a finally block would fail if open() raised
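The snippet above only defines getDatas(); it is never called. A minimal entry point (my addition, not part of the original post) makes the script runnable on its own:

if __name__ == "__main__":
    getDatas()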

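A plain urlopen() sends Python's default User-Agent, which some sites (Douban included, in my experience) reject. If the request fails with an HTTP error, a browser-like header can be supplied via urllib.request.Request; the exact User-Agent string below is only an illustrative example:

import urllib.request

url = "https://movie.douban.com/top250"
# build a request with a browser-like User-Agent (example string, adjust as needed)
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
res = urllib.request.urlopen(req)
html = res.read().decode("utf-8")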

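The code above only reads the first page, i.e. the first 25 movies. Below is a rough sketch of walking the whole Top 250, assuming the list is paged with a "start" offset of 25 per page, that the item markup is the same as above, and that the requests are not blocked (see the User-Agent note); these are assumptions on my part, not something the original post covers:

import urllib.request
from bs4 import BeautifulSoup

titles = []
for start in range(0, 250, 25):
    # assumed URL scheme: each page is addressed by a "start" offset
    page_url = "https://movie.douban.com/top250?start=" + str(start)
    soup = BeautifulSoup(urllib.request.urlopen(page_url), 'html.parser')
    for item in soup.find_all('div', {'class': 'item'}):
        titles.append(item.find('span', {'class': 'title'}).get_text())
print(len(titles))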

Source: www.cnblogs.com/1208xu/p/11913030.html