maoyan.py

maoyan

import requests
import json
from lxml import etree

def getOnePage(n, timeout=10):
    '''Fetch one page of the Maoyan board and return its HTML as text.

    Args:
        n: 1-based page number; page ``n`` is requested with
            ``offset=(n-1)*10``.
        timeout: Seconds to wait for the server before giving up.
            ``requests`` applies no timeout by default, so without this
            a stalled connection would block the crawl indefinitely.

    Returns:
        The decoded response body. The HTTP status code is not checked,
        so an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: if the connection fails or
            the timeout expires.
    '''
    url=f'https://maoyan.com/board/4?offset={(n-1)*10}'
    # Send a desktop-browser User-Agent instead of the requests default.
    headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}
    r=requests.get(url,headers=headers,timeout=timeout)
    return r.text

def parse(text):
    '''Extract movie names and release times from a board page.

    Args:
        text: HTML source of one board page.

    Yields:
        A new ``dict`` per movie with the keys ``'name'`` and
        ``'releasetime'``. Each yielded dict is an independent object,
        so callers may safely collect them (e.g. ``list(parse(text))``).
    '''
    html=etree.HTML(text)
    names=html.xpath('//div[@class="movie-item-info"]/p[@class="name"]/a/@title')
    releasetimes=html.xpath('//div[@class="movie-item-info"]/p[@class="releasetime"]/text()')
    # zip pairs the two lists by position and stops at the shorter one.
    for name,releasetime in zip(names,releasetimes):
        # Build a fresh dict every iteration: reusing one shared dict would
        # make every previously yielded item silently change to the latest
        # movie once the generator advances.
        yield {'name':name,'releasetime':releasetime}

def save2file(data):
    '''Append ``data`` to ``movie.json`` as one JSON document per line.'''
    with open('movie.json',mode='a',encoding='utf-8') as out:
        # ensure_ascii=False keeps non-ASCII characters readable in the file
        # instead of escaping them as \uXXXX sequences.
        serialized=json.dumps(data,ensure_ascii=False)
        out.write(serialized+'\n')
        

def run():
    '''Crawl board pages 1 through 10 and save every parsed movie.'''
    for page_no in range(1,11):
        # Fetch the page, parse it lazily, and persist each record as it
        # is produced.
        for record in parse(getOnePage(page_no)):
            save2file(record)

# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()

おすすめ

転載: www.cnblogs.com/pengyy/p/11392080.html