# maoyan: scraper for the Maoyan movie board (https://maoyan.com/board/4)
import requests
import json
from lxml import etree
def getOnePage(n, timeout=10):
    """Fetch one page of the Maoyan board and return its HTML text.

    Args:
        n: 1-based page number; page ``n`` is requested with
            ``offset=(n - 1) * 10``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body decoded as text (``Response.text``). The HTTP
        status code is not checked; the body is returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    url = f'https://maoyan.com/board/4?offset={(n-1)*10}'
    # Send a browser-like User-Agent instead of the default requests one.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36'}
    r = requests.get(url, headers=headers, timeout=timeout)
    return r.text
def parse(text):
    """Parse a board page and yield one record per movie.

    Args:
        text: HTML source of a board page.

    Yields:
        A new dict ``{'name': ..., 'releasetime': ...}`` for each movie,
        pairing the ``title`` attribute of the name link with the text of the
        release-time paragraph. Names and release times are matched by
        position; pairing stops at the shorter of the two lists.
    """
    html = etree.HTML(text)
    names = html.xpath('//div[@class="movie-item-info"]/p[@class="name"]/a/@title')
    releasetimes = html.xpath('//div[@class="movie-item-info"]/p[@class="releasetime"]/text()')
    for name, releasetime in zip(names, releasetimes):
        # Build a fresh dict on every iteration. Reusing one shared dict would
        # make every yielded item alias the same object, so a caller that
        # collects the results (e.g. list(parse(text))) would see only the
        # last movie repeated.
        yield {'name': name, 'releasetime': releasetime}
def save2file(data):
    """Append ``data`` to ``movie.json`` as a single JSON line.

    Args:
        data: A JSON-serializable Python object (e.g. a dict).
    """
    with open('movie.json', mode='a', encoding='utf-8') as out:
        # ensure_ascii=False keeps non-ASCII characters readable in the file
        # instead of emitting \uXXXX escapes.
        serialized = json.dumps(data, ensure_ascii=False)
        # One record per line.
        out.write(f'{serialized}\n')
def run():
    """Fetch board pages 1 through 10 and save every parsed movie record."""
    for page in range(1, 11):
        # parse() is a generator, so each record is saved as soon as it is
        # produced.
        for movie in parse(getOnePage(page)):
            save2file(movie)
# Start the scraper only when this file is executed as a script.
if __name__ == "__main__":
    run()