douban top250爬取

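# Scrape the Douban Top 250 ranking and, for each of the 250 movies, write its title, release year, rating and number of ratings to CSV files -- one file per result page, named "第N页.csv" (N = 1..10).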
import requests
import re
import csv

url = "https://movie.douban.com/top250"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"}

# One match per movie entry. Named groups: name, year, score, num.
# The "P" in "(?P<...>" and the "S" in re.S must both be uppercase;
# re.S lets "." match newlines so a single entry can span several lines.
obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>.*?<p class="">.*?<br>(?P<year>.*?)&nbsp.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>.*?<span>(?P<num>.*?)人评价</span>', re.S)

PAGE_SIZE = 25        # movies listed on each result page
PAGE_COUNT = 10       # 10 pages * 25 movies = 250 entries
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever


def parse_movies(page_content):
    """Extract the movie rows from the HTML of one result page.

    Args:
        page_content: HTML text of a Top 250 result page.

    Returns:
        A list of ``(name, year, score, num)`` string tuples, in page order.
        ``year`` has its surrounding whitespace stripped. The list is empty
        when nothing in ``page_content`` matches the pattern.
    """
    return [
        (
            match.group("name"),
            match.group("year").strip(),
            match.group("score"),
            match.group("num"),
        )
        for match in obj.finditer(page_content)
    ]


def fetch_page(start):
    """Download one result page and return its HTML text.

    Args:
        start: value of the ``start`` query parameter, i.e. the offset of
            the first movie on the requested page (0, 25, 50, ...).

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            non-2xx HTTP status.
    """
    params = {"start": start, "filter": ""}
    response = requests.get(
        url=url, headers=headers, params=params, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly on an error status instead of silently writing an empty CSV.
    response.raise_for_status()
    return response.text


def main():
    """Fetch every result page and write each one to its own CSV file."""
    for i in range(PAGE_COUNT):
        # Fetch and parse before opening the file, so a failed request does
        # not leave an empty file behind.
        rows = parse_movies(fetch_page(i * PAGE_SIZE))
        filename = "第{}页.csv".format(i + 1)
        # Explicit encoding: the locale default cannot encode Chinese titles
        # on every platform. newline="" is what the csv module expects.
        with open(filename, mode="w", newline="", encoding="utf-8") as f:
            csv.writer(f).writerows(rows)

    print("运行结束!")


if __name__ == "__main__":
    main()

おすすめ

転載: blog.csdn.net/weixin_47401101/article/details/121059298