# Python 爬取猫眼电影 TOP100 榜，并将电影信息写入 Excel

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import xlrd,xlwt

# Listing pages of the board: ten URLs with offsets 0, 10, ..., 90.
urls = list(map("https://maoyan.com/board/4?offset={}".format, range(0, 100, 10)))

# Request headers sent with every page download (a desktop Chrome user agent).
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3945.88 Safari/537.36"
    ),
}

# Fields collected per film: title, starring cast, release date, country, score.
def FilmInformation(url):
    """Download one board page and return the film records found on it.

    Args:
        url: Address of the page to download.

    Returns:
        A list with one ``[name, staring, releasetime, country, score]`` list
        of strings per ``.board-item-main`` element, in page order.
        ``country`` is ``"(暂无)"`` when the release line has no parenthesised
        country after the date.

    Raises:
        IndexError: If a film entry lacks one of the selected elements.
        Errors raised by ``requests.get`` (including the timeout) propagate
        to the caller.
    """
    content = []
    # A timeout keeps a stalled connection from hanging the whole crawl.
    r = requests.get(url, headers=header, timeout=10)
    soup = BeautifulSoup(r.text, "html.parser")
    for film in soup.select(".board-item-main"):
        # select() returns a list, so take its first match.
        name = film.select("[title]")[0].text
        # Label and value are separated by a full-width (Chinese) colon.
        staring = film.select(".star")[0].text.strip().partition("：")[2]
        release = film.select(".releasetime")[0].text.strip().partition("：")[2]
        # Split on the first "(" instead of a fixed 10-character offset, so
        # the country is still found when the date is not exactly 10
        # characters long.
        releasetime, _, country = release.partition("(")
        country = country.replace("(", "").replace(")", "").strip()
        if not country:
            country = "(暂无)"
        # The score is rendered as two elements: integer part and fraction.
        score = film.select(".integer")[0].text + film.select(".fraction")[0].text
        content.append([name, staring, releasetime.strip(), country, score])
    return content

def WriteExcel(data, filename="maoyan.xls"):
    """Write scraped film records to an ``.xls`` workbook.

    Row 0 holds the column headings; every record is written to the next row
    below it, one cell per field.

    Args:
        data: Iterable of pages, where each page is an iterable of records
            and each record is an iterable of cell values.
        filename: Path the workbook is saved to.  Defaults to
            ``"maoyan.xls"``, the path used before this parameter existed.
    """
    title = ["电影", "主演", "时间", "国家", "评分"]
    workbook = xlwt.Workbook(encoding="utf-8")
    sheet = workbook.add_sheet("猫眼前100")
    for col, heading in enumerate(title):
        sheet.write(0, col, heading)
    # Records start on the row right below the heading row.
    row = 1
    for page in data:
        for record in page:
            for col, value in enumerate(record):
                sheet.write(row, col, value)
            row += 1
    workbook.save(filename)

def main():
    """Scrape every page listed in ``urls`` and write the results to Excel."""
    # One list of film records per page, in the same order as ``urls``.
    pages = [FilmInformation(page_url) for page_url in urls]
    WriteExcel(pages)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ =="__main__":
    main()
# Python 爬取猫眼电影 TOP100 榜，并将电影信息写入 Excel

# 猜你喜欢