Crawling the Maoyan (Cat's Eye) movie TOP100 board with Python and writing the movie information to Excel

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import xlrd,xlwt

# One board URL per page: the offset query parameter runs 0, 10, ..., 90.
urls = list(map("https://maoyan.com/board/4?offset={}".format, range(0, 100, 10)))

# Request headers sent with every page fetch (a desktop Chrome user agent).
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3945.88 Safari/537.36"
    ),
}

# Columns collected per film: title, starring, release date, country, score.
_FULLWIDTH_COLON = "："  # the page labels use the full-width (Chinese) colon
_NO_COUNTRY = "(暂无)"  # placeholder stored when no country is listed


def _first_text(node, selector):
    """Return the stripped text of the first element under node matching selector."""
    # select() returns a list, which is why the first item is indexed explicitly.
    return node.select(selector)[0].text.strip()


def _after_label(text):
    """Return what follows the first full-width colon, or text itself if there is none."""
    _, sep, value = text.partition(_FULLWIDTH_COLON)
    return value.strip() if sep else text.strip()


def _split_release(value):
    """Split a "date(country)" string into a (date, country) pair."""
    # Split on the opening parenthesis rather than slicing at a fixed width,
    # so dates shorter than ten characters still yield their country.
    date, _, rest = value.partition("(")
    country = rest.replace("(", "").replace(")", "").strip()
    return date.strip(), country or _NO_COUNTRY


def FilmInformation(url):
    """Fetch one board page and extract a record for every film listed on it.

    Args:
        url: Address of the board page to download.

    Returns:
        A list with one ``[name, staring, releasetime, country, score]`` list
        per ``.board-item-main`` element found in the page.

    Raises:
        IndexError: If a film entry lacks one of the expected elements.
        requests.RequestException: If the download fails or times out.
    """
    # A timeout keeps a stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=header, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")

    content = []
    for film in soup.select(".board-item-main"):
        # The original note suggests a ".name a" selector would presumably
        # work here as well.
        name = _first_text(film, "[title]")
        staring = _after_label(_first_text(film, ".star"))
        releasetime, country = _split_release(
            _after_label(_first_text(film, ".releasetime"))
        )
        # The score is rendered as two adjacent elements; join them back up.
        score = _first_text(film, ".integer") + _first_text(film, ".fraction")
        content.append([name, staring, releasetime, country, score])
    return content

def WriteExcel(data):
    """Write the crawled film records to the workbook file "maoyan.xls".

    Args:
        data: A list of pages, where each page is a list of film records and
            each record is an indexable sequence of cell values.

    Row 0 holds the column headings; every film record takes one row below it.
    """
    book = xlwt.Workbook(encoding="utf-8")
    board = book.add_sheet("猫眼前100")

    headings = ["电影", "主演", "时间", "国家", "评分"]
    for column, heading in enumerate(headings):
        board.write(0, column, heading)

    # Flatten pages into a single stream of records, numbering rows from 1.
    records = (record for page in data for record in page)
    for row_index, record in enumerate(records, start=1):
        for column in range(len(record)):
            board.write(row_index, column, record[column])

    book.save("maoyan.xls")

def main():
    """Crawl every board page in ``urls`` and export the results to Excel."""
    # One list of film records per page, in the same order as ``urls``.
    pages = [FilmInformation(url) for url in urls]
    WriteExcel(pages)

# Start the crawl only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()

 

Guess you like

Source: www.cnblogs.com/python-kp/p/12518350.html