python3爬虫 —— 爬取豆瓣电影信息

爬取豆瓣电影 Top250 页面的电影信息(排名、电影名称、导演、国家、年份、评分),并保存到 Excel 文件中。

代码:

import re,xlwt,requests

# Build the in-memory workbook that will hold the scraped rows.
book = xlwt.Workbook()
# Add the single worksheet, named 'movie', that receives all the data.
sheet = book.add_sheet('movie')

# Column titles; they go into row 0, one per column, left to right.
headings = [u'排名', u'电影名称',u'导演',u'国家',u'年份',u'评分']
for column, title in enumerate(headings):
    sheet.write(0, column, title)


# Address of the Douban Top 250 listing page that gets scraped.
url = 'https://movie.douban.com/top250'
# Request headers. The key must be spelled 'User-Agent' (with a hyphen):
# any other spelling is sent as an unrelated header and the HTTP client's
# default agent string is still used.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}

def _first(pattern, html, default=''):
    """Return the first capture group of ``pattern`` found in ``html``.

    The captured text is stripped of surrounding whitespace. ``default`` is
    returned when the pattern does not match, so a missing field yields an
    empty cell instead of an error.
    """
    found = re.search(pattern, html)
    return found.group(1).strip() if found else default


try:
    # Download the listing page; the timeout keeps the script from hanging.
    r = requests.get(url, timeout=30, headers=headers)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    text = r.text
    # Each chunk spans from a 'pic' <div> up to the next quote paragraph.
    # NOTE(review): an entry without a quote paragraph would be merged into
    # the following one by this pattern -- confirm against the live page.
    movie_info = re.findall(r'div class="pic">([\d\D]*?)<p class="quote">', text)

    # Row 0 holds the column titles, so data rows start at 1.
    for count, i in enumerate(movie_info, start=1):
        rank = _first(r'<em class="">(\d+)</em>', i)
        # Take the first title span; '[^<]+' also accepts titles containing
        # punctuation or spaces, which '\w*' could not match.
        name = _first(r'span class="title">([^<]+)</span>', i)
        director = _first(r'导演:([\d\D]*?)&nbsp;', i)
        year = _first(r'(\d{4})&nbsp;/&nbsp;', i)
        country = _first(r'\d{4}&nbsp;/&nbsp;([\d\D]*?)&nbsp;/&nbsp;', i)
        score = _first(r'<span class="rating_num" property="v:average">(\d+(?:\.\d+)?)', i)

        # Write scalar values (not findall lists); numeric fields are stored
        # as numbers so the spreadsheet can sort them.
        sheet.write(count, 0, int(rank) if rank else '')
        sheet.write(count, 1, name)
        sheet.write(count, 2, director)
        # Column order follows the header row: country in 3, year in 4.
        sheet.write(count, 3, country)
        sheet.write(count, 4, year)
        sheet.write(count, 5, float(score) if score else '')

    book.save('电影信息.xls')

except Exception as err:
    # Report the cause instead of hiding it; Exception (unlike a bare
    # except) lets Ctrl-C and interpreter exit pass through.
    print('失败', err)

猜你喜欢

转载自blog.csdn.net/weixin_42365428/article/details/89075413