爬虫背景
目的:爬取猫眼电影排行
网页:https://maoyan.com/board/
分析过程:
用 requests 抓取榜单页面后,用 BeautifulSoup 解析每个 <dd> 节点,取出排名、电影名、主演、上映时间、链接和评分即可;本例主要用于熟悉 BeautifulSoup 及其他基础知识。
代码如下:
import requests
from bs4 import BeautifulSoup
url = 'https://maoyan.com/board/'
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}


def strip_label(text, label_len):
    """Return *text* without surrounding whitespace and its leading label.

    The board renders some fields as ``<label><value>`` inside one string;
    the first ``label_len`` characters of the stripped text are dropped.

    Args:
        text: The raw string taken from a parsed node, or None when the node
            (or its single string) is missing.
        label_len: Number of leading characters to drop after stripping.

    Returns:
        The remaining value, or an empty string when *text* is None.
    """
    if text is None:
        # Without this guard str(None) would be sliced into a bogus value.
        return ''
    return str(text).strip()[label_len:]


def parse_movie(movie):
    """Build the field dictionary for one ``<dd>`` entry of the board.

    Args:
        movie: A parsed ``<dd>`` element.

    Returns:
        A dict mapping the printed labels to their values, in print order,
        or None when the entry has no ``<i>`` (rank) or ``<a>`` (link) child.
    """
    rank, link = movie.i, movie.a
    if rank is None or link is None:
        return None

    star = movie.find('p', class_='star')
    release = movie.find('p', class_='releasetime')
    score = movie.find('p', class_='score')

    return {
        '排行榜:': '第' + str(rank.string) + '名',
        '电影名:': link.get('title', ''),
        # Drop the 3-character label in front of the cast list.
        '主演:': strip_label(star.string if star is not None else None, 3),
        # Drop the 5-character label in front of the release date.
        '上映时间': strip_label(
            release.string if release is not None else None, 5),
        '电影链接': 'https://maoyan.com' + link.get('href', ''),
        # Entries without a score paragraph get a placeholder instead.
        '评分:': ''.join(score.strings) if score is not None else '暂无评分',
    }


def main():
    """Fetch the board page and print its header lines and every movie.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the update-time or description paragraph is absent
            from the returned page.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=header, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    update_tag = soup.find('p', class_='update-time')
    desc_tag = soup.find('p', class_='board-content')
    if update_tag is None or desc_tag is None:
        raise RuntimeError(
            'expected board elements not found in response from ' + url)

    print('|'.join(update_tag.strings))
    print(str(desc_tag.string))

    for movie in soup.find_all('dd'):
        content = parse_movie(movie)
        if content is None:
            continue
        print('==' * 20)
        for label, value in content.items():
            print(label, value)
        print()


if __name__ == '__main__':
    main()