Python 爬取猫眼电影视频

import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree

# Script: download every video linked from the Maoyan news "video" tab.
#
# Flow: fetch the listing page -> collect (detail-page link, title) pairs ->
# for each detail page, locate the <source src="..."> video URL -> save the
# video bytes to ./movie/<title>.mp4.

# 1. URL of the listing page.
url = 'https://maoyan.com/news?showTab=3'
# 2. Request headers; the same browser-like User-Agent is sent on every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.2 (KHTML, like Gecko) Chrome/22.0.1216.0 Safari/537.2'
}
# 3. Fetch the listing page. A timeout keeps the script from hanging forever,
#    and raise_for_status() stops us from parsing an HTTP error page.
res = requests.get(url, headers=headers, timeout=30)
res.raise_for_status()
text = res.text  # HTML markup of the listing page

# 4. Extract the data. XPath is used here; regular expressions would work too.
dom = etree.HTML(text)
movie_urls = dom.xpath('//h4[@class="video-name one-line"]/a[@href]/@href')
movie_names = dom.xpath('//h4[@class="video-name one-line"]/a/text()')

# The output directory must exist before open(..., 'wb') is called.
os.makedirs('./movie', exist_ok=True)

for movie_url, movie_name in zip(movie_urls, movie_names):
    # hrefs may be site-relative; urljoin resolves them against the listing
    # URL and leaves already-absolute URLs untouched.
    page_url = urljoin(url, movie_url)
    page_res = requests.get(page_url, headers=headers, timeout=30)
    page_res.raise_for_status()
    movie_id_string = page_res.text

    # Extract the video URL with a regex (XPath would work as well).
    # Non-greedy so the capture stops at the first closing quote.
    match = re.search(r'source src="(.*?)" type=', movie_id_string)
    if match is None:
        # No <source> tag on this page: skip it instead of crashing.
        print(f'no video source found on {page_url}')
        continue
    movie_mp4_url = urljoin(page_url, match.group(1))
    print(movie_mp4_url)

    # Fetch the raw binary content. `headers` must be passed by keyword:
    # the second positional parameter of requests.get is `params`.
    movie_res = requests.get(movie_mp4_url, headers=headers, timeout=60)
    movie_res.raise_for_status()
    movie = movie_res.content

    # Titles come from scraped HTML: trim whitespace and replace characters
    # that are not valid in file names so the path cannot break or escape
    # the output directory.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', movie_name.strip()) or 'unnamed'
    with open(f'./movie/{safe_name}.mp4', 'wb') as fp:
        fp.write(movie)

发布了42 篇原创文章 · 获赞 12 · 访问量 6107

猜你喜欢

转载自blog.csdn.net/Alingyuzi/article/details/104248446