爬取视频

爬取千锋的JavaScript视频

 1 import requests
 2 from urllib.parse import quote
 3 from lxml import etree
 4 '''
 5 URL
 6     http://video.mobiletrain.org/course/index/courseId/479
 7 请求方式
 8     GET
 9 请求头
10     User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36
11 '''
12 #模拟发送请求获取响应
13 response =requests.get(
14     url='http://video.mobiletrain.org/course/index/courseId/479',
15     headers={
16         'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
17     }
18 )
19 html = response.text
20 #获取页面中的视频地址
21 eroot = etree.HTML(html)
22 hrefs = eroot.xpath("//li[@class='clearfix j-url-list']/a/@data-url")
23 for href in hrefs:
24     print(href)
25     # 设置文件名称
26     start_index = href.find('')+1
27     end_index = -4
28     filename = href[start_index:end_index]
29     #从href中截取中文
30     start_url = href.find("")
31     uri = href[start_url:end_index]
32     #构造视频真正的访问地址
33     start_uri = 'http://7xtcwd.com1.z0.glb.clouddn.com/'
34     #对中文进行编码
35     end_uri = quote(uri)
36     src = start_uri+end_uri+".mp4"
37 
38     with open(filename+'.mp4', 'wb') as f:
39         # 使用request下载文件
40         video_response = requests.get(
41             url=src,
42             stream=True
43         )
44         print("正在下载:", src)
45         # 每下载512个字节就回调一次
46         for chunk in video_response.iter_content(chunk_size=512):
47             f.write(chunk)

转载于:https://www.cnblogs.com/chaunceyji/p/10995266.html

猜你喜欢

转载自blog.csdn.net/weixin_34293911/article/details/93683809