如何用Python脚本小程序爬取优酷视频

小编话不多说,直接上代码。有不懂的地方可以在下方评论区留言,我会一一解答,也请各位多多指教。

#得到响应
import requests
import time
import re
# Start the wall-clock timer for the whole script run.
t1 = time.time()
# Site to probe and scrape.
URL = 'http://www.youku.com'
print('开始!')
for n in range(0, 1):
    # URL contains no "{}" placeholder, so format() returns it unchanged;
    # the loop therefore performs a single request to the home page.
    url = URL.format(str(n))
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        r = requests.get(url, timeout=10)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as exc:
        # Best-effort probe: report the failure instead of hiding it,
        # then move on to the next URL.
        print(str(n) + ',' + url + ' 请求失败: ' + str(exc))
        continue
    # Only URLs that answered with HTTP 200 are listed.
    if r.status_code == 200:
        print(str(n) + ',' + url)
print('完成!')
#获取文字
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded to ``str`` by requests (``response.text``).
        The HTTP status code is not checked, so an error page is returned
        as-is.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, invalid URL, ...).
    """
    # A real browser identifier; the previous value was a pasted
    # "Request URL: ..." line, which is not a valid User-Agent.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/70.0.3538.102 Safari/537.36'
        )
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
# Download the page once and reuse the same HTML for both the raw dump
# and the parse, so the printed source matches what is being parsed.
html = get_html(URL)
# Name the parser explicitly: without it BeautifulSoup emits a warning and
# picks whichever parser happens to be installed.
soup = BeautifulSoup(html, 'html.parser')
print(html)
for li in soup.find_all(name='li'):  # every <li> element in the page
    for a in li.find_all(name='a'):  # every <a> nested inside that <li>
        # .string is None when the tag does not contain exactly one text
        # node (e.g. it wraps an image or several children); skip those.
        if a.string is not None:
            print(a.string)
# Fetch the page and dump the response's metadata and body.
# The response gets its own name so the URL constant is not overwritten.
response = requests.get(URL, timeout=10)
print(response.status_code)  # HTTP status code
print(response.url)  # URL that was finally requested
print(response.headers)  # response headers
print(response.cookies)  # cookies set by the server
print(response.text)  # page source decoded as text
print(response.content)  # page source as raw bytes
# Report the elapsed wall-clock time since t1 was recorded.
t2 = time.time()
print(t2 - t1)

猜你喜欢

转载自blog.csdn.net/qq_43513350/article/details/84668591
今日推荐