版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/apollo_miracle/article/details/84865350
1 超时参数的使用
在平时网上冲浪的过程中,我们经常会遇到网络波动,这个时候,一个请求等了很久可能仍然没有结果
对应的,在爬虫中,一个请求很久没有结果,就会让整个项目的效率变得非常低,这个时候我们就需要对请求进行强制要求,必须在特定的时间内返回结果,否则就报错
使用方法如下:
# Give up with an error instead of waiting indefinitely when no response arrives within 3 seconds.
response = requests.get(url,timeout=3)
通过添加timeout参数,如果服务器在3秒钟内没有响应,请求就会抛出超时异常,而不会一直等待下去
2 retrying模块的使用
上述方法能够加快我们整体的请求速度,但是在正常的网页浏览过程中,如果发生速度很慢的情况,我们会做的选择是刷新页面,那么在代码中,我们是否也可以刷新请求呢?
对应的,retrying模块就可以帮助我们解决
使用方法如下:
import requests
from retrying import retry


@retry(stop_max_attempt_number=3)  # retry up to 3 times; raise only if all 3 attempts fail
def _parse_url(url):
    """Fetch ``url`` once and return the response; raise on timeout or non-200 status.

    NOTE: ``headers`` is not defined in this snippet; it is expected to be
    provided by the surrounding script.
    """
    # A timeout raises, which makes @retry try again.
    response = requests.get(url, headers=headers, timeout=3)
    # A status code other than 200 also raises and triggers a retry.
    # An explicit raise is used instead of ``assert`` so the check still runs
    # when Python is started with -O.
    if response.status_code != 200:
        raise requests.HTTPError("unexpected status code: %s" % response.status_code)
    return response


def parse_url(url):
    """Fetch ``url`` with retries; return the response, or ``None`` if every attempt failed."""
    try:  # catch the error left over once the retries are exhausted
        response = _parse_url(url)
    except Exception as e:
        print(e)
        response = None
    return response
3 下载QQ音乐付费歌曲代码升级
详细代码书写过程点击查看 ☞ https://blog.csdn.net/apollo_miracle/article/details/84852254
import json
import os
import sys

import requests
from retrying import retry
class QQMusic(object):
    """Download a singer's tracks from QQ Music, using timeouts and retries.

    The pipeline (see ``run``) is:

    1. fetch the song list (song name + songmid),
    2. resolve each songmid to a ``purl`` path,
    3. download the audio bytes from ``url_temp + purl``,
    4. write them to ``./music/<song name>.m4a``.

    Each network step is split into a private ``_xxx`` method decorated with
    ``@retry`` (at most 6 attempts) and a public wrapper that prints the final
    error and returns ``None`` instead of raising.
    """

    # Directory the downloaded tracks are written to.
    _MUSIC_DIR = "./music"
    # Replaces path separators and characters that are invalid in file names
    # on common file systems with "_", so a song title cannot break the path.
    _FILENAME_TRANSLATION = str.maketrans(dict.fromkeys('\\/:*?"<>|', "_"))

    def __init__(self):
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
            "referer": "https://y.qq.com/n/yqq/playlist/5836559239.html"
        }
        # URL that returns the song list (and therefore the songmid values)
        self.get_songmid_url = "https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg?singermid=003UjO1f3dJMRT&order=listen&begin=0&num=30&songstatus=1"
        # URL template that returns the purl for one songmid (filled in with %)
        self.purl_temp = 'https://u.y.qq.com/cgi-bin/musicu.fcg?data={"req_0":{"module":"vkey.GetVkeyServer","method":"CgiGetVkey","param":{"guid":"7208009084","songmid":["%s"],"songtype":[0],"uin":"0","loginflag":1,"platform":"20"}}}'
        # Base URL of the audio file; the purl is appended to it
        self.url_temp = "http://124.203.224.158/amobile.music.tc.qq.com/"
        # Proxy settings.
        # NOTE(review): only an "https" entry is configured while url_temp is
        # an http:// URL -- confirm whether the download should be proxied too.
        self.proxies = {"https": "https://118.122.92.252:37901"}

    # Try at most 6 times
    @retry(stop_max_attempt_number=6)
    def _get_songmid(self):
        """Fetch the song list and return ``[{"song_name": ..., "song_mid": ...}, ...]``.

        Raises on timeout (3 s), on an HTTP error status, or when the response
        is not the expected JSON structure; any of these triggers a retry.
        """
        resp = requests.get(self.get_songmid_url, headers=self.headers, proxies=self.proxies, timeout=3)
        # Treat 4xx/5xx as a failure so @retry tries again.
        resp.raise_for_status()
        # Decode the body and parse it as JSON
        payload = json.loads(resp.content.decode())
        # Keep only the name and songmid of each entry in the list
        return [
            {
                "song_name": entry["musicData"]["songname"],
                "song_mid": entry["musicData"]["songmid"],
            }
            for entry in payload["data"]["list"]
        ]

    def get_songmid(self):
        """Return the song list, or ``None`` if every attempt failed."""
        try:
            name_mid_list = self._get_songmid()
        except Exception as e:
            print(e)
            name_mid_list = None
        return name_mid_list

    # Try at most 6 times
    @retry(stop_max_attempt_number=6)
    def _get_purl(self, songmid):
        """Return the ``purl`` string for ``songmid``.

        Raises on timeout (3 s), on an HTTP error status, or when the response
        is not the expected JSON structure; any of these triggers a retry.
        """
        # Fill the songmid into the request URL
        request_url = self.purl_temp % songmid
        resp = requests.get(request_url, headers=self.headers, proxies=self.proxies, timeout=3)
        # Treat 4xx/5xx as a failure so @retry tries again.
        resp.raise_for_status()
        # Decode the body and parse it as JSON
        payload = json.loads(resp.content.decode())
        return payload["req_0"]["data"]["midurlinfo"][0]["purl"]

    def get_purl(self, songmid):
        """Return the purl for ``songmid``, or ``None`` if every attempt failed."""
        try:
            purl = self._get_purl(songmid)
        except Exception as e:
            print(e)
            purl = None
        return purl

    # Try at most 6 times
    @retry(stop_max_attempt_number=6)
    def _parse_url(self, purl):
        """Download the audio at ``url_temp + purl`` and return its bytes.

        Raises on timeout (30 s) or on an HTTP error status, which triggers a
        retry.
        """
        url = self.url_temp + purl
        resp = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=30)
        # Without this check the body of an error page would be returned and
        # later saved as if it were audio data.
        resp.raise_for_status()
        return resp.content

    def parse_url(self, purl):
        """Return the downloaded bytes, or ``None`` if every attempt failed."""
        try:
            content = self._parse_url(purl)
        except Exception as e:
            print(e)
            content = None
        return content

    def save_music(self, songname, content):
        """Write ``content`` to ``./music/<songname>.m4a`` and report completion.

        The output directory is created when missing, and characters that are
        not valid in a file name are replaced with ``_`` in the file name only;
        the printed message keeps the original song name.
        """
        # open() does not create missing directories.
        os.makedirs(self._MUSIC_DIR, exist_ok=True)
        safe_name = songname.translate(self._FILENAME_TRANSLATION)
        path = os.path.join(self._MUSIC_DIR, safe_name + ".m4a")
        with open(path, "wb") as f:
            f.write(content)
        # Download finished
        print(songname, "download over!")

    def run(self):
        """Run the whole pipeline: list songs, resolve purls, download, save.

        Exits the process if the song list cannot be fetched; a failure on a
        single song is reported and that song is skipped.
        """
        # 1. Fetch song names and songmids
        name_mid_list = self.get_songmid()
        if not name_mid_list:
            print("获取songmid、songname等参数失败!")
            # Nothing to download, so stop here
            sys.exit()
        # 2. Resolve and download each song in turn
        for name_mid in name_mid_list:
            songname = name_mid["song_name"]
            songmid = name_mid["song_mid"]
            purl = self.get_purl(songmid)
            # Skip the song when no purl was obtained
            if not purl:
                print(songname, "下载失败!")
                continue
            # 3. Download the audio data
            content = self.parse_url(purl)
            # Skip the song when nothing was downloaded
            if not content:
                print(songname, "下载失败!")
                continue
            # 4. Save the data to disk
            self.save_music(songname, content)
if __name__ == '__main__':
    # Script entry point: build the downloader and run the full pipeline.
    QQMusic().run()
成果展示: