文章转载自:https://blog.csdn.net/lpwmm/article/details/108722225
原文作者给出了详细的步骤,感谢分享。这里一方面对资源做个备份,另一方面针对“遇到失效链接会导致下载终止”这一问题做了些优化:失效链接会被跳过,不再中断整个下载任务。
环境依赖
- Python 3.6+
- selenium(splinter 驱动 Chrome 时依赖)
- requests
- splinter
- chromedriver(注意需要与安装的Chrome大版本一致)
完整代码
# coding=gbk
import os
import requests as rq
from selenium import webdriver
from splinter import Browser
# Share URL of the WeChat official-account article that lists the audio links.
BASE_URL = 'https://mp.weixin.qq.com/s?__biz=MzU2OTI3NjQ4OA==&mid=100000538&idx=1&sn=419a647c0372d7630a197db9454bddc6&chksm=7c80623d4bf7eb2bca0756d63ee7250807d225a1fffe475db317d7c745b51074e5cc58b3e607&mpshare=1&scene=2&srcid=11076nyob9dLyKeQrFDgcnwY&sharer_sharetime=1565615878241&sharer_shareid=80ca6d7f86bae39e4b8264e1365d4b6a&from=timeline&clicktime=1565621528&key=0053416997e154650cb74184219ffa63550a1b3adf6c5c6bc705442434f0b0d05fbbe8217ed41a8f0f9165a2008bacbba44b624f535047b02cff8b47d698619ce97e48cf5663091b25deb3276764083a3719066790537f1813f0d7e3d5b6ae32fcc8cc3e49633034326feda342a8772d88981eedb113ea9a491c345c07ab271e&ascene=1&uin=MTY3NjYy&devicetype=Windows+10+x64&version=62090529&lang=zh_CN&exportkey=AaZhp%2BDCkmag01n7EMTREmg%3D&pass_ticket=hbFyRKZiXYK0ETPppZH0Xbwm2Tnpv7pFXeyOGE846fyLoLRcyDdPwaWI4mjwJzSu&wx_header=0'

# Chrome session driven through splinter, shared by the whole script.
bro = Browser('chrome')

if __name__ == '__main__':
    # Download every audio item linked from BASE_URL into ./mp3/<title>.mp3.
    # A link that cannot be processed (page gone, no audio player, failed
    # download) is reported and skipped, so one dead link never stops the run.
    try:
        # Create the output directory up front. Without it every open() below
        # would fail and each link would be misreported as outdated.
        out_dir = os.path.join(os.getcwd(), 'mp3')
        os.makedirs(out_dir, exist_ok=True)

        # Open the index article.
        bro.visit(BASE_URL)
        # Collect all anchors of the article's link list.
        links = bro.find_by_css('a[data-linktype="2"]')
        # The browser navigates away below, which invalidates these elements,
        # so copy title/URL into plain dicts first.
        tasks = []
        for link in links:
            title = link.text
            if title:
                tasks.append({
                    'title': title,
                    'url': link['href']
                })

        # Work through the collected links; the counter is 1-based so the
        # last line of progress reads N/N.
        total = len(tasks)
        for i, t in enumerate(tasks, start=1):
            filename = t['title']
            filepath = os.path.join(out_dir, f'{filename}.mp3')
            print(f'正在下载\t{i}/{total}\t{filename}')
            # Skip links that no longer work.
            try:
                bro.visit(t['url'])
                # The <audio> tag is only inserted into the page's HTML after
                # playback has been started, so click the player first.
                bro.find_by_css('.audio_card_switch').click()
                audio_src = bro.find_by_tag('audio')['src']
                # Fetch completely before touching the disk so a failed
                # request does not leave an empty .mp3 behind.
                resp = rq.get(audio_src, timeout=60)
                resp.raise_for_status()
                with open(filepath, 'wb') as fp:
                    fp.write(resp.content)
            except Exception as err:
                # `Exception` (not a bare except) keeps Ctrl+C working.
                print("WARNNING!! link " + f'{filename}' + " is outdated"
                      + f' ({type(err).__name__}: {err})')
                continue
    finally:
        # Always close the browser, even when something unexpected failed.
        bro.quit()
    print('任务完成')