构思:
可以考虑使用多任务协程(gevent)来并发下载图片,这样速度更快,写起来也更方便
1. 导入 gevent 模块: import gevent
2. 使用猴子补丁(monkey patch),把标准库中的阻塞式调用替换为协程友好的版本:
from gevent import monkey
monkey.patch_all()
3. 调用 spawn 方法创建协程,再用 joinall 等待它们全部结束:
gevent.spawn(函数名, 参数1, 参数2, ...)
扫描二维码关注公众号,回复:
1216385 查看本文章
from gevent import monkey

# Patch the blocking stdlib I/O primitives first, so every module imported
# afterwards picks up the cooperative (gevent-aware) versions.
monkey.patch_all()

import os
import re
import urllib.request

import gevent
def myclient(url="https://www.douyu.com/directory/game/yz"):
    """Fetch a page and return every ``https://...jpg`` URL found in it.

    Args:
        url: Address of the page to scan. Defaults to the Douyu directory
            page this script was written for.

    Returns:
        A list of the matched URL strings, in the order they appear in the
        page. Duplicates are kept. The list is also printed to stdout.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    ua_header = {
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}
    request = urllib.request.Request(url, headers=ua_header)
    # Close the connection as soon as the body has been read.
    with urllib.request.urlopen(request) as response:
        html = response.read()
    # A greedy ``.*`` would run from the first "https://" to the LAST ".jpg"
    # on a line and glue several URLs (plus the markup between them) into one
    # match. Stop at the first ".jpg" and never cross characters that cannot
    # be part of a URL inside HTML (whitespace, quotes, angle brackets).
    pics = re.findall(r'https://[^\s"\'<>]+?\.jpg', html.decode())
    print(pics)
    return pics
def download(pics, dest_dir="./pics"):
    """Download every URL in *pics* and save it under *dest_dir*.

    Each file is named after the last path segment of its URL, so two URLs
    that end in the same segment overwrite each other.

    Args:
        pics: Iterable of URL strings to fetch.
        dest_dir: Directory the files are written to. It is created
            (including parents) when it does not exist yet.

    Raises:
        urllib.error.URLError: If a URL cannot be fetched; the remaining
            URLs are not attempted.
        OSError: If the target directory or a file cannot be written.
    """
    # Opening a file inside a missing directory fails, so make sure it exists.
    os.makedirs(dest_dir, exist_ok=True)
    for pic_url in pics:
        with urllib.request.urlopen(pic_url) as response:
            content = response.read()
        # Take the text after the final "/" WITHOUT the slash itself, so the
        # joined path has no doubled separator.
        file_name = pic_url.rsplit("/", 1)[-1]
        with open(os.path.join(dest_dir, file_name), "wb") as file:
            file.write(content)
def main():
    """Crawl Douyu: collect the image URLs, then download them concurrently.

    The URL list has to exist before any download can start, so the page is
    fetched first. After that, one greenlet is spawned per image so the
    network waits of the individual downloads overlap.
    """
    pics = myclient()
    # download() expects a collection of URLs, hence the one-element list.
    jobs = [gevent.spawn(download, [pic_url]) for pic_url in pics]
    gevent.joinall(jobs)


if __name__ == "__main__":
    main()