# Python 3 has an asynchronous HTTP library, aiohttp. I rarely write async
# crawlers, so I'll let the code speak for itself.
class AsyncSpider(object):
    """Asynchronous HTTP fetcher built on aiohttp.

    Holds a headers dict and exposes coroutines that fetch a URL and
    return the decoded response body as text.
    """

    def __init__(self, headers):
        # Headers (e.g. User-Agent) sent with every request.
        self.headers = headers

    async def get(self, url):
        """Fetch *url* and return the response body as text.

        Uses ``async with`` so the session and the response are always
        released, even when the request or the body read raises — the
        original closed the session manually and leaked it on error.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self.headers) as response:
                return await response.text()

    async def request(self, url):
        """Thin pass-through to :meth:`get`; kept for API compatibility."""
        return await self.get(url)
async def get(url):
    """Fetch *url* and return the raw response body as bytes.

    The original called the blocking ``requests.get`` inside a coroutine,
    which stalls the event loop and makes the concurrent downloads in
    ``Spider_Project.get_parse`` run serially.  Use aiohttp (already a
    dependency of this file) so the download truly yields to the loop;
    ``response.read()`` returns bytes, matching ``requests``' ``.content``.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=config.headers) as response:
            return await response.read()
async def reqt(url):
    """Await the module-level ``get`` coroutine and hand its result back."""
    return await get(url)
class Spider_Project(object):
    """Extracts image URLs/names from an HTML page and downloads each image."""

    def get_parse(self, response):
        """Parse *response* HTML, download every image, and save to ./萝莉.

        Fixes over the original:

        * duplicate image URLs no longer desynchronise names from tasks
          (the dict deduplicated URLs while the flat name list kept one
          entry per regex match, so ``list_name[tasks.index(task)]``
          could pick the wrong filename); pairs are now kept in a single
          ordered dict and matched with ``zip``;
        * ``tasks.index`` (O(n²), ambiguous for equal futures) is gone;
        * the output directory is created if it does not exist yet.
        """
        import os  # local import: the file's import block is outside this chunk

        pattern = re.compile(r'<img.*?src="(.*?)".*?alt="(.*?)">', re.S)
        # url -> filename, deduplicated by URL in encounter order.
        images = {}
        for src, alt in pattern.findall(response):
            url = "".join(["https:", src])
            # First token of the alt text (before quotes/whitespace) is the name.
            name = re.split(r'["\s]+', alt.split("/")[0])[0]
            images.setdefault(url, name)

        tasks = [asyncio.ensure_future(reqt(url)) for url in images]
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.wait(tasks))

        os.makedirs("./萝莉", exist_ok=True)  # original crashed if missing
        for task, name in zip(tasks, images.values()):
            path = "./萝莉" + "/" + name + ".jpg"
            with open(path, "wb") as f:
                f.write(task.result())
def run():
    """Entry point: fetch the listing page and hand the HTML to the parser."""
    asy = AsyncSpider(config.headers)
    loop = asyncio.get_event_loop()
    # Pass the coroutine straight to run_until_complete: calling
    # asyncio.ensure_future() before any loop is running is a deprecated
    # pattern (DeprecationWarning since 3.10), and run_until_complete
    # already returns the coroutine's result.
    result = loop.run_until_complete(asy.request(config.url))
    Spider_Project().get_parse(result)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    run()