# Python 3 has an asynchronous HTTP library, aiohttp. I rarely write async
# crawlers, so I'll let the code speak for itself.
class AsyncSpider(object):
    """Asynchronous HTTP fetcher built on aiohttp.

    Holds a headers dict and exposes coroutines that fetch a URL and
    return the decoded response body as text.
    """

    def __init__(self, headers):
        # Headers (e.g. User-Agent) sent with every request.
        self.headers = headers

    async def get(self, url):
        """Fetch *url* and return the response body as text.

        Uses ``async with`` so the session and the response are always
        released, even when the request or the body read raises — the
        original closed the session manually and leaked it on error.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self.headers) as response:
                return await response.text()

    async def request(self, url):
        """Thin pass-through to :meth:`get`; kept for API compatibility."""
        return await self.get(url)
async def get(url):
    """Fetch *url* and return the raw response body as bytes.

    The original called the blocking ``requests.get`` inside a coroutine,
    which stalls the event loop and makes the concurrent downloads in
    ``Spider_Project.get_parse`` run serially.  Use aiohttp (already a
    dependency of this file) so the download truly yields to the loop;
    ``response.read()`` returns bytes, matching ``requests``' ``.content``.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=config.headers) as response:
            return await response.read()
async def reqt(url):
    """Await the module-level ``get`` coroutine and hand its result back."""
    return await get(url)
class Spider_Project(object):
    """Extracts image URLs/names from an HTML page and downloads each image."""

    def get_parse(self, response):
        """Parse *response* HTML, download every image, and save to ./萝莉.

        Fixes over the original:

        * duplicate image URLs no longer desynchronise names from tasks
          (the dict deduplicated URLs while the flat name list kept one
          entry per regex match, so ``list_name[tasks.index(task)]``
          could pick the wrong filename); pairs are now kept in a single
          ordered dict and matched with ``zip``;
        * ``tasks.index`` (O(n²), ambiguous for equal futures) is gone;
        * the output directory is created if it does not exist yet.
        """
        import os  # local import: the file's import block is outside this chunk

        pattern = re.compile(r'<img.*?src="(.*?)".*?alt="(.*?)">', re.S)
        # url -> filename, deduplicated by URL in encounter order.
        images = {}
        for src, alt in pattern.findall(response):
            url = "".join(["https:", src])
            # First token of the alt text (before quotes/whitespace) is the name.
            name = re.split(r'["\s]+', alt.split("/")[0])[0]
            images.setdefault(url, name)

        tasks = [asyncio.ensure_future(reqt(url)) for url in images]
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.wait(tasks))

        os.makedirs("./萝莉", exist_ok=True)  # original crashed if missing
        for task, name in zip(tasks, images.values()):
            path = "./萝莉" + "/" + name + ".jpg"
            with open(path, "wb") as f:
                f.write(task.result())
def run():
    """Entry point: fetch the listing page and hand the HTML to the parser."""
    asy = AsyncSpider(config.headers)
    loop = asyncio.get_event_loop()
    # Pass the coroutine straight to run_until_complete: calling
    # asyncio.ensure_future() before any loop is running is a deprecated
    # pattern (DeprecationWarning since 3.10), and run_until_complete
    # already returns the coroutine's result.
    result = loop.run_until_complete(asy.request(config.url))
    Spider_Project().get_parse(result)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    run()