aiohttp教程
https://www.cnblogs.com/ssyfj/p/9222342.html#14.clientsession-用于在多个连接之间同一网站共享cookie,请求头等
参考教程
https://www.jianshu.com/p/0efdc952e8ca
https://www.cnblogs.com/callyblog/p/11216961.html
import asyncio
from aiohttp import web
async def index(request):
    """Serve the landing page.

    Waits half a second before answering, then returns a fixed HTML body.

    Args:
        request: The incoming aiohttp request (not inspected).

    Returns:
        web.Response: Response whose body is ``<h1>Index</h1>``.
    """
    await asyncio.sleep(0.5)
    # Declare the payload as HTML: a raw ``body`` without a content type is
    # served as application/octet-stream, which browsers download rather
    # than render.
    return web.Response(body=b'<h1>Index</h1>', content_type='text/html')
async def hello(request):
    """Greet the name captured by the ``/hello/{name}`` route.

    Waits half a second before answering.

    Args:
        request: The incoming aiohttp request; ``match_info['name']`` holds
            the path segment to greet.

    Returns:
        web.Response: UTF-8 HTML response containing the greeting.
    """
    # Local import keeps this snippet runnable without touching the
    # file-level import block.
    from html import escape

    await asyncio.sleep(0.5)
    # The name comes straight from the URL, so escape it before embedding
    # it in markup.
    safe_name = escape(request.match_info['name'])
    page = '<h1>hello, %s!</h1>' % safe_name
    # ``text=`` lets aiohttp encode the page as UTF-8 and advertise the
    # charset; ``content_type`` makes browsers render it as HTML.
    return web.Response(text=page, content_type='text/html')
async def init(loop):
    """Create the demo application and start listening on 127.0.0.1:8000.

    Args:
        loop: Event loop that owns the application and the listening socket.

    Returns:
        The server object produced by ``loop.create_server``.
    """
    # (path, handler) pairs; every route answers GET only.
    routes = (
        ('/', index),
        ('/hello/{name}', hello),
    )
    application = web.Application(loop=loop)
    for path, handler in routes:
        application.router.add_route('GET', path, handler)
    server = await loop.create_server(
        application.make_handler(), '127.0.0.1', 8000)
    print('Server started at http://127.0.0.1:8000...')
    return server
# Start the server and keep serving until the process is interrupted.
loop = asyncio.get_event_loop()
# Keep the server handle so the listening socket can be released on exit.
srv = loop.run_until_complete(init(loop))
try:
    loop.run_forever()
finally:
    # Runs on Ctrl-C as well: stop accepting connections and wait for the
    # listening socket to close. The interrupt itself still propagates.
    srv.close()
    loop.run_until_complete(srv.wait_closed())
500并发测试
import asyncio
import aiohttp
import time
# URLs whose request did not come back with HTTP 200; filled by get_info.
url_lst_failed=[]
# URLs whose request came back with HTTP 200; filled by get_info.
url_lst_successed=[]
async def get_info(url):
    """Request ``url`` once and record whether it answered with HTTP 200.

    The URL is appended to ``url_lst_successed`` on a 200 response and to
    ``url_lst_failed`` otherwise, including when the request times out or
    the connection fails.

    Args:
        url: Address to request with GET (5-second timeout).
    """
    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, timeout=5) as resp:
                # Read the body before classifying, so a failure while
                # downloading is counted as a failed request.
                await resp.text()
                succeeded = resp.status == 200
        except (aiohttp.ClientError, asyncio.TimeoutError):
            # Timeouts and connection errors would otherwise escape and the
            # URL would appear in neither list.
            succeeded = False
    if succeeded:
        url_lst_successed.append(url)
    else:
        url_lst_failed.append(url)
# Fire 500 concurrent requests at one endpoint and report the elapsed time.
start = time.time()
# Get the event loop that will drive the requests.
loop = asyncio.get_event_loop()
# Build the batch of 500 identical requests. Each coroutine is wrapped in a
# Task because asyncio.wait() rejects bare coroutine objects on Python 3.11+.
tasks = [
    loop.create_task(get_info('http://39.108.128.123:80/v1/select_user'))
    for _ in range(500)
]
# Run the loop until every request has finished.
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
# Elapsed wall-clock seconds, then the number of HTTP 200 responses.
print(end - start)
print(len(url_lst_successed))
import asyncio
import aiohttp
# pip install readability-lxml以安装
from readability import Document
def title_summary(fut):
    """Done-callback that prints the title and summary of a fetched page.

    Args:
        fut: The finished future. Its result is expected to be either a
            falsy value (nothing is printed) or a ``(content, url)`` pair.
    """
    # A cancelled future has no result; asking for one would raise.
    if fut.cancelled():
        return
    # Inside a done-callback, result() yields the value returned by the
    # coroutine the future wrapped.
    res = fut.result()
    if not res:
        return
    content, url = res
    # Pass the URL by keyword: the second positional parameter of Document
    # is ``positive_keywords``, not ``url``.
    doc = Document(content, url=url)
    print(doc.short_title(), doc.summary())
async def read_one(id_: int, url: str):
    """Download one page and return its body together with its URL.

    Args:
        id_: Identifier the caller pairs with the URL (not used here).
        url: Address to fetch.

    Returns:
        A ``(content, url)`` tuple, where ``content`` is the raw response
        body, or ``None`` when the request fails or times out.
    """
    # Local import: the original body called random() without importing it,
    # and the resulting NameError was swallowed by a bare ``except``.
    import random

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
    async with aiohttp.ClientSession() as session:
        try:
            # NOTE(review): ``verify_ssl`` is deprecated in newer aiohttp
            # releases in favour of ``ssl=False`` -- confirm against the
            # installed version before changing it.
            async with session.get(
                    url, headers=headers, timeout=1, verify_ssl=False) as r:
                content = await r.read()
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            # Best effort: report the failure and let the caller skip it.
            print('Failed to fetch %s: %r' % (url, exc))
            return None
    # Pause 1-2 seconds after the body is read, so the pause cannot eat
    # into the 1-second request timeout.
    await asyncio.sleep(1 + random.random())
    # Return the URL as the second element: title_summary unpacks the result
    # as (content, url).
    return content, url
def read_many(links: list):
    """Fetch every link concurrently and print a summary of each page.

    Every ``(id_, url)`` pair is scheduled as a ``read_one`` task with
    ``title_summary`` attached as its done-callback. The event loop runs
    until all tasks have finished and is then closed.

    Args:
        links: Iterable of ``(id_, url)`` pairs. May be empty.
    """
    loop = asyncio.get_event_loop()
    tasks = []
    for id_, url in links:
        task = loop.create_task(read_one(id_, url))
        # A done-callback receives only the finished task. To give it extra
        # arguments use functools.partial (the officially recommended way),
        # a lambda, or return a tuple and unpack it inside the callback.
        task.add_done_callback(title_summary)
        tasks.append(task)
    # Wait for all tasks, not just the last one created. asyncio.wait()
    # raises on an empty collection, so skip it when there is no work.
    if tasks:
        loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
def main():
    """Script entry point: pass the list of links to read_many."""
    targets = [...]  # placeholder: the full list of links to crawl
    read_many(targets)


if __name__ == "__main__":
    main()