1.线程
from concurrent.futures import ThreadPoolExecutor
import requests
import time

def task(url):
    """
    Download one page.
    :param url: address to fetch
    :return: the requests.Response object (delivered via the future)
    """
    response = requests.get(url)
    return response

# Handle results through a completion callback.
def done(future, *args, **kwargs):
    """
    Runs when a download future finishes.

    BUG FIX: future.result() re-raises any exception the task hit
    (DNS failure, timeout, ...); the original let one bad URL blow up
    inside the callback. Guard it so the other downloads still report.
    """
    try:
        response = future.result()  # response, e.g. [200]
    except Exception as exc:
        print('request failed:', exc)
        return
    print(response.status_code, response.content)

url_list = [
    'http://www.cnblogs.com/wupeiqi',
    'http://huaban.com/favorite/beauty/',
    'http://www.bing.com',
    'http://www.zhihu.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

# `with` guarantees shutdown(wait=True) even if submission raises.
with ThreadPoolExecutor(7) as pool:
    for url in url_list:
        v = pool.submit(task, url)
        v.add_done_callback(done)
from concurrent.futures import ThreadPoolExecutor
import requests
import time

def task(url):
    """Fetch *url* and print the outcome; parsing (e.g. a regex) would go here."""
    response = requests.get(url)
    print(url,response)


url_list = [
    'http://www.cnblogs.com/wupeiqi',
    'http://huaban.com/favorite/beauty/',
    'http://www.bing.com',
    'http://www.zhihu.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

# Thread pool: the with-block waits for every submitted task before exiting
# (equivalent to an explicit shutdown(wait=True)).
with ThreadPoolExecutor(7) as executor:
    for target_url in url_list:
        executor.submit(task, target_url)
2. 进程
from concurrent.futures import ProcessPoolExecutor
import requests


from multiprocessing import freeze_support


def task(url):
    """Download *url* in a worker process and print the result."""
    response = requests.get(url)
    print(url,response)
    # parse the body here (e.g. with a regex)


url_list = [
    'http://www.cnblogs.com/wupeiqi',
    'http://huaban.com/favorite/beauty/',
    'http://www.bing.com',
    'http://www.zhihu.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

if __name__ == '__main__':
    freeze_support()
    # BUG FIX: the executor used to be created at module import time.
    # Under the 'spawn' start method (Windows, and macOS on 3.8+) every
    # worker re-imports this module, so each worker would build its own
    # ProcessPoolExecutor. Creating it under the __main__ guard avoids that.
    pool = ProcessPoolExecutor(7)
    for url in url_list:
        pool.submit(task, url)

    pool.shutdown(wait=True)
from concurrent.futures import ProcessPoolExecutor
import requests
from multiprocessing import freeze_support

def task(url):
    """Download *url* in a worker process; the Response is pickled back."""
    response = requests.get(url)
    return response

def done(future, *args, **kwargs):
    """Completion callback — runs in the parent process.

    BUG FIX: future.result() re-raises any exception the worker hit;
    guard it so one failed URL does not kill the callback.
    """
    try:
        response = future.result()
    except Exception as exc:
        print('request failed:', exc)
        return
    print(response.url, response.status_code, response.content)

url_list = [
    'http://www.bing.com',
    'http://www.sina.com',
    'http://www.baidu.com',
    'http://www.autohome.com.cn',
]

if __name__ == '__main__':
    freeze_support()
    # BUG FIX: create the pool under the __main__ guard — a module-level
    # ProcessPoolExecutor is re-created in every spawned worker when the
    # module is re-imported.
    pool = ProcessPoolExecutor(4)
    for url in url_list:
        v = pool.submit(task, url)
        v.add_done_callback(done)  # callback

    pool.shutdown(wait=True)
3.协程
模块有 asyncio(内置)、gevent、Twisted、Tornado
import asyncio


async def task():
    """Coroutine that yields control to the event loop for five seconds."""
    print('before....task.....')
    await asyncio.sleep(5)
    print('end...task....')


async def _main():
    # gather() runs both coroutines concurrently: total wall time is
    # ~5 seconds, not 10.
    await asyncio.gather(task(), task())


# BUG FIX: generator-based coroutines (@asyncio.coroutine + yield from)
# were deprecated in 3.8 and removed in Python 3.11; async/await with
# asyncio.run() is the supported spelling and also closes the loop.
asyncio.run(_main())
import asyncio

async def task(host, url='/'):
    """Fetch http://<host><url> over a raw asyncio stream and print the reply."""
    print('start',host,url)
    reader, writer = await asyncio.open_connection(host, 80)  # open the connection

    # Hand-built HTTP/1.0 request: with 1.0 the server closes the
    # connection after replying, so reader.read() sees EOF and returns.
    request_header_content = "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n" % (url, host,)
    request_header_content = bytes(request_header_content, encoding='utf-8')

    writer.write(request_header_content)
    await writer.drain()
    text = await reader.read()
    print('end',host, url, text)
    writer.close()

async def _main():
    await asyncio.gather(
        task('www.cnblogs.com', '/wupeiqi/'),
        task('dig.chouti.com', '/pic/show?nid=4073644713430508&lid=10273091'),
    )

# BUG FIX: @asyncio.coroutine / yield-from coroutines were removed in
# Python 3.11; async/await + asyncio.run() replaces the manual
# get_event_loop()/run_until_complete()/close() dance.
asyncio.run(_main())
import aiohttp
import asyncio
import time

async def fetch_async(url):
    """Fetch *url* with aiohttp, pause five seconds, print the response."""
    print('begin...')
    # Modern aiohttp API: the module-level aiohttp.request('GET', ...)
    # coroutine from the 0.x era is gone; a ClientSession owns the
    # connection pool and replaces the hand-built request header of the
    # previous example.
    async with aiohttp.ClientSession() as session:
        response = await session.get(url)
        # BUG FIX: time.sleep(5) blocked the whole event loop, so the
        # "concurrent" fetches actually ran one after another.
        # asyncio.sleep yields control back to the loop instead.
        await asyncio.sleep(5)
        print(url, response)
        response.close()


async def _main():
    await asyncio.gather(
        fetch_async('http://www.baidu.com/'),
        fetch_async('http://www.chouti.com/'),
    )

# @asyncio.coroutine / yield-from was removed in Python 3.11.
asyncio.run(_main())
# BUG FIX: monkey.patch_all() must run before anything that pulls in the
# socket/ssl machinery. The original called it *after* `import requests`,
# so requests could bind the unpatched blocking implementations. Patch
# first, then import everything else.
from gevent import monkey
monkey.patch_all()  # make blocking socket calls cooperative — do not forget this

import gevent
import requests


def task(method, url, req_kwargs):
    """Issue one HTTP request inside a greenlet and print the outcome."""
    print(method, url, req_kwargs)
    response = requests.request(method=method, url=url, **req_kwargs)
    print(response.url, response.content)

##### send the requests — no cap on concurrency here #####
gevent.joinall([
    gevent.spawn(task, method='get', url='https://www.python.org/', req_kwargs={}),
    gevent.spawn(task, method='get', url='https://www.yahoo.com/', req_kwargs={}),
    gevent.spawn(task, method='get', url='https://github.com/', req_kwargs={}),
])
# BUG FIX: monkey.patch_all() must run before `import requests` so the
# blocking socket machinery is replaced before requests binds to it.
from gevent import monkey
monkey.patch_all()

import gevent
import requests
from gevent.pool import Pool


def task(method, url, req_kwargs):
    """Issue one HTTP request inside a pooled greenlet and print the outcome."""
    print(method, url, req_kwargs)
    response = requests.request(method=method, url=url, **req_kwargs)
    print(response.url, response.content)

##### send the requests (a Pool caps the number of concurrent greenlets) #####
pool = Pool(5)
gevent.joinall([
    pool.spawn(task, method='get', url='https://www.python.org/', req_kwargs={}),
    pool.spawn(task, method='get', url='https://www.yahoo.com/', req_kwargs={}),
    pool.spawn(task, method='get', url='https://www.github.com/', req_kwargs={}),
])
from twisted.internet import defer
from twisted.web.client import getPage
from twisted.internet import reactor

# NOTE(review): getPage is deprecated in modern Twisted (twisted.web.client.Agent
# or the treq package replace it) — confirm against the installed version.

def one_done(arg):
    """Per-page callback: *arg* is the downloaded page body."""
    print(arg)

def all_done(arg):
    """Fires exactly once, after every Deferred in the DeferredList has fired."""
    print('done')
    reactor.stop()  # every download finished -> stop the reactor

@defer.inlineCallbacks
def task(url):
    """Kick off an asynchronous download of *url*; yields its Deferred."""
    res = getPage(bytes(url, encoding='utf8'))  # send the HTTP request (step 2)
    res.addCallback(one_done)
    yield res

url_list = [
    'http://www.cnblogs.com',
    'http://www.baidu.com',
    'http://www.cnblogs.com',
    'http://www.cnblogs.com',
]

defer_list = []  # [Deferred, Deferred, ...] (requests already sent)
for url in url_list:
    # v marks a request that is already on the wire
    v = task(url)  # step 1
    defer_list.append(v)  # all requests sent; collect their Deferreds

d = defer.DeferredList(defer_list)
d.addBoth(all_done)  # all_done is executed only once


reactor.run()  # event loop: runs the pending Deferreds until reactor.stop()
from tornado.httpclient import AsyncHTTPClient
from tornado.httpclient import HTTPRequest
from tornado import ioloop

# Number of fetches still in flight; the loop stops when it reaches zero.
COUNT = 0
def handle_response(response):
    """Per-request callback: print the result; the last one stops the IOLoop."""
    global COUNT
    COUNT -= 1
    if response.error:
        print("Error:", response.error)
    else:
        print(response.body)
    # Same idea as the Twisted example, but calling stop() unconditionally
    # here would quit after the FIRST response — hence the counter.
    # ioloop.IOLoop.current().stop()
    if COUNT == 0:
        ioloop.IOLoop.current().stop()  # counter hit zero -> end the loop

def func():  # step 2: runs once the IOLoop has started
    url_list = [
        'http://www.baidu.com',
        'http://www.bing.com',
    ]
    global COUNT
    COUNT = len(url_list)
    for url in url_list:
        print(url)
        http_client = AsyncHTTPClient()  # create the async client
        # handle_response fires when the HTTPRequest finishes downloading.
        # NOTE(review): the callback argument to fetch() was removed in
        # Tornado 6 — confirm the installed version or migrate to the
        # Future/await form.
        http_client.fetch(HTTPRequest(url), handle_response)


ioloop.IOLoop.current().add_callback(func)  # step 1: queue func on the loop
ioloop.IOLoop.current().start()  # event loop (runs until stop())
自定义异步IO框架
import socket
import select

# Everything below is client-side code.

# ########################## What an HTTP request really is (blocking) ##########################
"""
sk = socket.socket()
# 1.连接
sk.connect(('www.baidu.com',80,)) # IO阻塞
print('连接成功了...')

# 2. 连接成功发送消息
sk.send(b'GET / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\n')
# sk.send(b'POST / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\nk1=v1&k2=v2') #sk.recv(8096) 表示最多接受的消息大小

# 3. 等待着服务端响应
data = sk.recv(8096) # IO阻塞
print(data)

# 关闭连接
sk.close()
"""
# ########################## What an HTTP request really is (non-blocking) ##########################
"""
sk = socket.socket()
sk.setblocking(False)
# 1.连接
try:
    sk.connect(('www.baidu.com',80,)) # IO阻塞
    print('连接成功了...')
except BlockingIOError as e:
    print(e)
# 2. 连接成功发送消息
sk.send(b'GET / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\n')
# sk.send(b'POST / HTTP/1.0\r\nHost:www.baidu.com\r\n\r\nk1=v1&k2=v2')

# 3. 等待着服务端响应
data = sk.recv(8096) # IO阻塞
print(data)

# 关闭连接
sk.close()
"""

class MyHttpRequest:
    """Pairs a non-blocking client socket with its host and completion callback."""
    def __init__(self,sk,host,callback):
        self.socket = sk          # the non-blocking socket
        self.host = host          # target host (port 80 is assumed by add_request)
        self.callback = callback  # invoked with a MyHttpResponse when the reply is in
    def fileno(self):
        # select.select() accepts any object exposing fileno(); delegating
        # to the socket lets MyHttpRequest objects be passed to select directly.
        return self.socket.fileno()

# Processes the raw response data.
class MyHttpResponse:
    """Splits raw HTTP response bytes into a header dict and a body."""
    def __init__(self,recv_data):
        self.recv_data = recv_data
        self.header_dict = {}  # response headers
        self.body = None       # response body

        self.initialize()
    def initialize(self):
        """Split at the first blank line: headers above it, body below it."""
        headers, body = self.recv_data.split(b'\r\n\r\n', 1)
        self.body = body
        header_list = headers.split(b'\r\n')
        for h in header_list:  # h is bytes
            h_str = str(h,encoding='utf-8')  # decode to str
            v = h_str.split(':',1)
            # the status line ('HTTP/1.0 200 OK') has no ':' and is skipped;
            # header values keep any leading space after the colon
            if len(v) == 2:
                self.header_dict[v[0]] = v[1]



class AsyncRequest:
    """Minimal select()-based asynchronous HTTP client (event loop in run())."""
    def __init__(self):
        self.conn = []        # every in-flight request (watched for readability)
        self.connection = []  # requests whose connect() has not completed yet

    def add_request(self,host,callback):
        """Start a non-blocking connect to *host*:80 and register the request."""
        try:
            sk = socket.socket()
            sk.setblocking(0)  # non-blocking, so connect() raises BlockingIOError immediately
            sk.connect((host,80,))
        except BlockingIOError as e:
            pass  # expected: the connect completes in the background
        request = MyHttpRequest(sk,host,callback)
        self.conn.append(request)
        self.connection.append(request)

    def run(self):
        """Event loop: poll with select() until every request has finished."""
        while True:
            # connection = sockets waiting to become writable (i.e. connected);
            # conn = sockets we want to read responses from
            rlist,wlist,elist = select.select(self.conn,self.connection,self.conn,0.05)
            for w in wlist:  # w is a MyHttpRequest object
                print(w.host,'连接成功...')
                # being in wlist means the socket finished connecting to the server
                tpl = "GET / HTTP/1.0\r\nHost:%s\r\n\r\n" %(w.host,)
                w.socket.send(bytes(tpl,encoding='utf-8'))
                self.connection.remove(w)  # connected: stop watching for writability
            for r in rlist:
                # r is a MyHttpRequest whose socket has data ready
                recv_data = bytes()
                while True:  # drain the socket in chunks
                    try:
                        data = r.socket.recv(8096)
                        recv_data += data
                    except Exception as e:
                        # non-blocking recv with no data pending raises -> done reading.
                        # NOTE(review): at EOF recv() returns b'' without raising,
                        # which would spin this loop; the demo relies on the
                        # exception firing first — consider breaking on empty data.
                        break
                response = MyHttpResponse(recv_data)
                r.callback(response)
                r.socket.close()
                self.conn.remove(r)
            if len(self.conn) == 0:
                break  # all responses handled -> leave the event loop

def f1(response):
    """Example callback: pretend to save the response to a file."""
    print('保存到文件',response.header_dict)

def f2(response):
    """Example callback: pretend to save the response to a database."""
    print('保存到数据库', response.header_dict)

url_list = [
    {'host':'www.baidu.com','callback': f1},
    {'host':'cn.bing.com','callback': f2},
    {'host':'www.cnblogs.com','callback': f2},
]

req = AsyncRequest()
for item in url_list:
    req.add_request(item['host'],item['callback'])

req.run()