Python 的这个线程池库感觉不是很好用,使用体验也不太好。
# -*- coding: UTF-8 -*-
__docformat__ = "restructuredtext en"

__all__ = [
    'makeRequests',
    'NoResultsPending',
    'NoWorkersAvailable',
    'ThreadPool',
    'WorkRequest',
    'WorkerThread'
]

__author__ = "Christopher Arndt"
__version__ = '1.3.2'
__license__ = "MIT license"

# standard library modules
import sys
import threading
import traceback

try:
    import Queue            # Python 2
except ImportError:
    import queue as Queue   # Python 3


# exceptions
class NoResultsPending(Exception):
    """Raised by ``ThreadPool.poll`` when no submitted request awaits a result."""
    pass


class NoWorkersAvailable(Exception):
    """Raised by a blocking ``ThreadPool.poll`` when the pool has no workers."""
    pass


# internal module helper functions
def _handle_thread_exception(request, exc_info):
    """Default exception callback: print the traceback of a failed request.

    :param request: the ``WorkRequest`` whose callable raised (unused here).
    :param exc_info: the ``sys.exc_info()`` triple captured in the worker.
    """
    traceback.print_exception(*exc_info)


# utility functions
def makeRequests(callable_, args_list, callback=None,
                 exc_callback=_handle_thread_exception):
    """Build one ``WorkRequest`` per entry of ``args_list``.

    Each entry is interpreted in one of two ways:

    * a ``tuple`` is taken as ``(args, kwds)``: its first element is the
      positional-argument sequence and its second the keyword-argument
      dict, e.g. ``[((1, 2), {'flag': True}), ...]``;
    * anything else is passed to ``callable_`` as its single positional
      argument.

    :param callable_: the function every request will execute.
    :param args_list: list of argument specifications as described above.
    :param callback: called as ``callback(request, result)`` on success.
    :param exc_callback: called as ``exc_callback(request, exc_info)`` when
        the callable raised.
    :returns: list of ``WorkRequest`` objects, in ``args_list`` order.
    """
    requests = []
    for item in args_list:
        if isinstance(item, tuple):
            args, kwds = item[0], item[1]
        else:
            args, kwds = [item], None
        requests.append(
            WorkRequest(callable_, args, kwds, callback=callback,
                        exc_callback=exc_callback)
        )
    return requests


# classes
class WorkerThread(threading.Thread):
    """Background thread that executes requests taken from a shared queue.

    The thread is started from the constructor and keeps running until
    ``dismiss`` is called.
    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Set up the worker and start it immediately.

        :param requests_queue: queue the worker reads ``WorkRequest`` objects
            from.
        :param results_queue: queue the worker writes ``(request, result)``
            tuples to.
        :param poll_timeout: seconds to block on the request queue before
            re-checking the dismissed flag.
        :param kwds: passed through to ``threading.Thread.__init__``.
        """
        threading.Thread.__init__(self, **kwds)
        # Daemon threads do not keep the interpreter alive: when the main
        # thread exits, outstanding requests are abandoned.  The ``daemon``
        # attribute replaces the deprecated ``setDaemon()`` call.
        self.daemon = True
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self._poll_timeout = poll_timeout
        # Set by dismiss() to ask the thread to leave its run loop.
        self._dismissed = threading.Event()
        self.start()

    def run(self):
        """Fetch and execute requests until the worker is dismissed."""
        while not self._dismissed.is_set():
            try:
                # Block for at most poll_timeout so that the dismissed flag
                # is re-checked periodically even when the queue stays empty.
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue

            if self._dismissed.is_set():
                # Dismissed while waiting: hand the request back so another
                # worker can process it, then exit.
                self._requests_queue.put(request)
                break

            try:
                result = request.callable(*request.args, **request.kwds)
                self._results_queue.put((request, result))
            except:
                # Deliberately catch everything: whatever the callable raises
                # is reported through the results queue rather than ending
                # this worker thread.
                request.exception = True
                self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Ask the thread to exit once it has finished its current request."""
        self._dismissed.set()


class WorkRequest:
    """A callable plus its arguments and callbacks, to be run by a worker."""

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
                 callback=None, exc_callback=_handle_thread_exception):
        """Create a request.

        :param callable_: the function to execute.
        :param args: positional arguments for ``callable_``.
        :param kwds: keyword arguments for ``callable_``.
        :param requestID: optional hashable identifier; its hash is stored.
            Defaults to ``id(self)``.
        :param callback: called as ``callback(request, result)`` on success.
        :param exc_callback: called as ``exc_callback(request, exc_info)``
            when the callable raised.
        :raises TypeError: if ``requestID`` is not hashable.
        """
        if requestID is None:
            self.requestID = id(self)
        else:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        # Set to True by the worker when the callable raised.
        self.exception = False
        self.callback = callback
        self.exc_callback = exc_callback
        self.callable = callable_
        # ``or`` turns a None (or otherwise empty) value into an empty
        # container so the worker can always unpack with ``*args, **kwds``.
        self.args = args or []
        self.kwds = kwds or {}

    def __str__(self):
        """Return a short description of the request for debugging."""
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % \
            (self.requestID, self.args, self.kwds, self.exception)


class ThreadPool:
    """A pool of ``WorkerThread`` objects fed from a shared request queue."""

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Create the queues and start ``num_workers`` worker threads.

        :param num_workers: number of worker threads to start.
        :param q_size: maximum size of the request queue (0 = unbounded).
        :param resq_size: maximum size of the results queue (0 = unbounded).
        :param poll_timeout: seconds each worker blocks on the request queue
            before re-checking whether it has been dismissed.
        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        self.workers = []
        # Workers that were dismissed without being joined yet.
        self.dismissedWorkers = []
        # Maps requestID -> request for every request whose result has not
        # been collected; poll() uses it to know when nothing is pending.
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add ``num_workers`` new worker threads to the pool.

        :param num_workers: number of threads to add.
        :param poll_timeout: request-queue timeout, in seconds, for the new
            workers.
        """
        for _ in range(num_workers):
            self.workers.append(
                WorkerThread(self._requests_queue, self._results_queue,
                             poll_timeout=poll_timeout)
            )

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell up to ``num_workers`` workers to quit.

        :param num_workers: how many workers to dismiss; capped at the
            current number of workers.
        :param do_join: if true, block until the dismissed threads have
            finished; otherwise remember them in ``dismissedWorkers`` so
            that ``joinAllDismissedWorkers`` can join them later.
        """
        dismiss_list = []
        for _ in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)

        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Join every previously dismissed worker and forget about them.

        Blocks until each of those threads has left its run loop.
        """
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Queue ``request`` for execution and register it as pending.

        :param request: a ``WorkRequest`` whose exception flag is not set.
        :param block: passed to ``Queue.put``.
        :param timeout: passed to ``Queue.put``.
        """
        assert isinstance(request, WorkRequest)
        # A request that already failed must not be submitted again.
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Collect available results and dispatch their callbacks.

        For a failed request ``exc_callback`` is invoked if it is set;
        ``callback`` is invoked for successful requests, and also for failed
        ones that have no ``exc_callback``.

        :param block: if true, wait for each result instead of returning as
            soon as the results queue is empty.
        :raises NoResultsPending: when no submitted request is outstanding.
        :raises NoWorkersAvailable: when ``block`` is true and the pool has
            no workers left to produce results.
        """
        while True:
            if not self.workRequests:
                raise NoResultsPending
            elif block and not self.workers:
                raise NoWorkersAvailable
            try:
                request, result = self._results_queue.get(block=block)
                if request.exception and request.exc_callback:
                    request.exc_callback(request, result)
                if request.callback and not \
                        (request.exception and request.exc_callback):
                    request.callback(request, result)
                del self.workRequests[request.requestID]
            except Queue.Empty:
                break

    def wait(self):
        """Block until the results of all pending requests were collected."""
        while True:
            try:
                self.poll(True)
            except NoResultsPending:
                break