python(2.7)中多线程使用举例

python(2.7)中多线程使用举例

下边的代码都不难理解,不做多余解释。唯一有困惑的地方已经在源码中注释说明。这里也不做多线程编码知识的讲解。把这几种形式(主要是第三种)练成muscle memory就行了,整理在这里是为了有时突然生疏方便查找,同时也做知识分享。


一、使用thread模块

import requests
import thread
import time

# HTTP headers passed to requests.get() by spider().
headers = {
    'User-Agent': 'mySpider-please let me try',
    'Version': '1.0',
}

def spider(url):
    """Fetch *url* and print its status code, body length and the current time.

    Args:
        url: Address of the page to request.
    """
    r = requests.get(url, headers=headers)
    # time.ctime() pads single-digit days with a space ("Jun  9"), so
    # splitting on ' ' and taking field 3 yields the day instead of the clock
    # time on days 1-9. Format the local time directly instead.
    now = time.strftime('%H:%M:%S')
    print('%s %s %s' % (r.status_code, len(r.content), now))

def main():
    """Fetch pages 2304-2309, each in its own low-level thread, and wait.

    The thread module has no join(), so every worker gets a lock that is
    acquired before the thread starts and released when the worker finishes.
    Re-acquiring each lock at the end blocks until all workers are done;
    without this the program could exit while requests are still in flight.
    """
    done_locks = []

    def worker(url, done):
        # Release the lock even if spider() raises, so main() never hangs.
        try:
            spider(url)
        finally:
            done.release()

    for i in range(2304, 2310):
        url = 'http://xxxx.net/ooxx/page-' + str(i)
        done = thread.allocate_lock()
        done.acquire()
        done_locks.append(done)
        thread.start_new_thread(worker, (url, done))
        # Stagger the thread start-up slightly, as the original example did.
        time.sleep(0.1)

    # Each acquire() returns only after the matching worker has released.
    for done in done_locks:
        done.acquire()

# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()

二、使用threading模块

import requests
import threading
import time

# HTTP headers passed to requests.get() by spider().
headers = {
    'User-Agent': 'mySpider-please let me try',
    'Version': '1.1',
}

def spider(url):
    """Fetch *url*, print status code, body length and time, then pause.

    Args:
        url: Address of the page to request.
    """
    r = requests.get(url=url, headers=headers)
    # time.ctime() pads single-digit days with a space ("Jun  9"), so
    # splitting on ' ' and taking field 3 yields the day instead of the clock
    # time on days 1-9. Format the local time directly instead.
    now = time.strftime('%H:%M:%S')
    print('%s %s %s' % (r.status_code, len(r.content), now))
    time.sleep(0.1)


def main():
    """Fetch pages 2304-2309 concurrently, one threading.Thread per page.

    All threads are created first, then started, then joined, so main()
    returns only after every spider() call has finished.
    """
    page_urls = ['http://xxxx.net/ooxx/page-' + str(page)
                 for page in range(2304, 2310)]

    workers = [threading.Thread(target=spider, args=(page_url,))
               for page_url in page_urls]

    for worker in workers:
        worker.start()

    for worker in workers:
        worker.join()

# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()

三、threading模块+Queue模块

import threading
import time
from Queue import Empty
from Queue import Queue

import requests

class XxxxSpider(threading.Thread):
    """Worker thread that drains page URLs from a shared queue.

    Args:
        queue: Queue-like object holding the page URLs to process. The same
            queue is handed to every worker thread.
    """

    def __init__(self, queue):
        super(XxxxSpider, self).__init__()
        self._queue = queue

    def run(self):
        """Print queued page URLs until the queue is exhausted.

        start() makes run() execute in a new thread; calling run() directly
        executes it in the current thread instead. start() uses the low-level
        start_new_thread to launch a bootstrap method which, after some
        preparation, calls run().
        """
        # NOTE(review): the module-level spider() is never called from here;
        # presumably this loop should call spider(page_url) -- confirm.
        while True:
            try:
                # Take an item directly rather than testing empty() first:
                # another worker may remove the last item between the check
                # and the get, which would raise Empty out of the thread.
                page_url = self._queue.get_nowait()
            except Empty:
                return
            print(page_url)


# HTTP headers passed to requests.get() by spider().
headers = {
    'User-Agent': 'mySpider-please let me try',
    'Version': '1.2',
}
def spider(url):
    """Fetch *url*, print status code, body length and time, then pause.

    Args:
        url: Address of the page to request.
    """
    r = requests.get(url=url, headers=headers)
    # time.ctime() pads single-digit days with a space ("Jun  9"), so
    # splitting on ' ' and taking field 3 yields the day instead of the clock
    # time on days 1-9. Format the local time directly instead.
    now = time.strftime('%H:%M:%S')
    print('%s %s %s' % (r.status_code, len(r.content), now))
    time.sleep(0.1)

def main():
    """Queue the URLs of pages 2304-2309 and process them with 10 workers.

    Every XxxxSpider shares the same queue; main() returns once all worker
    threads have been joined.
    """
    queue = Queue()
    for page in range(2304, 2310):
        queue.put('http://xxxx.net/ooxx/page-' + str(page))

    thread_count = 10
    threads = [XxxxSpider(queue) for _ in range(thread_count)]

    # start() makes run() execute in a new thread; calling run() directly
    # would execute it in the current thread. start() uses the low-level
    # start_new_thread to launch a bootstrap method which, after some
    # preparation, calls run().
    for worker in threads:
        worker.start()

    for worker in threads:
        worker.join()

# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/AlimSah/article/details/62424812
今日推荐