Python beginner's learning record: multi-threaded crawling of .ts fragments

A producer function fills a queue with the URL of every video segment, and sixteen consumer threads then pull URLs off the queue and download the .ts files in parallel.

 

import os
import threading
import requests
from queue import Queue, Empty  # Empty is needed for the non-blocking get below
def download(urls):
    """Producer: build the URL of every .ts segment and put it on the queue."""
    for index in range(1342):
        # Segment names 000-999 are zero-padded to three digits;
        # zfill(3) leaves the four-digit indices 1000-1341 unchanged,
        # so one call covers both cases.
        n = str(index).zfill(3)
        url = "https://zy.512wx.com/20171106/vM1OOVna/1200kb/hls/ppvod1983%s.ts" % n
        print("url", url)
        urls.put(url)
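For reference, zfill only pads on the left when the string is shorter than the target width, which is why a single call replaces the original if/else:

str(7).zfill(3)     # '007'
str(999).zfill(3)   # '999'
str(1341).zfill(3)  # '1341' (already longer than 3, left unchanged)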
class Consumer(threading.Thread):
    """Worker thread: keep pulling segment URLs off the shared queue and downloading them."""
    def __init__(self, urls, *args, **kwargs):
        super(Consumer, self).__init__(*args, **kwargs)
        self.urls = urls
    def run(self):
        while True:
            try:
                # Queue is already thread-safe, so no extra lock is needed;
                # get_nowait() also avoids the race between empty() and get().
                file = self.urls.get_nowait()
            except Empty:
                break
            name = file.split('/')[-1]
            print("name", name)
            # Check for an existing file *before* opening with "wb":
            # open(..., "wb") creates/truncates the file, so checking
            # afterwards (as the original did) would always skip the download.
            if os.access("./video/{}".format(name), os.F_OK):
                continue
            resp = requests.get(file, verify=False)  # the site's certificate fails verification
            with open("./video/{}".format(name), "wb") as fp:
                fp.write(resp.content)
            print(name + " download complete")
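Because verify=False is used, requests (via urllib3) emits an InsecureRequestWarning for every download. If the noise bothers you, it can be silenced up front; this is optional and assumes urllib3 is importable alongside requests:

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)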
def get_ts(urls):
    """Single-threaded alternative (not called from main): takes a plain
    list of URLs and appends every segment into one kuiba.ts file."""
    with open("./video/kuiba.ts", "wb") as fp:
        for index, url in enumerate(urls):
            resp = requests.get(url)
            fp.write(resp.content)
            print(str(index) + " download complete")
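Note that get_ts() expects a plain list rather than a Queue, so calling it means draining the queue first. A sketch, mirroring the commented-out loop kept in main():

urls_q = Queue(2000)
download(urls_q)
url_list = []
while not urls_q.empty():
    url_list.append(urls_q.get())
get_ts(url_list)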
def main():
    urls = Queue(2000)
    download(urls)
    # while not urls.empty():
    #     print(urls.get())
    os.makedirs("./video", exist_ok=True)  # the writes fail if the folder is missing
    # Start 16 consumer threads that all share the same queue.
    for index in range(16):
        x = Consumer(urls)
        x.start()
if __name__ == '__main__':
    main()
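One thing to note: the threads are started but never joined, so main() returns right away while the downloads continue in the background. A minimal variant (my own sketch, not in the original) that keeps the thread objects and blocks until every segment is finished:

threads = [Consumer(urls) for _ in range(16)]
for t in threads:
    t.start()
for t in threads:
    t.join()  # wait for every worker to drain the queue
print("all segments downloaded")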

Sixteen consumer threads are started here; each one keeps pulling URLs from the shared queue and downloading segments until the queue runs dry.
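Once all the workers finish, ./video holds 1342 separate segment files. To end up with a single playable file like the kuiba.ts that get_ts() produces, the segments have to be concatenated in numeric order. A hedged sketch (merge_segments is my own helper, assuming the ppvod1983<index>.ts naming used above):

import os

def merge_segments(folder="./video", out="./video/kuiba.ts"):
    prefix, suffix = "ppvod1983", ".ts"
    # Collect the downloaded segments and sort them by the numeric
    # index embedded in the file name.
    names = [n for n in os.listdir(folder)
             if n.startswith(prefix) and n.endswith(suffix)]
    names.sort(key=lambda n: int(n[len(prefix):-len(suffix)]))
    with open(out, "wb") as fout:
        for name in names:
            with open(os.path.join(folder, name), "rb") as fin:
                fout.write(fin.read())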

 

Original post: www.cnblogs.com/jswf/p/12350057.html