Python 3: downloading a specified list with multiple processes

This code implements a multi-process download of the files named in a specified list.

Considerations include:

1. During the download, the program prints the total number of items together with how many already exist, how many have been downloaded, how many failed, and how many remain, so progress is always visible.

2. The number of retries per download can be specified (it is set inside the program).

3. The number of processes and the download list are passed as command-line arguments (see the usage example below).

4. The save location needs to be specified inside the program.
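
For example, assuming the script below is saved as downloader.py (the file name is hypothetical), starting eight download processes against a list file named download_list.csv would look like:

python downloader.py 8 download_list.csv

Each row of the CSV pairs a URL suffix with a save name; made-up contents might be:

page1.html,page1
page2.html,page2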

 

# -*- coding: utf-8 -*-
"""
Created on Sat Nov 16 07:52:40 2019

@author: mi
"""
import requests
import os
import csv

exist_count = 0       # already exists
downloaded_count = 0  # downloaded
total_count = 0       # total
error_count = 0       # errors

def downloading_over(arg):
    global downloaded_count
    global total_count
    global exist_count
    global error_count
    print("返回状态:",arg)
    if arg=='EXISTS':
        exist_count+=1
    if arg=='SUCCESS':
        downloaded_count+=1
    if arg=='ERROR':
        error_count+=1
    print('总数:%s / 已存在:%s / 已下载:%s / 出错:%s / 剩余:%s' % (str(total_count),str(exist_count),str(downloaded_count),str(error_count),str(total_count-exist_count-downloaded_count-error_count)))
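
A detail worth noting: the callback passed to pool.apply_async runs in the parent process (on the pool's result-handler thread), not in the worker process, which is why these plain module-level counters work without locks or shared memory. A minimal standalone sketch (all names hypothetical) demonstrating this:

from multiprocessing import Pool
import os

done = 0  # plain global, updated only by the callback

def work(x):
    return x * x

def on_done(result):
    global done
    done += 1  # executes in the parent process, not the worker
    print('callback pid:', os.getpid(), 'result:', result)

if __name__ == '__main__':
    print('main pid:', os.getpid())  # same pid as printed by the callback
    pool = Pool(2)
    for i in range(4):
        pool.apply_async(work, (i,), callback=on_done)
    pool.close()
    pool.join()
    print('completed:', done)  # 4: every callback ran in this process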


def get_page(link):
    url = link[0]
    savePath = link[1]
    print(savePath)
    if os.path.exists(savePath):
        print('Already exists')
        return 'EXISTS'
    times = 3  # number of retries, specified here in the program
    while times > 0:
        times = times - 1
        try:
            resp = requests.get(url, timeout=30)
        except requests.RequestException as e:
            print(e)
            continue  # retry on network errors
        # create the target directory if it does not exist yet
        if not os.path.exists(os.path.dirname(savePath)):
            os.makedirs(os.path.dirname(savePath))
        with open(savePath, 'wb') as fw:
            fw.write(resp.content)
        return 'SUCCESS'
    else:
        # this while/else branch runs only after all retries are exhausted
        return 'ERROR'
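
One caveat in get_page: resp.content is written to disk even when the server answers with an error status such as 404 or 500. A hedged tweak (an assumption, not part of the original) is to validate the status code right after the request; since requests.HTTPError is a subclass of requests.RequestException, the existing except clause would then retry on HTTP-level failures too:

import requests

def fetch_checked(url, timeout=30):
    # hypothetical helper: succeed only on a 2xx response
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()  # raises requests.HTTPError on 4xx/5xx
    return resp

Calling fetch_checked(url) in place of requests.get(url, timeout=30) inside the retry loop makes bad status codes count against the retry budget as well.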

from multiprocessing import Pool
import sys


if __name__ == "__main__":
    # workaround that lets multiprocessing run under Spyder/IPython
    __spec__ = "ModuleSpec(name='builtins', loader=<class '_frozen_importlib.BuiltinImporter'>)"
    process_num = sys.argv[1]
    print('The number of processes: ' + process_num)
    download_list = sys.argv[2]
    print('Download list: ' + download_list)

    pool = Pool(processes=int(process_num))  # set the max number of processes
    with open(download_list, 'r', encoding='utf-8') as downlist:
        lines = csv.reader(downlist)
        # in the download list, the first column is the download link
        # and the second column is the storage location
        for line in lines:
            total_count += 1
            link = []
            url = 'http://www.xxx.com/' + line[0]
            link.append(url)
            savePath = 'D:/saveFolder/' + line[1] + '.htm'
            link.append(savePath)
            pool.apply_async(func=get_page, args=(link,), callback=downloading_over)
    pool.close()
    pool.join()
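
With the example list above, the first row would fetch http://www.xxx.com/page1.html and save it as D:/saveFolder/page1.htm; both the URL prefix and the save folder are placeholders to adapt in the program (consideration 4). Also note that total_count grows in the parent process while rows are still being queued, so the 'Remaining' figure printed by the callback is only exact once the whole list has been read.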

 
