This code implements multi-process downloading of a specified list of files.
Considerations include:
1. During the download process, show the totals for existing, downloaded, errored, and remaining items, so progress can be tracked at a glance.
2. The number of retries for each download can be specified (set in the program).
3. The number of processes and the download list are command-line arguments.
4. The save location needs to be specified in the program.
# -*- coding: utf-8 -*-
"""
Multi-process batch downloader.

Reads a CSV download list (column 0: URL suffix, column 1: save name),
fetches each URL in a worker pool, and reports progress in the main
process through the pool callback.

Usage: python script.py <process_count> <download_list.csv>

Created on Sat Nov 16 07:52:40 2019
@author: mi
"""
import csv
import os
import sys
from multiprocessing import Pool

import requests

# Progress counters. apply_async callbacks run in the parent process,
# so plain module globals are safe here (no cross-process sharing).
exist_count = 0       # already on disk, skipped
downloaded_count = 0  # downloaded successfully this run
total_count = 0       # total entries read from the list
error_count = 0       # failed after all retries


def downloading_over(arg):
    """Pool callback: tally a worker's status string and print progress.

    arg is one of 'EXISTS', 'SUCCESS', 'ERROR' as returned by get_page().
    """
    global downloaded_count, total_count, exist_count, error_count
    print("返回状态:", arg)
    # Statuses are mutually exclusive, so elif is sufficient.
    if arg == 'EXISTS':
        exist_count += 1
    elif arg == 'SUCCESS':
        downloaded_count += 1
    elif arg == 'ERROR':
        error_count += 1
    print('总数:%s / 已存在:%s / 已下载:%s / 出错:%s / 剩余:%s' %
          (str(total_count), str(exist_count), str(downloaded_count),
           str(error_count),
           str(total_count - exist_count - downloaded_count - error_count)))


def get_page(link):
    """Download one URL to disk, retrying up to 3 times on request errors.

    link: [url, save_path]. Returns 'EXISTS' if the target file is already
    present, 'SUCCESS' on a completed download, 'ERROR' when every retry
    failed.
    """
    url = link[0]
    save_path = link[1]
    print(save_path)
    if os.path.exists(save_path):
        print('已存在')
        return 'EXISTS'
    times = 3
    while times > 0:
        times -= 1
        try:
            resp = requests.get(url, timeout=30)
        except requests.RequestException as e:
            # Log and retry until the attempt budget is exhausted.
            print(e)
            continue
        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        with open(save_path, 'wb') as fw:
            fw.write(resp.content)
        return 'SUCCESS'
    # All retries exhausted without a successful download.
    return 'ERROR'


if __name__ == "__main__":
    # Workaround so multiprocessing works when launched from Spyder/IPython
    # on Windows (the IDE leaves __spec__ unset in __main__).
    __spec__ = "ModuleSpec(name='builtins', loader=<class '_frozen_importlib.BuiltinImporter'>)"
    process_num = sys.argv[1]
    print('The number of processes:' + process_num)
    download_list = sys.argv[2]
    print('Download list:' + download_list)
    pool = Pool(processes=int(process_num))  # set max number of worker processes
    # Download list CSV: first column is the URL suffix, second the save name.
    with open(download_list, 'r', encoding='utf-8') as downlist:
        lines = csv.reader(downlist)
        for line in lines:
            total_count += 1
            url = 'http://www.xxx.com/' + line[0]
            save_path = 'D:/saveFolder/' + line[1] + '.htm'
            pool.apply_async(func=get_page, args=([url, save_path],),
                            callback=downloading_over)
    pool.close()
    pool.join()