Python: uso de la biblioteca requests para descargar archivos grandes mostrando la velocidad en tiempo real y una barra de progreso, con reanudación de descargas interrumpidas

import os
import time
import logging
import requests
from urllib.parse import unquote
from contextlib import closing


# Bytes per streamed chunk (1 MiB); the script also divides byte counts by
# this value to express sizes in MiB.
chunkSize = 1 << 20
# Upper bound on the number of download passes attempted per file.
loop = 5

# Request headers sent with every HTTP call: a desktop-browser User-Agent.
_user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
headers = {"User-Agent": _user_agent}


def speed_handle(process, file_length, bar_width=20):
    """Print a coloured percentage and a text progress bar to stdout.

    The text is written without a trailing newline and flushed immediately,
    so the caller can keep composing a single status line.

    Args:
        process: Amount completed so far, in the same unit as ``file_length``.
        file_length: Total amount expected.
        bar_width: Number of cells in the bar; the same width is used for the
            partial and the completed bar so the line does not jump in size.
    """
    # ">=" rather than "!=": the arguments are accumulated floats, so exact
    # equality at the end of a download is not guaranteed.  A non-positive
    # total has nothing left to fetch and would otherwise divide by zero.
    if file_length <= 0 or process >= file_length:
        progress = ' \033[1;33m{}\033[0m% |{}|'.format(100, '■' * bar_width)
    else:
        num = max(process, 0) / file_length
        # Derive the empty part from the filled part so both always add up
        # to exactly bar_width cells.
        filled = round(num * bar_width)
        progress = ': \033[1;33m{:.2f}\033[0m%|{}{}| '.format(
            float(num * 100), '■' * filled, '□' * (bar_width - filled))
    print(progress, flush=True, end='')
 

def get_file_name(url, headers):
    """Choose a local file name for a download.

    The name is taken from the ``filename=`` parameter of the
    ``Content-Disposition`` entry in ``headers`` when present, otherwise from
    the last path segment of ``url`` (query string and fragment removed).
    If neither yields a name, the current timestamp is used.

    Args:
        url: URL of the resource.
        headers: Mapping of header names to values.

    Returns:
        A non-empty file name (always a ``str``) without directory parts.
    """
    filename = ''
    disposition = headers.get('Content-Disposition') or ''
    # The first ';'-separated item is the disposition type; the parameters
    # follow in any order, so scan all of them.
    for param in disposition.split(';')[1:]:
        key, sep, value = param.strip().partition('=')
        if sep and key.strip().lower() == 'filename':
            value = unquote(value.strip().strip('"\''))
            # The value is supplied by the remote server: keep only the final
            # path component so it cannot point outside the target directory.
            filename = os.path.basename(value.replace('\\', '/'))
            break
    if not filename:
        # Cut the query/fragment first: a '/' inside them would otherwise be
        # mistaken for a path separator by basename().
        path = url.split('#', 1)[0].split('?', 1)[0]
        filename = os.path.basename(path)
    if not filename:
        # Returned as text so the result can always be joined into a path.
        return str(time.time())
    return filename


def file_download(fileUrl, filePath, timeout=30):
    """Download ``fileUrl`` into ``filePath``, resuming from a partial file.

    The total size is read from the ``content-length`` header of a first
    request.  Up to ``loop`` passes are then made; each pass asks the server
    for the bytes missing from ``filePath`` via a ``Range`` header and
    appends them, printing progress, average speed and an estimated
    remaining time on one console line.

    Args:
        fileUrl: URL of the file to fetch.
        filePath: Destination path; an existing file is treated as a partial
            download and extended.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            connection cannot block forever.

    Returns:
        True when the local file is at least as large as the size advertised
        by the server, False otherwise (including when no usable size is
        advertised).

    Raises:
        requests.RequestException: If the initial size request fails.
        OSError: If ``filePath`` cannot be opened or written.
    """
    # Only the headers of this first response are needed; close it right
    # away so the streamed connection is not left open.
    with closing(requests.get(fileUrl, headers=headers, stream=True, timeout=timeout)) as response:
        response.raise_for_status()
        try:
            fileSize = int(response.headers['content-length'])  # file size in bytes
        except (KeyError, ValueError):
            # Without a total size neither progress nor completion can be
            # determined.
            return False

    filename = os.path.basename(filePath)
    # The "<index>/<total>" prefix comes from the loop variables of the
    # script's __main__ section; fall back to the bare file name when this
    # function is used without them (e.g. imported from another module).
    try:
        label = '{}/{}: {}'.format(cnt+1, len(fileUrls), filename)
    except NameError:
        label = filename

    for _ in range(loop):
        # Size of what is already on disk decides where to resume from.
        tmpSize = os.path.getsize(filePath) if os.path.exists(filePath) else 0
        if tmpSize >= fileSize:
            return True

        # Open-ended range: "everything from tmpSize to the end".
        _headers = dict(headers, Range="bytes={}-".format(tmpSize))
        contentSize = 0
        remainSize = (fileSize - tmpSize) / chunkSize
        st = time.perf_counter()

        try:
            with closing(requests.get(fileUrl, headers=_headers, stream=True, timeout=timeout)) as _response:
                _response.raise_for_status()
                if _response.status_code == 206:
                    mode = "ab"
                else:
                    # The server ignored the Range header and is sending the
                    # whole file: start over instead of appending it to the
                    # partial data.
                    mode = "wb"
                    tmpSize = 0
                    remainSize = fileSize / chunkSize

                with open(filePath, mode) as file:
                    for content in _response.iter_content(chunk_size=chunkSize):
                        if not content:
                            continue

                        file.write(content)
                        timeTook = time.perf_counter() - st
                        contentSize += len(content) / chunkSize
                        print('\r' + label, flush=True, end='')

                        speed_handle(contentSize+tmpSize/chunkSize, fileSize/chunkSize)
                        # average download speed of this pass
                        downloadSpeed = contentSize / timeTook if timeTook > 0 else 0.0
                        # estimated remaining time, extrapolated from this pass
                        remainingTime = max(0, int(timeTook * remainSize / contentSize - timeTook))

                        print('[' + 'average speed: \033[1;31m{:.2f}MiB/s\033[0m, remaining time: \033[1;32m{}s\033[0m, file size: \033[1;34m{:.2f}MiB\033[0m'.format(
                            downloadSpeed,
                            remainingTime,
                            fileSize/chunkSize) + ']', flush=True, end=' '
                        )
        except requests.RequestException as err:
            # A dropped connection is exactly what the resume loop is for:
            # keep the bytes written so far and continue from the new offset.
            logging.warning('%s: download pass interrupted: %s', filename, err)

    # The last allowed pass may have completed the file, so check once more
    # instead of reporting failure unconditionally.
    return os.path.exists(filePath) and os.path.getsize(filePath) >= fileSize


if __name__ == '__main__':
    # Script entry point: download every URL listed in `urlTxt` into
    # `pathSave`, log the outcome of each file to downloading.log, write the
    # URLs that failed to failedUrl.txt and print a summary.

    urlTxt = './url.txt'

    pathSave = '/data2/sam_down'
    os.makedirs(pathSave, exist_ok=True)

    logging.basicConfig(level=logging.INFO, filename='downloading.log', filemode='a', format="%(message)s")
    localtime = time.strftime('%Y-%m-%d %H:%M:%S')
    logging.info(localtime + ': Start downloading task: {}'.format(urlTxt))
    failedUrl = []

    # Read the list up front so the URL file is not held open while the
    # downloads run; blank lines are not URLs and are skipped.
    with open(urlTxt, "r") as f:
        fileUrls = [line.strip() for line in f if line.strip()]

    # `cnt` and `fileUrls` must remain module-level names: file_download()
    # reads them to build the "<index>/<total>" progress prefix.
    for cnt, fileUrl in enumerate(fileUrls):

        # Determine the local file name.
        # NOTE(review): `headers` is the request-header dict defined at the
        # top of the file, so get_file_name() never sees a Content-Disposition
        # header here and the name always comes from the URL.
        filename = str(get_file_name(fileUrl, headers))

        try:
            t0 = time.perf_counter()
            isDload = file_download(fileUrl, os.path.join(pathSave, filename))
            t1 = time.perf_counter()
        except Exception:
            # `Exception` rather than a bare except so Ctrl-C still stops the
            # script; record the traceback instead of failing silently.
            localtime = time.strftime('%Y-%m-%d %H:%M:%S')
            logging.exception(localtime + ': {} download raised an error! Url: {}'.format(filename, fileUrl))
            failedUrl.append(fileUrl)
            continue

        localtime = time.strftime('%Y-%m-%d %H:%M:%S')
        if isDload:
            logging.info(localtime + ': {} download successfully! Time consuming: {:.3f}s'.format(filename, t1-t0))
        else:
            logging.info(localtime + ': {} download failed! Url: {}'.format(filename, fileUrl))
            failedUrl.append(fileUrl)

    if failedUrl:
        with open('failedUrl.txt','w') as p:
            for url in failedUrl:
                p.write(url + '\n')

    fn = len(failedUrl)
    sn = len(fileUrls) - fn
    print("\n{} file{} download successfully, {} file{} download failed!".format(sn, 's'*(sn>1), fn, 's'*(fn>1)))

El efecto es el siguiente:

 

Supongo que te gusta

Origin blog.csdn.net/qq_36801705/article/details/131299406
Recomendado
Clasificación