content
foreword
YouTube, the world's largest video website, is written almost entirely in Python.
The website is currently an online video service provider in the industry. Its systems process tens of millions of video clips every day, providing high-quality video uploading, distribution, display and browsing services to users all around the world. In February 2015, CCTV streamed the Spring Festival Gala on the website for the first time.
Today, we're going to use Python to quickly batch download videos from this website
development environment
version: python 3.8
editor: pycharm 2021.2
3rd party modules: requests + tqdm
Required modules
import requests
import re
import json
from tqdm import tqdm
import os
start coding
request data
# Request headers sent with the page fetch.
# NOTE(review): the cookie looks like it was captured from a browser session;
# such values are session-specific and expire, so replace them with your own.
# Avoid committing real cookies to version control.
headers = {
    'cookie': 'VISITOR_INFO1_LIVE=9qZVrzB27uI; PREF=f4=4000000&tz=Asia.Shanghai; _ga=GA1.2.621834420.1648121145; _gcl_au=1.1.1853038046.1648121145; NID=511=Zc1APdmEbCD-iqVNVgI_vD_0S3LVI3XSfl-wUZEvvMU2MLePFKsQCaKUlUtchHSg-kWEVMGOhWUbxpQMwHeIuLjhxaslwniMh1OsjVfmOeTfhpwcRYpMgqpZtNQ7qQApY21xEObCvIez6DCMbjRhRQ5P7siOD3X87QX0CFyUxmY; OTZ=6430350_24_24__24_; GPS=1; YSC=0E115KqM_-I; GOOGLE_ABUSE_EXEMPTION=ID=d02004902c3d0f4d:TM=1648620854:C=r:IP=47.57.243.77-:S=YmZXPW7dxbu83bDuauEpXpE; CONSISTENCY=AGDxDeNysJ2boEmzRP4v6cwgg4NsdN4-FYQKHCGhA0AeW1QjFIU1Ejq1j8l6lwAc6c-pYTJiSaQItZ1M6QeI1pQ3wictnWXTOZ6_y8EKlt0Y_JdakwW6srR39-NLuPgSgXrXwtS0XTUGXpdnt4k3JjQ',
    'referer': 'https://www.youtube.com/results?search_query=jk%E7%BE%8E%E5%A5%B3',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
}

# Watch-page URL of the video to download. The host must be the same site the
# referer and cookies above belong to (www.youtube.com).
url = 'https://www.youtube.com/watch?v=ImoXcSpR_io'

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url=url, headers=headers, timeout=30)
# Fail early on 4xx/5xx instead of trying to parse an error page further down.
response.raise_for_status()
Parse the data
# The page source assigns the player metadata (a JSON object) to
# `ytInitialPlayerResponse`; capture everything up to the following `;var`.
matches = re.findall('var ytInitialPlayerResponse = (.*?);var', response.text)
if not matches:
    # Without this check the failure would surface as a bare IndexError.
    raise RuntimeError(
        'ytInitialPlayerResponse was not found in the page; '
        'the request may have been blocked or the page layout may have changed'
    )
json_str = matches[0]
json_data = json.loads(json_str)

adaptive_formats = json_data['streamingData']['adaptiveFormats']
# NOTE(review): this assumes the first entry is a video-only stream and the
# second-to-last entry is an audio-only stream, and that both carry a direct
# 'url' key — confirm against the actual response before relying on it.
video_url = adaptive_formats[0]['url']
audio_url = adaptive_formats[-2]['url']

# Build a file-system-safe base name from the video title: drop spaces, then
# strip the characters that are not allowed in file names on Windows
# (\ / : | ? * " < >). The backslash has to be written as `\\` inside the
# character class; `\/` alone only matches a forward slash.
title = json_data['videoDetails']['title']
title = title.replace(' ', '')
title = re.sub(r'[\\/:|?*"<>]', '', title)
video data
# Stream the video track to '<title>.mp4' while showing a byte-based progress bar.
video = requests.get(video_url, stream=True)
# Do not save an HTTP error page as if it were the video.
video.raise_for_status()
# Content-Length may be missing; tqdm accepts total=None for "size unknown".
file_size = int(video.headers.get('Content-Length', 0)) or None

video_pbar = tqdm(total=file_size)
# The description does not change during the download, so set it once.
video_pbar.set_description(f'正在下载{title}视频中......')
with open(f'{title}.mp4', mode='wb') as f:
    # Read the body in 2 MiB pieces so the whole file is never held in memory.
    for video_chunk in video.iter_content(1024 * 1024 * 2):
        f.write(video_chunk)
        # Advance by the bytes actually received: the final chunk is usually
        # shorter than 2 MiB, and a fixed step would overshoot the total.
        video_pbar.update(len(video_chunk))
video_pbar.set_description('下载完成!')
video_pbar.close()
audio data
# Stream the audio track to '<title>.mp3' while showing a byte-based progress bar.
# NOTE(review): the '.mp3' extension is kept because merge() reads '<title>.mp3';
# the actual container/codec of the downloaded stream is not checked here.
audio = requests.get(audio_url, stream=True)
# Do not save an HTTP error page as if it were the audio.
audio.raise_for_status()
# Content-Length may be missing; tqdm accepts total=None for "size unknown".
file_size = int(audio.headers.get('Content-Length', 0)) or None

audio_pbar = tqdm(total=file_size)
# The description does not change during the download, so set it once.
audio_pbar.set_description(f'正在下载{title}音频中......')
with open(f'{title}.mp3', mode='wb') as f:
    # Read the body in 2 MiB pieces so the whole file is never held in memory.
    for audio_chunk in audio.iter_content(1024 * 1024 * 2):
        f.write(audio_chunk)
        # Advance by the bytes actually received: the final chunk is usually
        # shorter than 2 MiB, and a fixed step would overshoot the total.
        audio_pbar.update(len(audio_chunk))
audio_pbar.set_description('下载完成!')
audio_pbar.close()
Combine audio and video
def merge(title, ffmpeg=r'D:\Download\ffmpeg\bin\ffmpeg.exe'):
    """Mux '<title>.mp4' and '<title>.mp3' into '<title>-out.mp4' with ffmpeg.

    Both streams are copied without re-encoding (``-acodec copy -vcodec copy``).
    The call blocks until ffmpeg has finished.

    Args:
        title: Base name (without extension) shared by the input files; the
            output is written next to them as '<title>-out.mp4'.
        ffmpeg: Path to the ffmpeg executable. Defaults to the Windows
            location this script was originally written for.

    Raises:
        FileNotFoundError: If the ffmpeg executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Imported locally so the function is self-contained regardless of the
    # module's import block.
    import subprocess

    # Pass the arguments as a list (no shell): the title comes from a remote
    # page and may contain characters a shell would interpret.
    command = [
        ffmpeg,
        '-i', f'{title}.mp4',
        '-i', f'{title}.mp3',
        '-acodec', 'copy',
        '-vcodec', 'copy',
        f'{title}-out.mp4',
    ]
    # check=True surfaces ffmpeg failures instead of silently ignoring them.
    subprocess.run(command, check=True)