Python3 多线程爬取梨视频

多线程爬取梨视频

from threading import Thread
import requests
import re


def access_page(url, timeout=10):
    """Send a GET request to ``url`` and return the response.

    Args:
        url: Address to request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a server that stops responding cannot block the
            calling thread forever.

    Returns:
        The ``requests.Response`` object for the request.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status code.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the
    # caller, which would otherwise be parsed or saved as if it were valid.
    response.raise_for_status()
    return response


def get_video_id(homepage_data):
    """Extract the video ids linked from the home page HTML.

    Each id is the text following ``video_`` in an ``<a href="video_...">``
    link; it is later appended to the detail-page URL prefix.

    Args:
        homepage_data: HTML text of the home page.

    Returns:
        A list of id strings in order of first appearance, without
        duplicates. Empty if the page contains no matching link.
    """
    found = re.findall('<a href="video_(.*?)" .*?>', homepage_data, re.S)
    # The same video can be linked more than once on the page; keep only the
    # first occurrence so each video is processed a single time.
    # dict.fromkeys preserves insertion order.
    return list(dict.fromkeys(found))


def get_video_url(detail_page_data):
    """Return the first ``srcUrl="..."`` value found in a detail page.

    Args:
        detail_page_data: HTML text of a video detail page.

    Returns:
        The text between the quotes of the first ``srcUrl="..."`` occurrence.

    Raises:
        IndexError: If the page contains no ``srcUrl="..."`` occurrence.
    """
    src_pattern = re.compile('srcUrl="(.*?)"', re.S)
    candidates = src_pattern.findall(detail_page_data)
    return candidates[0]


def get_video_name(detail_page_date):
    """Return the video title from a detail page.

    The title is the content of the first ``<h1 class="video-tt">`` element;
    it may span several lines because the pattern is matched with ``re.S``.

    Args:
        detail_page_date: HTML text of a video detail page.

    Returns:
        The text inside the first matching ``<h1>`` element.

    Raises:
        IndexError: If the page contains no such element.
    """
    title_pattern = '<h1 class="video-tt">(.*?)</h1>'
    titles = re.findall(title_pattern, detail_page_date, flags=re.S)
    return titles[0]


def _safe_filename(name):
    """Return ``name`` stripped and with file-name-unsafe characters replaced by ``_``."""
    # Path separators, characters reserved on Windows, and line breaks/tabs.
    unsafe = '\\/:*?"<>|\r\n\t'
    return str(name).strip().translate(str.maketrans({ch: '_' for ch in unsafe}))


def save(video_data, name):
    """Write video bytes to ``<name>.mp4`` in the current directory.

    Args:
        video_data: Raw bytes of the video.
        name: Title used for the file name. Surrounding whitespace is removed
            and characters that are invalid in file names are replaced with
            ``_``, so a title such as ``a/b`` neither fails nor writes to a
            different directory. Names without such characters are used
            unchanged.
    """
    with open(f'{_safe_filename(name)}.mp4', 'wb') as f:
        f.write(video_data)
    # Report success only after the file has been closed.
    print(f'视频[{name}]下载成功!')


def run(id):
    """Download one video given its id.

    Fetches the detail page for ``id``, extracts the video title and the
    video URL from it, downloads the video and saves it under its title.

    Args:
        id: Id string appended to ``https://www.pearvideo.com/video_`` to
            form the detail-page URL.
    """
    # Fetch the detail page HTML.
    page_html = access_page('https://www.pearvideo.com/video_' + id).text
    # Extract the title first, then the media URL, from that page.
    title = get_video_name(page_html)
    media_url = get_video_url(page_html)
    # Download the video bytes and write them to disk.
    save(access_page(media_url).content, title)


if __name__ == '__main__':

    # Fetch the home page and extract the ids of the videos it links to.
    homepage_data = access_page('https://www.pearvideo.com/').text
    id_list = get_video_id(homepage_data)

    # Download every video in its own thread. The loop variable is named
    # video_id so the builtin id() is not shadowed at module scope.
    threads = []
    for video_id in id_list:
        t = Thread(target=run, args=(video_id,))
        t.start()
        threads.append(t)

    # Wait explicitly for all downloads to finish before the script ends.
    for t in threads:
        t.join()

猜你喜欢

转载自www.cnblogs.com/bigb/p/11735900.html