python下载maizi学院视频

#!/usr/bin/env python
# -*- coding:utf-8 -*-

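'''
    Crawler usage demo.
    MaiZi protects its videos against hotlinking, so every request must carry a
    Referer header; without it the video download returns HTTP 403.
    demo: python maizi.py http://www.maiziedu.com/course/307/
'''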

import re
import sys
from multiprocessing.pool import Pool
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

class MaiZi:
    """Download every lesson video of one maiziedu.com course.

    The course page lists lesson links; each lesson page embeds the real
    video address in a ``$lessonUrl = "..."`` script assignment. All requests
    carry a ``Referer`` header because the site answers 403 without it.
    """

    # Site root that lesson hrefs are resolved against.
    BASE_URL = 'http://www.maiziedu.com/'
    # Compiled once; extracts the video address from a lesson page.
    _LESSON_URL_RE = re.compile(r'\$lessonUrl = "(.*?)"')
    # Characters that are illegal (or path separators) in file names.
    _UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|\r\n\t]')
    # Bytes written to disk per streamed chunk.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, url, timeout=30):
        """Store the course URL and the headers sent with every request.

        Args:
            url: Address of the course page listing the lessons.
            timeout: Seconds passed as ``timeout`` to every HTTP request so
                that a stalled connection cannot hang a worker forever.
        """
        self.url = url
        self.timeout = timeout
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
            "Referer": "http://www.maiziedu.com"
        }

    @staticmethod
    def _safe_filename(title):
        """Return *title* with characters unusable in a file name replaced by ``_``."""
        cleaned = MaiZi._UNSAFE_FILENAME_RE.sub('_', title).strip()
        # Fall back to a fixed name when nothing usable is left.
        return cleaned or 'video'

    def parse_next_url(self):
        """Yield the absolute URL of every lesson linked from the course page.

        Raises:
            requests.HTTPError: If the course page request fails.
        """
        response = requests.get(self.url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, 'lxml')
        for link in soup.select('ul.lesson-lists li a'):
            href = link.get('href')
            if not href:
                # An anchor without a target cannot be followed.
                continue
            # urljoin copes with "/path", "path" and absolute hrefs alike,
            # avoiding the double slash plain concatenation produces.
            yield urljoin(self.BASE_URL, href)

    def parse_content(self, url):
        """Download the video of one lesson page into ``<title>.mp4``.

        Args:
            url: Address of a lesson page.

        Raises:
            requests.HTTPError: If the lesson page or the video request fails
                (for example a 403 from the hotlink protection).
            IndexError: If the page contains no ``$lessonUrl`` assignment or
                no ``span.selected`` element.
        """
        page = requests.get(url, headers=self.headers, timeout=self.timeout)
        page.raise_for_status()
        page.encoding = page.apparent_encoding

        # Extract everything needed from the page before starting the
        # (potentially large) video transfer.
        match = self._LESSON_URL_RE.search(page.text)
        if match is None:
            raise IndexError('no $lessonUrl found in %s' % url)
        video_url = match.group(1)

        selected = BeautifulSoup(page.text, 'lxml').select('span.selected')
        if not selected:
            raise IndexError('no span.selected (lesson title) found in %s' % url)
        title = selected[0]['name']  # name of the video

        print('#'*20)
        print('download...')
        # Stream to disk chunk by chunk instead of holding the whole video in
        # memory, and fail on an error status rather than saving the error
        # body as an .mp4 file.
        with requests.get(video_url, headers=self.headers, stream=True,
                          timeout=self.timeout) as video:
            video.raise_for_status()
            with open(self._safe_filename(title) + '.mp4', 'wb') as out:
                for chunk in video.iter_content(chunk_size=self.CHUNK_SIZE):
                    if chunk:
                        out.write(chunk)
        print("下载完成:", title)

    def parse_pool(self):
        """Download all lessons of the course using four worker processes."""
        # map() blocks until every lesson is done; the context manager then
        # tears the workers down, including when a download raised.
        with Pool(4) as pool:
            pool.map(self.parse_content, self.parse_next_url())



if __name__ == '__main__':
    # The course page URL is expected as the first command-line argument;
    # print a usage hint instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit('usage: python maizi.py <course-url>  '
                 '(e.g. http://www.maiziedu.com/course/307/)')
    course = sys.argv[1]
    MaiZi(course).parse_pool()
复制代码

更多内容请访问 www.zxb8.cc

转载于:https://juejin.im/post/5d0984fc5188255c636e290e

猜你喜欢

转载自blog.csdn.net/weixin_33981932/article/details/93175074