Python web crawler: complete code for batch-downloading web video resources (annotated)

# This practice program crawls the video files listed on the 'Pear Video' (pearvideo.com) site and stores them in the video folder.
import os
import re

import requests

def getHTMLText(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded page text, or ``None`` when the request fails (a
        "request failed" message is printed in that case).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # Decode using the encoding detected from the body content rather
        # than whatever the response headers declared.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP-level failures are treated as "request failed";
        # programming errors and KeyboardInterrupt are no longer swallowed.
        print("request failed")
        return None

url = 'https://www.pearvideo.com/category_8'
baseurl = 'https://www.pearvideo.com/'
video_dir = './video'

# getHTMLText returns None when the request fails; fall back to an empty
# string so the regex search below simply finds nothing instead of raising.
video_data = getHTMLText(url) or ''

# Create the output folder if it does not exist yet.
os.makedirs(video_dir, exist_ok=True)

# Video page ids have the form "video_" followed by seven digits.
# findall returns every occurrence, so repeats are dropped (keeping order)
# to avoid downloading the same video more than once.
res = re.compile(r'(video_[0-9]{7})')
video_list = list(dict.fromkeys(res.findall(video_data)))

# Pattern that pulls the actual .mp4 address out of a video page.
# NOTE(review): reconstructed from a garbled listing -- verify against the
# current page markup.
src_url_pattern = re.compile(r'srcUrl="(https://.+?\.mp4)"')

# Visit each video page, locate the real file URL and save the file.
for i in video_list:
    # video_url is the address of the video's page, not the file itself.
    video_url = baseurl + i

    text = getHTMLText(video_url)
    match = src_url_pattern.search(text) if text else None
    if match is None:
        print('%s file content acquisition failure!' % i)
        continue

    try:
        response = requests.get(match.group(1), timeout=30)
        response.raise_for_status()
    except requests.RequestException:
        print('%s file content acquisition failure!' % i)
        # Skip this video: there is nothing valid to write to disk.
        continue

    with open(os.path.join(video_dir, i + '.mp4'), 'wb') as f:
        print('Saving video ...... %s' % i)
        f.write(response.content)

 

Guess you like

Origin www.cnblogs.com/iceberg710815/p/12240415.html