# Learning example: crawl every video linked from one 'pear video' category
# page and store the downloaded files in the ./video folder.
import os
import re

import requests

# Category listing to crawl, and the prefix that turns a video id into the
# address of that video's page.
url = 'https://www.pearvideo.com/category_8'
baseurl = 'https://www.pearvideo.com/'

VIDEO_DIR = './video'
REQUEST_TIMEOUT = 30  # seconds; requests waits indefinitely when none is given

# Ids in the listing are "video_" followed by seven digits.
VIDEO_ID_PATTERN = re.compile(r'(video_[0-9]{7})')
# A video page carries the address of the actual file as srcUrl="https://...mp4".
SRC_URL_PATTERN = re.compile(r'srcUrl="(https://.+?mp4)"')


def getHTMLText(url):
    """Return the decoded text of the page at *url*, or None on failure.

    A failed request (connection error, timeout, or HTTP error status) is
    reported with a printed message instead of an exception, so callers must
    check for None before using the result.
    """
    try:
        r = requests.get(url, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException:
        print("request failed")
        return None
    # Let requests guess the charset from the body rather than trusting the
    # declared one.
    r.encoding = r.apparent_encoding
    return r.text


def _find_video_ids(html):
    """Return the distinct video ids found in *html*, in first-seen order."""
    # dict.fromkeys drops repeats while keeping order, so an id that appears
    # several times in the listing is downloaded only once.
    return list(dict.fromkeys(VIDEO_ID_PATTERN.findall(html)))


def _download_video(video_id):
    """Return the raw bytes of the video for *video_id*, or None on failure."""
    # baseurl + id is the video's page, not the video file itself.
    text = getHTMLText(baseurl + video_id)
    if text is None:
        return None
    match = SRC_URL_PATTERN.search(text)
    if match is None:
        return None
    try:
        response = requests.get(match.group(1), timeout=REQUEST_TIMEOUT)
        # Without this check an HTTP error page would be saved as an .mp4.
        response.raise_for_status()
    except requests.RequestException:
        return None
    return response.content


def main():
    """Download every video linked from the category page into VIDEO_DIR."""
    video_data = getHTMLText(url)
    if video_data is None:
        # getHTMLText has already reported the failure; nothing to crawl.
        return

    # Create the target folder if it does not exist yet.
    os.makedirs(VIDEO_DIR, exist_ok=True)

    for i in _find_video_ids(video_data):
        video_content = _download_video(i)
        if video_content is None:
            print('%s file content acquisition failure!' % i)
            # Skip the write: there is no content for this id.
            continue
        with open(os.path.join(VIDEO_DIR, i + '.mp4'), 'wb') as f:
            print('Saving video ......%s' % i)
            f.write(video_content)


if __name__ == '__main__':
    main()