Web crawlers: the three-step routine
1. Send the request
def get_page(url, timeout=None):
    """Send a GET request to *url* and return the response object.

    Args:
        url: Address of the page (or media file) to fetch.
        timeout: Optional number of seconds forwarded to ``requests.get``.
            The default ``None`` applies no timeout, which matches calling
            ``requests.get(url)`` with no extra arguments.

    Returns:
        The response object produced by ``requests.get``.
    """
    # Imported locally because no file-level ``import requests`` is visible.
    import requests

    return requests.get(url, timeout=timeout)
2. Parse the data
import re


def parse_index(html):
    """Extract the detail-page URLs from a listing page.

    Args:
        html: Text of the listing page.

    Returns:
        A list with the ``href`` value of every ``<a class="imglink">`` that
        directly follows a ``<div class="items">``; empty if none match.
    """
    # re.findall returns every match of the capture group.
    # re.S lets "." also match newlines, so the whole text is searched.
    detail_urls = re.findall(
        r'<div class="items"><a class="imglink" href="(.*?)"',
        html,
        re.S,
    )
    return detail_urls
# Parse the detail page
def parse_detail(html):
    """Return the first video URL found in a detail page.

    Args:
        html: Text of the detail page.

    Returns:
        The ``src`` value of the first ``<source>`` tag, or ``None`` when the
        page contains no such tag.
    """
    # Stop at the closing quote of ``src`` rather than at ``">``, so that
    # extra attributes after ``src`` do not end up inside the captured URL.
    movie_url = re.findall(r'<source src="(.*?)"', html, re.S)
    if movie_url:
        return movie_url[0]
    return None
3. Save data
import uuid
# uuid.uuid4() generates a random UUID, used here as a unique file name.
def save_video(content):
    """Write *content* to a new ``<uuid>.mp4`` file in the working directory.

    Args:
        content: Binary video data (bytes) to store.
    """
    # "wb": the payload is raw bytes, so no text encoding is involved.
    with open(f'{uuid.uuid4()}.mp4', 'wb') as f:
        f.write(content)
    print('video downloaded ...')
# Main + Enter key
Test Case:
if __name__ == '__main__':
    # Walk the first five listing pages.
    for line in range(5):
        url = f'http://www.xiaohuar.com/list-3-{line}.html'

        # Send the request for the listing page; the response exposes
        # ``status_code`` and ``text`` for inspection.
        response = get_page(url)

        # Parse the listing page for the detail-page URLs.
        detail_urls = parse_index(response.text)

        # Visit every detail page.
        for detail_url in detail_urls:
            detail_res = get_page(detail_url)

            # Parse the detail page for the video URL.
            move_url = parse_detail(detail_res.text)

            # Skip detail pages that expose no video URL.
            if not move_url:
                continue
            print(move_url)

            # Request the video URL to obtain the binary stream ...
            move_res = get_page(move_url)

            # ... and hand the bytes to save_video to store them locally.
            save_video(move_res.content)