Web Crawler: The Requests Request Process

The three steps of a web crawler

1. Send the request

def get_page(url):
    """Send a GET request to ``url`` and return the response object.

    The result is exactly what ``requests.get(url)`` returns; no status
    check or error handling is done here. Callers in this script read
    ``.text`` (for HTML pages) or ``.content`` (for the video bytes) from it.
    """
    return requests.get(url)

2. Parse the data

import re
def parse_index(html):
    """Extract the detail-page URLs from a listing (index) page.

    Args:
        html: Text of the listing page.

    Returns:
        A list holding the ``href`` value of every ``<a class="imglink">``
        that immediately follows a ``<div class="items">``, in document
        order. The list is empty when nothing matches.
    """
    # re.findall returns every match; because the pattern has a single
    # capture group, each list item is just the captured href.
    # re.S makes "." match newlines as well, so the whole text is searched
    # as one string.
    detail_urls = re.findall(
        '<div class="items"><a class="imglink" href="(.*?)"', html, re.S
    )
    return detail_urls

# Parse details page

def parse_detail(html):
    """Return the video URL found in a detail page, or ``None``.

    Looks for the first ``<source src="...">`` tag in ``html`` and returns
    the value of its ``src`` attribute. When the page has no such tag the
    function returns ``None``.
    """
    # re.S lets "." span newlines, matching the behaviour used for the
    # listing page parser.
    first_source = re.search('<source src="(.*?)">', html, re.S)
    if first_source is None:
        return None
    return first_source.group(1)

3. Save data

import uuid

# uuid.uuid4() generates a random, practically unique identifier; it is used here as the file name

def save_video(content):
    """Write downloaded video bytes to a new ``.mp4`` file.

    The file is created in the current working directory and named
    ``<uuid4>.mp4`` so that successive downloads do not overwrite each other.

    Args:
        content: The raw bytes of the video (e.g. ``response.content``).
    """
    # 'wb' because the payload is binary, not text.
    with open(f'{uuid.uuid4()}.mp4', 'wb') as f:
        f.write(content)
        print('video downloaded ...')

# Tip: in the IDE, type "main" and press Enter to expand the `if __name__ == '__main__':` guard

Test Case:

if __name__ == '__main__':
    # Crawl listing pages 0 through 4.
    for line in range(5):
        url = f'http://www.xiaohuar.com/list-3-{line}.html'

        # Send the request for the listing page.
        response = get_page(url)

        # Parse the listing page to collect the detail-page URLs.
        detail_urls = parse_index(response.text)

        # Visit every detail page found on this listing page.
        for detail_url in detail_urls:
            detail_res = get_page(detail_url)

            # Parse the detail page for the video URL.
            move_url = parse_detail(detail_res.text)

            # parse_detail returns None when the page has no video;
            # skip such pages instead of requesting a missing URL.
            if not move_url:
                continue
            print(move_url)

            # Request the video URL to obtain the video as a binary stream.
            move_res = get_page(move_url)

            # Hand the binary content to save_video to store it locally.
            save_video(move_res.content)

 

Guess you like

Origin www.cnblogs.com/changgeyimeng/p/11115811.html