Crawling the video links on a page with regular expressions

1. import re  # the regular-expression module

 

import re  # regular-expression module

import requests

# 1. Send a request to the Pear Video home page to obtain the response data.
response = requests.get('https://www.pearvideo.com/', timeout=10)
print(response.status_code)
print(response.text)

# re.findall("matching rule", "text to parse", flags)
# re.S (DOTALL) lets "." also match newlines, so a match can span the whole
# text; without it, "." stops at the end of each line.

# First attempt:
# res = re.findall('video_(.*?)', response.text, re.S)

# In 'video_(.*?)': "." matches any character, "*" repeats it zero or more
# times, "?" makes the repetition non-greedy, and "()" captures that part.
# With nothing after the group, the non-greedy capture matches as little as
# possible (an empty string), so nothing useful is extracted: the matching
# rule is wrong and has to be refined.
# To refine it: open the browser's developer tools -> Elements -> find an
# <a href=...> tag, right-click and copy it, then keep the part that is
# needed, e.g. <a href="video_

# 2. Extract the detail-page IDs of the videos listed on the home page.
# The closing quote after the group is what makes the non-greedy capture
# stop at the end of the ID.
res = re.findall(r'<a href="video_(.*?)"', response.text, re.S)

for m_id in res:
    # Build the URL of the detail page.
    detail_url = 'https://www.pearvideo.com/video_' + m_id
    print(detail_url)


The complete code is as follows:
import  requests
import re # regular module

# Home page that lists the videos, and the prefix shared by detail-page URLs.
HOME_URL = 'https://www.pearvideo.com/'
DETAIL_URL_PREFIX = 'https://www.pearvideo.com/video_'

# Captures the ID in links such as <a href="video_1234567" ...>.
# The closing quote after the group is required: a non-greedy group with
# nothing after it always matches the empty string.
VIDEO_ID_PATTERN = re.compile(r'<a href="video_(.*?)"', re.S)


def extract_video_ids(html: str) -> list:
    """Return the video IDs found in *html*, in document order.

    An ID is the text between ``<a href="video_`` and the closing quote of
    the ``href`` attribute. Returns an empty list when no link matches.
    """
    return VIDEO_ID_PATTERN.findall(html)


def build_detail_url(m_id: str) -> str:
    """Return the detail-page URL for the video ID *m_id*."""
    return DETAIL_URL_PREFIX + m_id


def main() -> None:
    """Fetch the home page and print the detail-page URL of every video."""
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(HOME_URL, timeout=10)
    print(response.status_code)
    print(response.text)

    for m_id in extract_video_ids(response.text):
        print(build_detail_url(m_id))


if __name__ == '__main__':
    main()

  

 

Guess you like

Origin www.cnblogs.com/evan0925/p/11021655.html