1. import re  # regular expression module
import requests
import re  # regular expression module
# 1. Send a request to the Pear Video site to obtain the response data
response = requests.get('https://www.pearvideo.com/')
print(response.status_code)
print(response.text)
# re.findall('regex pattern', 'text to parse', 'matching mode')
# re.S: "dot-all" mode -- "." also matches newlines, so the pattern is matched against the entire text
# If it is omitted, "." stops at each newline, so matching proceeds line by line
# res = re.findall('video_(.*?)', response.text, re.S)
# In video_(.*?): "." matches any character, "*" repeats it zero or more times (greedy by default), "()" marks the part to extract (without it the whole match is returned), and "?" makes the match non-greedy
# If nothing useful is extracted, there is a problem with the matching rule.
# To fix the extraction rule: open the browser developer tools -> Elements -> find the <a href ...> tag, then right-click -> Copy and take the part you need, e.g. <a href="video_
# 2. Get the detail-page IDs of the videos on the home page
res = re.findall('<a href="video_(.*?)"', response.text, re.S)
for m_id in res:
    # build the URL of the detail page
    detail_url = 'https://www.pearvideo.com/video_' + m_id
    print(detail_url)
The complete code is as follows:
# Print the detail-page URL of every video linked from the Pear Video home page.
import re  # regular expression module

HOME_URL = 'https://www.pearvideo.com/'
DETAIL_URL_PREFIX = 'https://www.pearvideo.com/video_'

# The closing quote after the group is what gives the lazy ".*?" something to
# stop at; without it the group always captures the empty string.
# re.S lets "." match newlines so the pattern is applied to the whole text.
_VIDEO_ID_RE = re.compile(r'<a href="video_(.*?)"', re.S)


def extract_video_ids(html):
    """Return the video IDs found in *html*, in document order.

    An ID is whatever follows ``video_`` inside an ``<a href="video_...">``
    attribute. Duplicates are kept; an empty list is returned when nothing
    matches.
    """
    return _VIDEO_ID_RE.findall(html)


def build_detail_url(m_id):
    """Return the detail-page URL for the video ID *m_id*."""
    return DETAIL_URL_PREFIX + m_id


def main():
    """Fetch the home page, print its status and body, then each detail URL."""
    # Imported here so the parsing helpers above stay importable without the
    # third-party dependency.
    import requests

    # Send the request to the Pear Video home page; the timeout keeps the
    # script from hanging forever on an unresponsive server.
    response = requests.get(HOME_URL, timeout=10)
    print(response.status_code)
    print(response.text)

    for m_id in extract_video_ids(response.text):
        print(build_detail_url(m_id))


if __name__ == '__main__':
    main()