Python Training Day - Web Crawlers

# 1. Basic use of requests
import requests  # import the requests library

# Send a request to the Baidu homepage and get a response object
response = requests.get(url='https://www.baidu.com/')

# Set the character encoding to utf-8
response.encoding = 'utf-8'

# Print the response text
print(response.text)

# Write the response text to a local file
with open('baidu.html', 'w', encoding='utf-8') as f:
    f.write(response.text)
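# A small extra sketch of the same request with a status check: only write the
# file when the response is 200, and let requests guess the encoding when the
# header does not declare one (the output filename here is just an example).
response = requests.get(url='https://www.baidu.com/')
if response.status_code == 200:
    response.encoding = response.apparent_encoding  # guess the encoding from the body
    with open('baidu_checked.html', 'w', encoding='utf-8') as f:
        f.write(response.text)
else:
    print('request failed:', response.status_code)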
# 2. Crawling Pear Video
'''
Video option:
Pear Video
'''
# import requests
#
# # Send a request to the source address of the video
# response = requests.get(
#     'https://video.pearvideo.com/mp4/adshort/20190625/1570302-14057031_adpkg-ad_hd.mp4')
#
# # Print the binary stream, e.g. image or video data
# print(response.content)
#
# # Save the video locally
# with open('video.mp4', 'wb') as f:
#     f.write(response.content)
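# For large files, a streamed download avoids holding the whole video in memory.
# A minimal sketch along the same lines, kept commented out like the block above
# (the url is the same example address):
# with requests.get(
#         'https://video.pearvideo.com/mp4/adshort/20190625/1570302-14057031_adpkg-ad_hd.mp4',
#         stream=True) as response:
#     with open('video_streamed.mp4', 'wb') as f:
#         for chunk in response.iter_content(chunk_size=1024 * 1024):
#             f.write(chunk)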

'''
1. First send a request to the Pear Video homepage:
   https://www.pearvideo.com/

   Parse out the ids of all the videos, e.g.:
   video_1570302

   re.findall()


2. Build the url of each video detail page:
   Thrilling! Man robbed on the subway slips away on foot
   https://www.pearvideo.com/video_1570302
   The secret of the karez
   https://www.pearvideo.com/video_1570107
'''
import requests
import re  # regular expressions, used to parse text data

# 1. First send a request to the Pear Video homepage
response = requests.get('https://www.pearvideo.com/')
# print(response.text)

# Use a regex to match the video ids
# parameter 1: the regex pattern
# parameter 2: the text to parse
# parameter 3: the matching mode
res_list = re.findall('<a href="video_(.*?)"', response.text, re.S)
# print(res_list)
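# A tiny offline check of the id-matching regex, using a made-up homepage snippet
# instead of a live request (the id is the example one from the docstring above):
# sample = '<li><a href="video_1570302" class="actplay">...</a></li>'
# print(re.findall('<a href="video_(.*?)"', sample, re.S))  # -> ['1570302']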

# Build the detail page url for each video
for v_id in res_list:
    detail_url = 'https://www.pearvideo.com/video_' + v_id
    # print(detail_url)

    # Send a request to each video detail page to get the video source url
    response = requests.get(url=detail_url)
    # print(response.text)

    # Parse and extract the video url from the detail page
    # video url
    video_url = re.findall('srcUrl="(.*?)"', response.text, re.S)[0]
    print(video_url)

    # video name
    video_name = re.findall(
        '<h1 class="video-tt">(.*?)</h1>', response.text, re.S)[0]

    print(video_name)

    # Send a request to the video url to get the binary video stream
    v_response = requests.get(video_url)

    with open('%s.mp4' % video_name, 'wb') as f:
        f.write(v_response.content)
        print(video_name, 'video crawling complete')
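# Video titles can contain characters that are not valid in filenames (e.g. '/'),
# which would make open() fail. A minimal helper sketch using only the standard
# library; the name safe_filename is just an illustration, not part of the lesson:
def safe_filename(name):
    # replace characters commonly rejected by filesystems with an underscore
    return re.sub(r'[\\/:*?"<>|]', '_', name).strip()

# usage: with open('%s.mp4' % safe_filename(video_name), 'wb') as f: ...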

# 3. Crawling the Douban Top250
'' '
https://movie.douban.com/top250?start=0&filter=
https://movie.douban.com/top250?start=25&filter=
HTTPS: //movie.douban ? .com / TOP250 Start = 50 & filter =

1. send request
2. parse the data
3. save the data
'' '
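# The page urls above follow start = page * 25, so the ten Top250 pages can also
# be generated up front, e.g. (an alternative sketch, not used by the code below):
# page_urls = [
#     f'https://movie.douban.com/top250?start={page * 25}&filter='
#     for page in range(10)
# ]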
import requests
import re

# The crawler trilogy
# 1. Send the request
def get_page(base_url):
    response = requests.get(base_url)
    return response

# 2. Parse the text
def parse_index(text):
    res = re.findall(
        '<div class="item">.*?<em class="">(.*?)</em>'
        '.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>'
        '.*?导演:(.*?)</p>'
        '.*?<span class="rating_num".*?>(.*?)</span>'
        '.*?<span>(.*?)人评价</span>'
        '.*?<span class="inq">(.*?)</span>',
        text, re.S)
    # print(res)
    return res

# 3. Save the data
def save_data(data):
    with open('douban.txt', 'a', encoding='utf-8') as f:
        f.write(data)
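# Douban may reject the default python-requests User-Agent, so a browser-like
# header is often needed. A sketch of a request helper with headers; the function
# name and the header value are just illustrations, not part of the lesson:
def get_page_with_headers(base_url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
    response = requests.get(base_url, headers=headers)
    return response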

# main + Enter key
if __name__ == '__main__':
    # num = 10
    # base_url = 'https://movie.douban.com/top250?start={}&filter='.format(num)

    num = 0
    for line in range(10):
        base_url = f'https://movie.douban.com/top250?start={num}&filter='
        num += 25
        print(base_url)

        # 1. Send the request (call the function)
        response = get_page(base_url)

        # 2. Parse the text
        movie_list = parse_index(response.text)

        # 3. Save the data
        # data formatting
        for movie in movie_list:
            # print(movie)

            # unpack by assignment
            # movie rank, movie url, movie name, director/starring, rating, number of reviewers, synopsis
            v_top, v_url, v_name, v_daoyan, v_point, v_num, v_desc = movie
            # v_top = movie[0]
            # v_url = movie[1]
            movie_content = f'''
            movie rank: {v_top}
            movie url: {v_url}
            movie name: {v_name}
            director / starring: {v_daoyan}
            movie rating: {v_point}
            number of reviewers: {v_num}
            synopsis: {v_desc}
            \n
            '''

            print(movie_content)

            # save the data
            save_data(movie_content)
 

 
