# Crawl the Baidu home page and write it out as an HTML file.
import requests

# A timeout keeps the script from blocking forever if the server never answers.
response = requests.get(url='http://www.baidu.com', timeout=10)

# Decode the body as UTF-8 when reading ``response.text`` below.
response.encoding = 'UTF-8'
print(response.text)

# NOTE(review): 'badui.html' looks like a typo of 'baidu.html'; the name is
# kept unchanged so anything that already reads this file keeps working.
with open('badui.html', 'w', encoding='UTF-8') as f:
    f.write(response.text)
Today's study:
Crawling Douban (the Top 250 movie list)
# The pages of the Douban Top 250 list differ only in the ``start`` offset:
#   https://movie.douban.com/top250?start=0&filter=
#   https://movie.douban.com/top250?start=25&filter=
#   https://movie.douban.com/top250?start=50&filter=
#
# The crawler works in three steps:
#   1. Send a request
#   2. Parse the data
#   3. Save the data
import re

import requests

# Number of list pages to crawl and the offset step between two pages.
PAGE_COUNT = 10
PAGE_SIZE = 25

# Every movie entry on a list page starts with this marker.
_ITEM_MARKER = '<div class="item">'

# Applied to ONE item at a time (the text following an item marker).
# The trailing quote group is optional so an item without an "inq" span
# cannot make the match run on into the next item.
_ITEM_PATTERN = re.compile(
    '.*?<em class="">(.*?)</em>'
    '.*?<a href="(.*?)">'
    '.*?<span class="title">(.*?)</span>'
    '.*?导演:(.*?)</p>'
    '.*?<span class="rating_num".*?>(.*?)</span>'
    '.*?<span>(.*?)人评价</span>'
    '(?:.*?<span class="inq">(.*?)</span>)?',
    re.S,
)


# 1. Send a request
def get_page(base_url, timeout=10):
    """Send a GET request to *base_url* and return the response object.

    Args:
        base_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawler.

    Returns:
        The ``requests`` response for the page.
    """
    return requests.get(base_url, timeout=timeout)


# 2. Parse the text
def parse_index(text):
    """Extract the movie records from the HTML *text* of one list page.

    The page is cut at each item marker and every item is matched on its
    own, so a field missing from one movie never swallows the next movie.

    Args:
        text: HTML source of a list page.

    Returns:
        A list of 7-tuples of strings: (ranking, url, name,
        director/starring text, rating, number of ratings, quote).
        The quote is ``''`` when the item has none. Items that lack any
        of the other fields are skipped.
    """
    records = []
    # Element 0 is whatever precedes the first item marker, not an item.
    for chunk in text.split(_ITEM_MARKER)[1:]:
        match = _ITEM_PATTERN.match(chunk)
        if match is None:
            continue
        *fields, quote = match.groups()
        # An unmatched optional group is None; normalise it to ''.
        records.append((*fields, quote or ''))
    return records


# 3. Save the data
def save_data(data):
    """Append the string *data* to ``douban.txt`` (UTF-8 encoded)."""
    with open('douban.txt', 'a', encoding='utf-8') as f:
        f.write(data)


def _format_movie(movie):
    """Render one 7-field movie record as the text block that gets saved."""
    v_top, v_url, v_name, v_daoyan, v_point, v_num, v_desc = movie
    return f'''
Movie Ranking: {v_top}
Film url: {v_url}
Movie Name: {v_name}
Director Starring: {v_daoyan}
Movie rating: {v_point}
Number of Evaluation: {v_num}
Movie Synopsis: {v_desc}
\n
'''


def main():
    """Crawl every list page, printing and saving each movie found."""
    for page in range(PAGE_COUNT):
        # The offset must advance with each page: 0, 25, 50, ...
        start = page * PAGE_SIZE
        base_url = f'https://movie.douban.com/top250?start={start}&filter='
        print(base_url)

        # 1. Send a request
        response = get_page(base_url)

        # 2. Parse the text
        movie_list = parse_index(response.text)

        # 3. Format and save each record
        for movie in movie_list:
            moive_content = _format_movie(movie)
            print(moive_content)
            save_data(moive_content)


if __name__ == '__main__':
    main()