"""Scrape the Douban Top 250 movie list.

Pages walked:
    https://movie.douban.com/top250?start=0&filter=
    https://movie.douban.com/top250?start=25&filter=
    ...
    https://movie.douban.com/top250?start=225&filter=

Crawler workflow: 1) send request  2) parse the response  3) save the data.
"""
import re

import requests


# 1. send the request
def get_page(base_url):
    """GET *base_url* and return the ``requests.Response`` object."""
    return requests.get(base_url)


# 2. parse the response text
def parse_index(text):
    """Return a list of 7-tuples extracted from one Top-250 HTML page.

    Each tuple is (rank, url, name, director/cast blob, rating,
    number of raters, one-line quote).  ``re.S`` lets ``.`` cross
    newlines so one pattern spans each movie's whole <div>.  The
    Chinese anchors (导演:, 人评价) match Douban's actual markup and
    must not be translated.
    """
    pattern = (
        '<div class="item">.*?<em class="">(.*?)</em>.*?<a href="(.*?)">'
        '.*?<span class="title">(.*?)</span>'
        '.*?导演:(.*?)</p>.*?<span class="rating_num".*?>(.*?)</span>'
        '.*?<span>(.*?)人评价</span>'
        '.*?<span class="inq">(.*?)</span>'
    )
    return re.findall(pattern, text, re.S)


# 3. save the data
def save_data(data):
    """Append one formatted movie record to ``douban.txt`` (UTF-8)."""
    with open('douban.txt', 'a', encoding='utf-8') as f:
        f.write(data)


# BUG FIX: the guard was `if __name__ == ' __Main__ '` (wrong case,
# embedded spaces), which can never equal '__main__', so the crawler
# never actually ran.
if __name__ == '__main__':
    num = 0
    # 10 pages x 25 movies per page = Top 250; the loop index itself
    # is unused, only the running `num` offset matters.
    for _ in range(10):
        base_url = f'https://movie.douban.com/top250?start={num}&filter='
        num += 25
        print(base_url)

        # 1. send the request
        response = get_page(base_url)

        # 2. parse the response text
        movie_list = parse_index(response.text)

        # 3. format and save each record
        for movie in movie_list:
            # rank, url, name, director/cast, rating, raters, quote
            v_top, v_url, v_name, v_daoyan, v_point, v_num, v_desc = movie
            movie_content = f'''
movie rank: {v_top}
movie url: {v_url}
movie name: {v_name}
movie director: {v_daoyan}
movie rating: {v_point}
number of raters: {v_num}
movie quote: {v_desc}
'''
            print(movie_content)
            save_data(movie_content)