Python爬虫抓取豆瓣top250电影

Python爬虫抓取豆瓣top250电影
 

import requests
from bs4 import BeautifulSoup
# Scrape the movie titles from the Douban Top 250 listing, print them, and
# append them to a text file.

# Base URL of the listing; the zero-based item offset is appended to it.
link = "https://movie.douban.com/top250?start="
# Browser-like headers sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
    'Host': 'movie.douban.com',
}

movie_list = []
# 10 pages of 25 entries each -> offsets 0, 25, ..., 225.
for i in range(0, 10):
    tempLink = link + str(i * 25)
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(tempLink, headers=headers, timeout=10)
    # Fail loudly on an HTTP error status instead of silently parsing an
    # error page and appending an incomplete list to the output file.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "lxml")
    # Each entry's title lives in <div class="hd"><a><span>...</span></a></div>.
    div_list = soup.find_all('div', 'hd')
    for each in div_list:
        anchor = each.a
        title_span = anchor.span if anchor is not None else None
        if title_span is None:
            # Skip a block without the expected <a><span> structure rather
            # than crashing with AttributeError.
            continue
        movie_list.append(title_span.text.strip())

print(str(len(movie_list)))
# Every title is preceded by a newline, so repeated runs append cleanly.
liststr = "".join("\n" + mo for mo in movie_list)
for mo in movie_list:
    print(mo)

# An explicit UTF-8 encoding makes writing the Chinese titles independent of
# the platform's default locale encoding. The `with` block closes the file.
with open("top250电影.txt", "a+", encoding="utf-8") as file:
    file.write(liststr)

猜你喜欢

转载自 https://blog.csdn.net/zhengjian1996/article/details/112941698