python爬取(自动化)豆瓣电影影评,并存储。

from selenium import webdriver
from selenium.webdriver import ActionChains
import time

driver = webdriver.Chrome(r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe') #自动化侧是驱动程序本地所纺地址
driver.get('https://accounts.douban.com/passport/login?source=movie')#豆瓣电影登录口
time.sleep(4)

#创建事件对象
action = ActionChains(driver)

#获取目标元素
login = driver.find_element_by_class_name('account-tab-account')
#执行运行
action.click(login).perform()

username = driver.find_element_by_name('username')
password = driver.find_element_by_name('password')


username.send_keys('********') #写入自己的用户名,自己更改
password.send_keys('*******') #写入自己的密码


#获取登录按钮
loginbtn = driver.find_element_by_link_text('登录豆瓣')
#执行运行
action.click(loginbtn).perform()
time.sleep(5)

urls = ["https://movie.douban.com/subject/26794435/comments?start=%s&limit=20&sort=new_score&status=P"%i for i in range(0,481,20)]
index = 0
for url in urls:
index+=1
driver.get(url)
time.sleep(3)
data = driver.page_source
with open("./temple/%s.html"%index,"w",encoding='utf-8') as f:
f.write(data)
time.sleep(3)
with open('./评论/评论.text', 'a', encoding='utf-8') as h:
read = driver.find_elements_by_class_name("short")
for j in range(0, len(read)):
h.write(''.join(read[j].text).strip().replace('\n',''))
print(''.join(read[j].text).strip().replace('\n',''))
time.sleep(3)
driver.close()

猜你喜欢

转载自www.cnblogs.com/superSmall/p/11520893.html
今日推荐