Python crawler (browser automation): scraping Douban movie reviews and saving them to disk.

import os
import time

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Path to the local chromedriver executable; change it to match your machine.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
LOGIN_URL = 'https://accounts.douban.com/passport/login?source=movie'

# Start Chrome through the local chromedriver and open the Douban login page.
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention this script was written for; recent Selenium 4 releases expect a
# Service object instead - confirm against the installed version.
driver = webdriver.Chrome(CHROMEDRIVER_PATH)
driver.get(LOGIN_URL)
time.sleep(4)  # pause before looking up elements on the freshly opened page

# Click the account tab before locating the user name / password fields.
# NOTE(review): the class name was reconstructed from a translation-mangled
# literal ('Account-Account-Tab'); verify it against the live login page.
login = driver.find_element(By.CLASS_NAME, 'account-tab-account')
# A fresh ActionChains is built for every click: a reused chain can replay
# previously queued actions on older Selenium releases.
ActionChains(driver).click(login).perform()

username = driver.find_element(By.NAME, 'username')
password = driver.find_element(By.NAME, 'password')

username.send_keys('********')  # replace with your own user name
password.send_keys('*******')  # replace with your own password

# Locate the login button by its link text and submit the form.
loginbtn = driver.find_element(By.LINK_TEXT, '登录豆瓣')
ActionChains(driver).click(loginbtn).perform()
time.sleep(5)  # pause after submitting before requesting the comment pages

# Comment-listing URLs for the movie: start offsets 0, 20, ..., 480 (25 pages).
urls = ["https://movie.douban.com/subject/26794435/comments?start=%s&limit=20&sort=new_score&status=P" % i for i in range(0, 481, 20)]

# Make sure both output directories exist; open() would fail otherwise.
os.makedirs("./temple", exist_ok=True)
os.makedirs("./评论", exist_ok=True)

try:
    for index, url in enumerate(urls, start=1):
        driver.get(url)
        time.sleep(3)  # pause after navigation before reading the page

        # Keep a raw HTML snapshot of every page, numbered from 1.
        with open("./temple/%s.html" % index, "w", encoding='utf-8') as f:
            f.write(driver.page_source)
        time.sleep(3)

        # Append the text of every element with class "short" to the
        # comments file, flattened to a single line each.
        comments = driver.find_elements(By.CLASS_NAME, "short")
        with open('./评论/评论.text', 'a', encoding='utf-8') as h:
            for comment in comments:
                text = comment.text.strip().replace('\n', '')
                # One comment per line; without the separator consecutive
                # comments run together in the output file.
                h.write(text + '\n')
                print(text)
        time.sleep(3)  # pause between pages
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even when a page fails part-way through.
    driver.quit()

You may also like

Source: www.cnblogs.com/superSmall/p/11520893.html