# Save web page content to Excel (保存网页内容到excel)

from time import sleep

from openpyxl import Workbook
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Launch Chrome and open the Baidu Tieba home page.
driver = webdriver.Chrome()
driver.get("https://tieba.baidu.com/index.html")

# Type the query into the search box.
# The find_element_by_* helpers were removed in Selenium 4.3;
# find_element(By.XPATH, ...) works on both Selenium 3 and Selenium 4.
driver.find_element(By.XPATH, '//input[@id="wd1"]').send_keys("孙茂书")

# Click the link whose text is "全吧搜索" to submit the search.
driver.find_element(By.XPATH, '//a[text()="全吧搜索"]').click()
# Fixed pause, presumably to give the results page time to load.
sleep(3)

# Create a new workbook and take its active sheet as the output sheet.
wb = Workbook()
ws = wb.active

# Row 1 holds the column headers (title, content).
ws.append(["标题", "内容"])

# Data starts on row 2. `row` is the next row written in column 1 (titles),
# `row2` is the next row written in column 2 (contents).
row = row2 = 2

# Walk through the search-result pages: on each page write every title into
# column 1 and every content snippet into column 2, save the workbook, then
# follow the "next" link. The loop ends when no "next" link is present.
# The browser is always closed on the way out, even if a page fails.
try:
    while True:
        # find_elements_by_xpath was removed in Selenium 4.3;
        # find_elements(By.XPATH, ...) works on both Selenium 3 and 4.
        all_title = driver.find_elements(By.XPATH, '//span[@class="p_title"]')
        all_content = driver.find_elements(By.XPATH, '//div[@class="p_content"]')

        for title in all_title:
            ws.cell(row=row, column=1, value=title.text)
            print("保存标题到excel表格中")
            row += 1

        for content in all_content:
            ws.cell(row=row2, column=2, value=content.text)
            print("保存内容到excel表格中")
            row2 += 1

        # If a page produced unequal numbers of titles and contents, move
        # both counters to the same row so the mismatch cannot shift the
        # two columns against each other on every following page.
        row = row2 = max(row, row2)

        # Saved inside the loop, so the file on disk reflects every page
        # processed so far.
        wb.save("./data.xlsx")

        # Only the lookup of the "next" link is expected to raise
        # NoSuchElementException (find_elements returns an empty list
        # instead), so the try body is limited to that single call.
        try:
            next_link = driver.find_element(By.XPATH, '//a[@class="next"]')
        except NoSuchElementException:
            break
        next_link.click()
        # Fixed pause, presumably to give the next page time to load.
        sleep(3)
finally:
    # Shut down the browser and its driver process.
    driver.quit()

# 猜你喜欢 ("You may also like" - leftover heading from the source web page)

# 转载自 (reposted from) www.cnblogs.com/mary-ding/p/12190444.html