Python——selenium爬取学科

import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Start a Chrome session; every helper below shares this driver instance.
browser = webdriver.Chrome()

# Explicit-wait helper bound to the driver, with a 3 second timeout.
wait = WebDriverWait(browser, 3)

# Element whose presence is taken as the signal that the page has rendered.
_first_entry_locator = (By.XPATH, '/html/body/div[4]/div[1]/ul/li[1]/div')

try:
    browser.get("https://souky.eol.cn/api/newapi/assess_result")
    wait.until(EC.presence_of_element_located(_first_entry_locator))
except TimeoutException:
    # Only report the timeout; the rest of the script still runs.
    print('Timeout')


def click_according_text(text):
    """Click the link whose visible text is exactly ``text``.

    The lookup uses the module-level ``browser``. If the link cannot be
    found or clicked, a message is printed and nothing is raised.

    Args:
        text: Exact link text to search for.
    """
    try:
        # find_element(By.LINK_TEXT, ...) works on Selenium 3 and 4; the
        # find_element_by_link_text shortcut was removed in Selenium 4.
        browser.find_element(By.LINK_TEXT, text).click()
    except WebDriverException:
        # Catch only WebDriver failures so Ctrl-C and programming errors
        # are no longer swallowed as "not clickable".
        print(text + '不可点击')
#click_according_text("理学")

# Two-element lists, one per letter a-g.
# NOTE(review): presumably (first, last) sub-item indices for each of the
# seven top-level list entries on the page -- confirm before relying on them.
a = [1, 17]
b = [1, 14]
c = [1, 36]
d = [1, 9]
e = [1, 9]
f = [1, 5]
g = [1, 5]



def click_locatin_element(element, text):
    """Click the element located by the XPath expression ``element``.

    The lookup uses the module-level ``browser``. If the element cannot be
    found or clicked, a message built from ``text`` is printed and nothing
    is raised.

    Args:
        element: XPath expression locating the element to click.
        text: Label used in the failure message.
    """
    try:
        # find_element(By.XPATH, ...) works on Selenium 3 and 4; the
        # find_element_by_xpath shortcut was removed in Selenium 4.
        browser.find_element(By.XPATH, element).click()
    except WebDriverException:
        # Catch only WebDriver failures so Ctrl-C and programming errors
        # are no longer swallowed as "not clickable".
        print(text + "不可点击")

def get_secien(element, csv_path='C:/Users/Administrator/Desktop/学科2014.csv'):
    """Append one list entry's text and the page's first table to a CSV file.

    Steps: read the text of the element at ``element``, append it as a
    single-cell row, click the element, append the first HTML table parsed
    from the assessment-result URL, then pause for 3 seconds.

    Args:
        element: XPath expression locating the entry to record and click.
        csv_path: CSV file opened in append mode; defaults to the path the
            script has always written to.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if ``element``
            matches nothing (the initial lookup is not guarded).
    """
    button = browser.find_element(By.XPATH, element)
    label = pd.DataFrame([button.text])
    label.to_csv(csv_path, sep=',', mode='a', header=False, index=False)

    click_locatin_element(element, element)
    # NOTE(review): read_html downloads this URL afresh; it does not read the
    # browser's current DOM, so the click above may not affect the table that
    # is saved -- confirm whether browser.page_source was intended.
    data = pd.read_html("https://souky.eol.cn/api/newapi/assess_result")[0]
    data.to_csv(csv_path, sep=',', mode='a', header=False, index=False)
    time.sleep(3)


# Exclusive upper bound of the sub-item index for each of the seven
# top-level <li> entries, in page order.
SUB_ITEM_STOPS = [18, 15, 37, 10, 10, 6, 6]

# Driver loop. It has to live at module level: nested inside get_secien it
# was never reached by the script and would recurse without end if called.
try:
    for i, stop in enumerate(SUB_ITEM_STOPS, start=1):
        for j in range(1, stop):
            get_secien(
                "/html/body/div[4]/div[1]/ul/li[{}]/ul/li[{}]".format(i, j)
            )
finally:
    # Always end the Chrome session so no driver process is left behind.
    browser.quit()
发布了55 篇原创文章 · 获赞 17 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/weixin_43213658/article/details/88673290