爬虫项目6[爬取网易云python课程数据]

登录界面是在iframe表单中,要获取iframe表单中的数据必须先进入iframe表单

模块selenium
在定位网页中的数据时,如果标签是在iframe表单中,直接定位是找不到的,必须先跳转到iframe表单中才能定位。整个程序中最重要的就是用selenium进入iframe标签,代码如下:

iframe_element = browser.find_element_by_xpath()
browser.switch_to_frame(iframe_element) 
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Start a headless Chrome session (no visible window) and open the site.
chrome_options = Options()
chrome_options.add_argument("--headless")
# Pass the options through `options=`; the older `chrome_options=` keyword is
# deprecated and is rejected by current Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
browser.get("https://study.163.com/")
browser.maximize_window()

# NOTE: the `find_element_by_xpath` helpers were removed from Selenium 4;
# `find_element(By.XPATH, ...)` works on both Selenium 3 and 4.

# "Agree to the terms" button.
agree_button = browser.find_element(
    By.XPATH,
    ".//span[@class='ux-btn th-bk-main ux-btn- ux-btn- ux-modal-btn um-modal-btn_ok th-bk-main']",
)
agree_button.click()

# Close button of the prompt form.
form_close = browser.find_element(By.XPATH, ".//i[@class='ux-icon ux-icon-close']")
form_close.click()

# Login button.
login_button = browser.find_element(By.XPATH, ".//a[@class='f-fr j-nav-loginBtn loginBtn']")
login_button.click()
# Fixed pause before the login iframe is looked up by the code that follows.
time.sleep(5)

# The login form lives inside an <iframe>; locate that iframe element first.
iframe_element = browser.find_element(
    By.XPATH,
    ".//div[@class='ux-modal mn-login-dialog ux-modal-fadeIn']//div[@id='j-ursContainer-1']/iframe",
)

# Most important step: switch into the iframe, otherwise none of the lookups
# below can find their elements. `switch_to.frame` replaces the removed
# `switch_to_frame` method.
browser.switch_to.frame(iframe_element)

tel = browser.find_element(By.XPATH, ".//input[@type='tel']")  # account input
password = browser.find_element(By.XPATH, ".//input[@class='j-inputtext dlemail'][1]")  # password input
submit = browser.find_element(By.XPATH, ".//a[@id='submitBtn'][1]")  # submit link

# NOTE(review): credentials are hard-coded in the source; consider loading
# them from the environment or a config file that is not committed.
tel.send_keys("13839817517")  # type the account
password.send_keys("yhr104653")  # type the password
submit.click()  # submit the login form

# The original left an explicit switch back to the top-level document
# disabled and simply reloads the page instead:
#browser.switch_to.default_content()
browser.refresh()

# Search input box.
search = browser.find_element(
    By.XPATH,
    ".//div[@class='m-indextopwrap f-pr']//div[@class='box  j-search f-cb']/input",
)
# Search submit button.
go_search = browser.find_element(
    By.XPATH,
    ".//div[@class='m-indextopwrap f-pr']//div[@class='submit j-submit f-pa']/span[text()='搜索']",
)
# Type the keyword to search for, then submit.
search.send_keys("python")
go_search.click()
# Fixed pause before the result list is read by the code that follows.
time.sleep(5)


if __name__ == "__main__":
    # Print the course titles of every result page, following the pagination
    # link until it can no longer be found, then shut the browser down.
    try:
        while True:
            # Look the pagination link up BEFORE reading the titles, exactly
            # as the original did; only "element not found" ends the loop,
            # any other WebDriver error now propagates instead of being
            # silently swallowed.
            try:
                next_link = browser.find_element(By.XPATH, ".//a[@class='th-bk-disable-gh']")
            except NoSuchElementException:
                next_link = None

            lesson_names = browser.find_elements(
                By.XPATH,
                ".//ul[@class='uc-course-list_ul']/li//span[@class='uc-ykt-coursecard-wrap_tit_name']",
            )
            for lesson_name in lesson_names:
                print(lesson_name.text)

            if next_link is None:
                break
            next_link.click()
            time.sleep(5)  # fixed pause before the next page is read
    finally:
        # Always end the WebDriver session so the headless Chrome process
        # does not linger after the script finishes or fails.
        browser.quit()

发布了62 篇原创文章 · 获赞 13 · 访问量 2965

猜你喜欢

转载自blog.csdn.net/Yanghongru/article/details/104710405