Crawler 04 - NetEase Cloud Music comments

import time

from selenium import webdriver
from selenium.webdriver.common.by import By


def saveData(text, path=r'C:\Users\Administrator\Desktop\04\data.txt'):
    """Append ``text`` as one line to a UTF-8 encoded text file.

    Args:
        text: The string to store; a newline is written after it.
        path: Destination file, opened in append mode. The default is the
            location this script has always written to, so callers that
            pass only ``text`` behave exactly as before.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.write(text + '\n')


class YunSpider(object):
    """Selenium-driven scraper that saves comment text from a web page.

    The XPath expressions and link text used here target the comment list
    of the page given at construction time (the script uses a
    music.163.com song URL).
    """

    def __init__(self, url):
        """Store the page URL and start a Chrome browser session.

        Args:
            url: Address of the page whose comments are collected.
        """
        self.url = url
        self.driver = webdriver.Chrome()

    def getcontent(self, pages=5):
        """Open the page, save each comment's text, and page forward.

        Args:
            pages: Maximum number of comment pages to read. Defaults to 5,
                the count that used to be hard-coded.
        """
        self.driver.get(self.url)
        # The page keeps its content inside an iframe, so switch into the
        # first frame before trying to locate anything.
        self.driver.switch_to.frame(0)
        # Scroll far down the frame (presumably so the comment area and
        # pager come into view before they are read and clicked).
        self.driver.execute_script('window.scrollBy(0,8000)')

        for page in range(pages):
            # Each child div of the comment container is one comment item.
            comments = self.driver.find_elements(
                By.XPATH, '//div[@class="cmmts j-flag"]/div')
            for comment in comments:
                # find_elements (plural) returns a list that is simply
                # empty when nothing matches, so an item without a text
                # node is skipped instead of raising and ending the run.
                bodies = comment.find_elements(
                    By.XPATH, './/div[@class="cnt f-brk"]')
                if bodies:
                    saveData(bodies[0].text)

            # Nothing more to read after the last requested page, so do
            # not navigate any further.
            if page == pages - 1:
                break

            # Locate the "next page" link and click it; stop when the
            # page offers no such link.
            next_links = self.driver.find_elements(
                By.PARTIAL_LINK_TEXT, "下一页")
            if not next_links:
                break
            next_links[0].click()
            # Brief pause to give the next page of comments time to load.
            time.sleep(.5)


if __name__ == '__main__':
    base_url = 'https://music.163.com/#/song?id=417250673'
    yun = YunSpider(base_url)
    try:
        yun.getcontent()
    finally:
        # Always end the browser session, even when scraping fails, so no
        # Chrome/chromedriver process is left running.
        yun.driver.quit()


Selenium setup guide: https://blog.csdn.net/weixin_44352981/article/details/90714826

Tutorial video: https://www.bilibili.com/video/BV1pE411B7BL?t=3516

Guess you like

Original article: blog.csdn.net/qq_41458842/article/details/106244772