# Python 京东商品爬取 (scraping JD.com product listings with Selenium)

from selenium import webdriver
import time
import csv

class Jingdong:
    """Selenium-driven scraper that saves JD.com search results to a CSV file.

    Typical use is ``Jingdong().workOn()``: it opens headless Chrome, asks
    for a search keyword on stdin, walks up to ``max_pages`` result pages
    and appends one CSV row ``[price, comments, shop, name]`` per product.

    The class attributes below are tunables; their defaults are the values
    that used to be hard-coded in the methods.
    """

    # Seconds to wait after scrolling so lazily loaded products can render.
    load_wait = 3
    # Seconds to wait after clicking the "next page" button.
    page_wait = 2
    # Maximum number of result pages visited by workOn().
    max_pages = 3
    # Output file that saveCsv() appends to.
    csv_path = '京东.csv'

    def __init__(self):
        self.url = 'https://www.jd.com/'
        # NOTE(review): these headers are stored but never applied to the
        # browser session anywhere in this class.
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36'}
        # Created by getPage(); released again by workOn().
        self.driver = None

    def getPage(self):
        """Start headless Chrome, open the home page and submit a search.

        The keyword is read interactively from stdin.
        """
        opt = webdriver.ChromeOptions()
        # add_argument works on both old and new Selenium releases, unlike
        # the removed set_headless() helper.
        opt.add_argument('--headless')
        self.driver = webdriver.Chrome(options=opt)
        self.driver.get(self.url)
        key = input('请输入商品:')
        # Generic find_element(by, value) form; the find_element_by_*
        # shortcuts no longer exist in current Selenium.
        self.driver.find_element('class name', 'text').send_keys(key)
        # Click the search button.
        self.driver.find_element('class name', 'button').click()

    @staticmethod
    def _parseItem(text):
        """Turn one product tile's text into ``[price, comments, shop, name]``.

        The tile text is expected to hold one field per line, e.g.::

            ¥52.80
            Python编程从入门到实践...
            500 + 条评价
            润知天下图书专营店

        When the second line is the '拍拍' badge, the remaining fields are
        shifted down by one line.

        Returns:
            The row as a list, or ``None`` when the tile has too few lines
            to contain all four fields (instead of raising ``IndexError``).
        """
        info = text.split('\n')
        # Index of the name line: 1 normally, 2 when the badge precedes it.
        first = 2 if len(info) > 1 and info[1] == '拍拍' else 1
        if len(info) < first + 3:
            return None
        price = info[0]
        name, commit, market = info[first:first + 3]
        return [price, commit, market, name]

    def parsePage(self):
        """Scrape the current result page and move on to the next one.

        Every parsable product is written through ``saveCsv``; tiles that
        do not have the expected fields are skipped.

        Returns:
            ``True`` if the "next page" button was clicked, ``False`` if
            the current page is the last one (or has no such button).
        """
        # Scroll to the bottom so the lazily loaded products are rendered.
        self.driver.execute_script(
            'window.scrollTo(0,document.body.scrollHeight)'
        )
        # Give the page time to load.
        time.sleep(self.load_wait)
        # Base xpath: one node per product.
        items = self.driver.find_elements(
            'xpath', '//div[@id="J_goodsList"]/ul/li'
        )
        for item in items:
            row = self._parseItem(item.text)
            if row is not None:
                self.saveCsv(row)

        if 'pn-next disabled' in self.driver.page_source:
            return False
        # find_elements returns an empty list rather than raising when the
        # button is absent (e.g. a search with no pagination).
        next_buttons = self.driver.find_elements('class name', 'pn-next')
        if not next_buttons:
            return False
        next_buttons[0].click()
        time.sleep(self.page_wait)
        return True

    def saveCsv(self,L):
        """Append the row ``L`` to ``csv_path`` (gb18030-encoded CSV)."""
        with open(self.csv_path,'a',newline='',encoding='gb18030') as f:
            csv.writer(f).writerow(L)

    def workOn(self):
        """Run the whole crawl and always shut the browser down afterwards.

        Stops early when parsePage() reports that there is no next page,
        so the last page is not scraped repeatedly.
        """
        try:
            self.getPage()
            for page in range(1, self.max_pages + 1):
                has_next = self.parsePage()
                print('第%s页爬取成功'%page)
                # Compare with False explicitly so an overriding parsePage
                # that still returns None keeps the old "always continue"
                # behaviour.
                if has_next is False:
                    break
        finally:
            driver = getattr(self, 'driver', None)
            if driver is not None:
                # Release the Chrome process even when scraping failed.
                driver.quit()
                self.driver = None


if __name__ == '__main__':
    # Script entry point: announce start-up, then run the full crawl.
    print('start')
    Jingdong().workOn()

# 猜你喜欢

# 转载自 blog.csdn.net/lichong2259/article/details/87940091