"""Scrape JD.com product listings and save them to MongoDB."""

import time

import pymongo
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys



def _scroll_to_load(driver, steps=20, start=400, stride=500, pause=0.1):
    """Scroll the window down in steps so the page's product entries load."""
    offset = start
    for _ in range(steps):
        driver.execute_script('window.scrollTo(0,%s)' % offset)
        offset += stride
        time.sleep(pause)


def _extract_good(good):
    """Build one MongoDB document from a single ``gl-item`` element."""
    # Name, price, link, image and comment count, looked up in that order.
    return {
        '名字': good.find_element(By.CSS_SELECTOR, '.p-name em').text.replace("\n", ""),
        '价格': good.find_element(By.CSS_SELECTOR, '.p-price').text.replace("\n", ""),
        '链接': good.find_element(By.CSS_SELECTOR, '.p-img a').get_attribute('href'),
        '图片': good.find_element(By.CSS_SELECTOR, '.p-img img').get_attribute('src'),
        '评论数': good.find_element(By.CSS_SELECTOR, '.p-commit').text.replace("\n", ""),
    }


def get_goods(driver):
    """Scrape the current result page and all following pages into MongoDB.

    For each page: scroll down so the products load, read every ``gl-item``
    inside the ``J_goodsList`` container, insert one document per product
    into the ``messages`` collection of the ``jd`` database on
    ``localhost:27017``, then click the ``pn-next`` button and wait for the
    next page.

    Pages are walked in a loop rather than by recursion, so the number of
    pages is not limited by the interpreter's recursion depth, and a single
    MongoDB client is used for the whole crawl and closed on exit.

    Args:
        driver: A Selenium WebDriver already showing a search result page.

    Returns:
        None, once no usable "next page" button is found.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the goods
            container or one of a product's fields cannot be located.
    """
    client = pymongo.MongoClient('localhost', 27017)
    try:
        collection = client.jd.messages
        while True:
            _scroll_to_load(driver)

            # Elements are looked up again on every page; references from the
            # previous page are not reused after navigation.
            goods_div = driver.find_element(By.ID, 'J_goodsList')
            for good in goods_div.find_elements(By.CLASS_NAME, 'gl-item'):
                collection.insert_one(_extract_good(good))

            # find_elements returns an empty list instead of raising, which
            # gives the loop a clean exit when there is no next-page button.
            next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
            if not next_tags:
                return
            next_tag = next_tags[0]
            # NOTE(review): assumes the site marks the last page's button with
            # an extra "disabled" class - confirm against the live markup.
            if 'disabled' in (next_tag.get_attribute('class') or '').split():
                return

            next_tag.click()
            # Give the next page time to load before scraping it.
            time.sleep(3)
    finally:
        client.close()

# Start a Chrome browser session.
driver = webdriver.Chrome()
try:
    # Open the JD home page.
    driver.get('https://www.jd.com/')
    driver.implicitly_wait(10)
    # Locate the search box and type the keyword ("computer").
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys('电脑')
    # Submit the search by clicking the search button.
    button = driver.find_element(By.XPATH, '//*[@id="search"]/div/div[2]/button')
    button.click()

    get_goods(driver)
    # Pause before the browser is shut down in the finally clause.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    driver.quit()

# Source: reposted from www.cnblogs.com/lnd-blog/p/11685567.html