Python 爬取京东商品页面信息

import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Ask the user which product keyword to search for on JD.
key_words = input("这位客官!请您输入要在京东查询爬取的商品:")
# Chrome start-up options handed to the driver when it is created.
# NOTE(review): the original comment said 'disable-infobars' skips Chrome's
# automation detection; presumably it only hides the automation info bar — confirm.
option = ChromeOptions()
option.add_argument('disable-infobars')

# Scraping helpers and the page-by-page entry point for JD search results.
def _text_of(item, selector):
    """Return the text of the first match of *selector* under *item*.

    Newlines are removed from the text. Returns '' when nothing matches, so
    a product card missing that field does not abort the whole run. Note that
    with an implicit wait set on the driver, a missing field costs the full
    wait before the lookup gives up.
    """
    try:
        return item.find_element(By.CSS_SELECTOR, selector).text.replace("\n", "")
    except NoSuchElementException:
        return ''


def _attr_of(item, selector, attribute):
    """Return *attribute* of the first match of *selector* under *item*.

    Returns None when nothing matches (the same value ``get_attribute``
    yields for an attribute that is not present).
    """
    try:
        return item.find_element(By.CSS_SELECTOR, selector).get_attribute(attribute)
    except NoSuchElementException:
        return None


def get_goods(driver, key_words):
    """Scrape the current JD search-result page and every following page.

    For each page: scroll down in steps, collect every ``gl-item`` element
    under ``#J_goodsList``, append one formatted record per item to
    ``京东---<key_words>.txt`` (UTF-8, append mode), then click the
    "next page" button and wait 3 seconds. The loop stops when that button
    is missing or carries the ``disabled`` class.

    Args:
        driver: Selenium WebDriver already showing a JD search-result page.
        key_words: Search keyword; only used to build the output file name.

    Returns:
        None.

    Raises:
        NoSuchElementException: if the ``J_goodsList`` container is not found.
    """
    record_template = '''
                   商品信息:
                       名称  %s
                       价格  %s
                       链接  %s
                       图片  %s
                       评价  %s
                       店铺  %s
                       优惠  %s
                   '''

    # Iterate instead of recursing once per page: recursion had no stop
    # condition and could only end with an exception.
    while True:
        # Scroll down in 20 steps of 500px, pausing briefly after each one,
        # so the page has a chance to load further items before reading it.
        offset = 400
        for _ in range(20):
            driver.execute_script('window.scrollTo(0,%s)' % offset)
            offset += 500
            time.sleep(0.1)

        # Outer container holding every product card on the page.
        goods_div = driver.find_element(By.ID, 'J_goodsList')
        print(goods_div)  # debug output: raw WebElement repr
        # One 'gl-item' element per product.
        goods_list = goods_div.find_elements(By.CLASS_NAME, 'gl-item')
        print(goods_list)  # debug output: raw WebElement reprs

        # Open the output file once per page rather than once per item.
        with open('京东---%s.txt' % key_words, 'a', encoding='utf-8') as f:
            for good in goods_list:
                # Name, price, link, image, reviews, shop, promotions.
                record = record_template % (
                    _text_of(good, '.p-name em'),
                    _text_of(good, '.p-price'),
                    _attr_of(good, '.p-img a', 'href'),
                    _attr_of(good, '.p-img img', 'src'),
                    _text_of(good, '.p-commit'),
                    _text_of(good, '.p-shop'),
                    _text_of(good, '.p-icons'),
                )
                f.write(record + '\n')

        # Find the "next page" button; no button means there is nothing left.
        try:
            next_tag = driver.find_element(By.CLASS_NAME, 'pn-next')
        except NoSuchElementException:
            break
        # NOTE(review): assumes JD flags the last page's button with a
        # 'disabled' class — confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break
        next_tag.click()

        # Give the next page time to load before scraping it.
        time.sleep(3)


# Start Chrome through the local chromedriver binary, using the options built above.
# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(executable_path=r'D:\Python2020-邱勋涛\爬虫self\淘宝\chromedriver.exe',
                          options=option)
try:

    # Open the JD home page; element lookups then wait up to 10 s before failing.
    driver.get('https://www.jd.com/')
    driver.implicitly_wait(10)
    # Locate the search box.
    input_tag = driver.find_element(By.ID, 'key')

    # Type the keyword (send_keys returns nothing, so there is no result to keep).
    input_tag.send_keys(key_words)
    # Submit the search with Enter.
    input_tag.send_keys(Keys.ENTER)
    get_goods(driver, key_words)
    # Presumably keeps the browser open for inspection after scraping — confirm.
    time.sleep(1000)
finally:
    # quit() ends the whole WebDriver session and the chromedriver process;
    # close() would only close the current window and leave the driver running.
    driver.quit()

效果:

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/Qiuxuntao/article/details/119255071