import time

from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Ask for the product keyword to search for on JD. Surrounding whitespace is
# stripped and the prompt is repeated until something non-empty is entered,
# because the keyword is both typed into the search box and embedded in the
# output file name.
while True:
    key_words = input("这位客官!请您输入要在京东查询爬取的商品:").strip()
    if key_words:
        break

# Chrome launch options. The original author added this flag hoping to get
# past Chrome's automation detection; going by its name it only asks Chrome to
# suppress its infobars.
# NOTE(review): confirm the flag is still honoured by the Chrome version in use.
option = ChromeOptions()
option.add_argument('disable-infobars')
# Layout of one record in the output file. The placeholders are filled in the
# order: name, price, link, image, comments, shop, promotions.
_GOODS_TEMPLATE = (
    '\n'
    '商品信息:\n'
    '名称 %s\n'
    '价格 %s\n'
    '链接 %s\n'
    '图片 %s\n'
    '评价 %s\n'
    '店铺 %s\n'
    '优惠 %s\n'
)

# Characters replaced before the keyword is used in a file name: path
# separators plus the characters Windows rejects in file names.
_UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'


def _output_path(key_words):
    """Return the output file name for *key_words*, unsafe characters replaced by '_'."""
    safe = key_words.translate({ord(ch): '_' for ch in _UNSAFE_FILENAME_CHARS})
    return '京东---%s.txt' % safe


def _scroll_through_page(driver):
    """Scroll down the current page in 20 steps so its content can finish loading."""
    for offset in range(400, 400 + 20 * 500, 500):
        driver.execute_script('window.scrollTo(0,%s)' % offset)
        time.sleep(0.1)


def _first_text(parent, selector):
    """Return the text of the first element matching *selector*, newlines removed, or ''."""
    matches = parent.find_elements(By.CSS_SELECTOR, selector)
    if not matches:
        return ''
    return matches[0].text.replace("\n", "")


def _first_attr(parent, selector, attribute):
    """Return *attribute* of the first element matching *selector*, or '' if unavailable."""
    matches = parent.find_elements(By.CSS_SELECTOR, selector)
    if not matches:
        return ''
    return matches[0].get_attribute(attribute) or ''


def _format_good(good):
    """Build the text record for a single product element."""
    return _GOODS_TEMPLATE % (
        _first_text(good, '.p-name em'),
        _first_text(good, '.p-price'),
        _first_attr(good, '.p-img a', 'href'),
        _first_attr(good, '.p-img img', 'src'),
        _first_text(good, '.p-commit'),
        _first_text(good, '.p-shop'),
        _first_text(good, '.p-icons'),
    )


def get_goods(driver, key_words):
    """Scrape every result page of the current search and append the items to a file.

    For each page the function scrolls down to let the page load, collects
    all ``gl-item`` elements inside the ``J_goodsList`` container, and appends
    one record per item (name, price, link, image, comments, shop,
    promotions) to ``京东---<key_words>.txt`` encoded as UTF-8. It then clicks
    the ``pn-next`` button, waits three seconds and repeats.

    Pagination is a loop rather than recursion, and it stops when no
    ``pn-next`` element exists or when that element carries a ``disabled``
    CSS class.
    NOTE(review): the ``disabled`` class on the last page is an assumption
    about the site's markup -- confirm against the live page.

    Args:
        driver: Selenium WebDriver already showing a search result page.
        key_words: The search keyword; used only to build the output file
            name (characters unsafe in file names are replaced by ``_``).

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the
            ``J_goodsList`` container is missing from a page.
    """
    out_path = _output_path(key_words)
    while True:
        _scroll_through_page(driver)

        # The outermost container holding all products, then each product item.
        goods_div = driver.find_element(By.ID, 'J_goodsList')
        goods_list = goods_div.find_elements(By.CLASS_NAME, 'gl-item')

        # Open the file once per page instead of once per item.
        if goods_list:
            with open(out_path, 'a', encoding='utf-8') as f:
                for good in goods_list:
                    f.write(_format_good(good) + '\n')

        # Locate the "next page" button; stop when there is nowhere to go.
        next_tags = driver.find_elements(By.CLASS_NAME, 'pn-next')
        if not next_tags:
            return
        next_tag = next_tags[0]
        css_classes = (next_tag.get_attribute('class') or '').split()
        if 'disabled' in css_classes:
            return

        next_tag.click()
        # Give the next result page time to load before scraping it.
        time.sleep(3)
# Start Chrome through the chromedriver binary at the hard-coded path below,
# using the launch options prepared in `option`. `options=` is used because
# `chrome_options=` is a deprecated alias for it.
# NOTE(review): `executable_path` is itself deprecated in Selenium 4 -- confirm
# which Selenium version this script is meant to run on.
driver = webdriver.Chrome(executable_path=r'D:\Python2020-邱勋涛\爬虫self\淘宝\chromedriver.exe',
                          options=option)
try:
    # Open the JD home page.
    driver.get('https://www.jd.com/')
    # Let element lookups wait up to 10 seconds for elements to appear.
    driver.implicitly_wait(10)
    # Locate the search box, type the keyword and submit it with Enter.
    input_tag = driver.find_element(By.ID, 'key')
    input_tag.send_keys(key_words)
    input_tag.send_keys(Keys.ENTER)
    # Scrape the result pages into the output file.
    get_goods(driver, key_words)
    # Keep the browser open for a while after scraping.
    # NOTE(review): presumably a debugging aid -- confirm it is still wanted.
    time.sleep(1000)
finally:
    # quit() ends the whole browser session, including the chromedriver
    # process; close() would only close the current window.
    driver.quit()
# --- Web-page text carried over from the source article (not part of the script) ---
# python爬取京东商品页面信息
# 猜你喜欢
# 转载自blog.csdn.net/Qiuxuntao/article/details/119255071
# 今日推荐
# 周排行