import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Search keyword typed by the user. It is read once, when the module is
# loaded, and get_browser() uses it both as the search text and as part of
# the output file name.
key_words = input(
    "这位客官!请您输入要在淘宝查询爬取的商品:"
)
# JavaScript injected into every new document so that `navigator.webdriver`
# reads as undefined instead of exposing the automation flag.
_HIDE_WEBDRIVER_JS = """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""

# Text record written to the output file for each product.
_GOOD_TEMPLATE = (
    "\n商品信息:\n标题 %s\n地址 %s\n价格 %s\n链接 %s\n图片 %s\n店铺 %s\n付款 %s\n"
)


def _scroll_to_load(driver, steps=20, start=400, stride=500, pause=0.1):
    """Scroll the window down in increments, pausing briefly after each one."""
    offset = start
    for _ in range(steps):
        driver.execute_script('window.scrollTo(0,%s)' % offset)
        offset += stride
        time.sleep(pause)


def _describe_good(good):
    """Build the formatted text record for a single product element."""

    def text_of(selector):
        # Newlines are removed so each field stays on one line of the record.
        return good.find_element_by_css_selector(selector).text.replace("\n", "")

    return _GOOD_TEMPLATE % (
        text_of('.row a'),                                                     # title
        text_of('.location'),                                                  # seller location
        text_of('.price'),                                                     # price
        good.find_element_by_css_selector('.pic a').get_attribute('href'),     # product link
        good.find_element_by_css_selector('.pic img').get_attribute('src'),    # image URL
        text_of('.shop a'),                                                    # shop name
        text_of('.deal-cnt'),                                                  # number of payments
    )


def get_browser():
    """Search Taobao for ``key_words`` and append the results to a text file.

    Starts Chrome with the automation switches disabled, opens the Taobao
    home page, submits the module-level ``key_words`` in the search box,
    fills in the login form, scrolls through the result page, and appends one
    record per product to ``淘宝---<key_words>.csv`` (UTF-8). Finally it
    triggers the "next page" control; nothing is scraped after that, so only
    the first result page is written.

    Returns:
        None. Any exception is caught and reported on stdout, and the browser
        session is always shut down.
    """
    # Bound before the try so the finally clause is safe even when Chrome
    # itself fails to start.
    driver = None
    try:
        options = webdriver.ChromeOptions()
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        options.add_argument("--disable-blink-features=AutomationControlled")
        driver = webdriver.Chrome(
            executable_path=r'D:\Python2020-邱勋涛\爬虫self\淘宝\chromedriver.exe',
            options=options,
        )
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": _HIDE_WEBDRIVER_JS},
        )

        # Open Taobao; element lookups below wait up to 10 seconds.
        driver.get('https://www.taobao.com/')
        driver.implicitly_wait(10)

        # Type the keyword into the search box and submit with Enter.
        search_box = driver.find_element_by_id('q')
        search_box.send_keys(key_words)
        search_box.send_keys(Keys.ENTER)

        # Fill in and submit the login form.
        # NOTE(review): the account and password literals look like
        # placeholders that must be replaced with real credentials - confirm.
        driver.find_element_by_id('fm-login-id').send_keys('淘宝账户')
        driver.find_element_by_id('fm-login-password').send_keys('淘宝密码')
        driver.find_element_by_class_name('fm-btn').click()

        # Scroll through the page so that all products get a chance to load.
        _scroll_to_load(driver)

        # Outer container of the result list, then each product item in it.
        goods_div = driver.find_element_by_id('mainsrp-itemlist')
        print(goods_div)
        goods_list = goods_div.find_elements_by_class_name('item')
        print(goods_list)

        # Open the output file once instead of once per product.
        with open('淘宝---%s.csv' % key_words, 'a', encoding='utf-8') as out:
            for good in goods_list:
                out.write(_describe_good(good) + '\n')

        # "Next page" control. The class-name locator rejects compound names
        # such as 'item next', so match both classes with a CSS selector.
        driver.find_element_by_css_selector('.item.next').send_keys(Keys.ENTER)
    except Exception as exc:
        # Best-effort script: report the failure together with its cause
        # instead of discarding the exception.
        print("失败", repr(exc))
    finally:
        if driver is not None:
            # quit() ends the whole session, including the chromedriver
            # process; close() would only close the current window.
            driver.quit()
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    get_browser()
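# Scraping Taobao product page information with Python.
# Reposted from blog.csdn.net/Qiuxuntao/article/details/119255106
# (Blog page navigation residue: "You may also like", "Recommended today",
# "Weekly ranking".)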