Python 爬取淘宝商品页面信息

import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Search term, prompted for once when the module is loaded. get_browser()
# types it into Taobao's search box and also uses it in the output file name.
key_words = input("这位客官!请您输入要在淘宝查询爬取的商品:")


def _clean_text(item, selector):
    """Return the text of the first match for *selector* under *item*, without newlines."""
    return item.find_element_by_css_selector(selector).text.replace("\n", "")


def get_browser(driver_path=r'D:\Python2020-邱勋涛\爬虫self\淘宝\chromedriver.exe'):
    """Search Taobao for ``key_words`` and append the result page's items to a file.

    Steps: start Chrome with automation hints disabled, open taobao.com,
    submit the search, fill in the login form, scroll the result page so that
    lazily loaded items are rendered, write one text record per item to
    ``淘宝---<key_words>.csv`` (append mode, UTF-8) and finally trigger the
    "next page" control.

    Args:
        driver_path: Location of the chromedriver executable. Defaults to the
            path the script was originally written for.

    Returns:
        None. Any failure is reported on stdout/stderr instead of being
        raised, and the browser is always shut down.
    """
    import traceback

    # Bound before the try block so the cleanup in ``finally`` is safe even
    # when the browser could not be started at all.
    driver = None
    try:
        options = webdriver.ChromeOptions()
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        options.add_argument("--disable-blink-features=AutomationControlled")
        driver = webdriver.Chrome(executable_path=driver_path, options=options)

        # Make navigator.webdriver read as undefined on every new document.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                            Object.defineProperty(navigator, 'webdriver', {
                              get: () => undefined
                            })
                          """
        })

        # Open Taobao and submit the search.
        driver.get('https://www.taobao.com/')
        driver.implicitly_wait(10)
        input_tag = driver.find_element_by_id('q')
        input_tag.send_keys(key_words)
        input_tag.send_keys(Keys.ENTER)

        # Fill in the login form (the credentials here are placeholders).
        account_number = driver.find_element_by_id('fm-login-id')
        account_number.send_keys('淘宝账户')
        password = driver.find_element_by_id('fm-login-password')
        password.send_keys('淘宝密码')
        login_tag = driver.find_element_by_class_name('fm-btn')
        login_tag.click()

        # Scroll down in 20 steps (400, 900, ..., 9900 px) so that all items
        # of the current page get loaded.
        for offset in range(400, 400 + 20 * 500, 500):
            driver.execute_script('window.scrollTo(0,%s)' % offset)
            time.sleep(0.1)

        # Container holding all items, then the individual items.
        goods_div = driver.find_element_by_id('mainsrp-itemlist')
        print(goods_div)
        goods_list = goods_div.find_elements_by_class_name('item')
        print(goods_list)

        record_template = '''
                               商品信息:
                                   标题  %s
                                   地址  %s
                                   价格  %s
                                   链接  %s
                                   图片  %s
                                   店铺  %s
                                   付款  %s
                   '''

        # Open the output file once for the whole page rather than once per
        # item; skip it entirely when nothing was found.
        if goods_list:
            with open('淘宝---%s.csv' % key_words, 'a', encoding='utf-8') as f:
                for good in goods_list:
                    # Title, location, price, link, image, shop, payment count.
                    good_title = _clean_text(good, '.row a')
                    good_address = _clean_text(good, '.location')
                    good_price = _clean_text(good, '.price')
                    good_link = good.find_element_by_css_selector('.pic a').get_attribute('href')
                    good_img = good.find_element_by_css_selector('.pic img').get_attribute('src')
                    good_shop = _clean_text(good, '.shop a')
                    good_pay_number = _clean_text(good, '.deal-cnt')

                    record = record_template % (
                        good_title, good_address, good_price, good_link,
                        good_img, good_shop, good_pay_number)
                    f.write(record + '\n')

        # "Next page" control. The class-name locator accepts a single class
        # only, so the two classes are matched with a CSS selector instead.
        driver.find_element_by_css_selector('.item.next').send_keys(Keys.ENTER)
    except Exception:
        # Best-effort script: report the failure (with details) and carry on
        # to the cleanup instead of crashing.
        print("失败")
        traceback.print_exc()
    finally:
        if driver is not None:
            # quit() also terminates the chromedriver process; close() would
            # only close the current window.
            driver.quit()


# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    get_browser()

效果:

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/Qiuxuntao/article/details/119255106