Day 03:使用 Selenium 爬取京东商品信息

(一)初级爬取:

import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

# Basic crawl: search JD for one keyword, append every item of the first
# result page to jd.txt, then click the "next page" button and shut down.
# NOTE(review): webdriver.Chrome(<path>) and the find_element(s)_by_* helpers
# are Selenium 3 style API; recent Selenium 4 releases no longer provide
# them - confirm the installed Selenium version.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1  # running item number written into each record
try:
    # Let element lookups wait up to 10 seconds before giving up.
    driver.implicitly_wait(10)
    # Open the JD home page.
    driver.get('http://www.jd.com/')

    # Type the keyword into the search box and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    # Fixed pause so the result page has time to load.
    time.sleep(3)

    good_list = driver.find_elements_by_class_name('gl-item')
    # Open the output file once for the whole page rather than once per item.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Item title.
            good_name = good.find_element_by_css_selector('.p-name em').text
            # Link to the item's detail page.
            good_url = good.find_element_by_css_selector(
                '.p-name a').get_attribute('href')
            # Price text.
            good_price = good.find_element_by_class_name('p-price').text
            # Review/comment summary text.
            good_commit = good.find_element_by_class_name('p-commit').text

            # Same record layout as before, including the 8-space indentation
            # and trailing blank lines, spelled out explicitly so it does not
            # depend on how deeply this code is nested.
            good_content = (
                '\n'
                f'        num={num}\n'
                f'        商品名称:{good_name}\n'
                f'        商品链接:{good_url}\n'
                f'        商品价格:{good_price}\n'
                f'        商品评价:{good_commit}\n'
                '        \n\n'
                '        '
            )
            print(good_content)
            f.write(good_content)
            num += 1
    print('商品写入完毕...')

    # Move on to the next result page; this script does not scrape it.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()

finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()

(二)中级爬取:

import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

# Intermediate crawl: same as the basic version, but scrolls the result page
# down before collecting items, then clicks "next page" and shuts down.
# NOTE(review): webdriver.Chrome(<path>) and the find_element(s)_by_* helpers
# are Selenium 3 style API; recent Selenium 4 releases no longer provide
# them - confirm the installed Selenium version.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1  # running item number written into each record
try:
    # Let element lookups wait up to 10 seconds before giving up.
    driver.implicitly_wait(10)
    # Open the JD home page.
    driver.get('http://www.jd.com/')

    # Type the keyword into the search box and submit it with ENTER.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(5)
    # Scroll the window down to y=5000px.
    # Presumably this makes the page render items that load lazily - verify.
    js_code = '''
    window.scrollTo(0,5000)
    '''
    driver.execute_script(js_code)

    # Fixed pause after scrolling before reading the item list.
    time.sleep(3)

    good_list = driver.find_elements_by_class_name('gl-item')
    # Open the output file once for the whole page rather than once per item.
    with open('jd.txt', 'a', encoding='utf-8') as f:
        for good in good_list:
            # Item title.
            good_name = good.find_element_by_css_selector('.p-name em').text
            # Link to the item's detail page.
            good_url = good.find_element_by_css_selector(
                '.p-name a').get_attribute('href')
            # Price text.
            good_price = good.find_element_by_class_name('p-price').text
            # Review/comment summary text.
            good_commit = good.find_element_by_class_name('p-commit').text

            # Same record layout as before, including the 8-space indentation
            # and trailing blank lines, spelled out explicitly so it does not
            # depend on how deeply this code is nested.
            good_content = (
                '\n'
                f'        num={num}\n'
                f'        商品名称:{good_name}\n'
                f'        商品链接:{good_url}\n'
                f'        商品价格:{good_price}\n'
                f'        商品评价:{good_commit}\n'
                '        \n\n'
                '        '
            )
            print(good_content)
            f.write(good_content)
            num += 1
    print('商品写入完毕...')

    # Move on to the next result page; this script does not scrape it.
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()

finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()

(三)高级爬取:

import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

def get_good(driver, max_pages=None):
    """Scrape JD search-result pages into jd.txt, page after page.

    For each result page the window is scrolled down, every ``gl-item``
    element is read (name, link, price, review text), and one record per
    item is printed and appended to ``jd.txt``. The "next page" button is
    then clicked and the process repeats.

    The driver belongs to the caller: this function never closes it, so the
    caller's own cleanup is the only place the browser gets shut down.

    Args:
        driver: A Selenium WebDriver already showing a JD search-result page.
        max_pages: Optional upper bound on the number of pages to scrape.
            ``None`` (the default) keeps going until no usable "next page"
            button is found.
    """
    # Script that scrolls the window down to y=5000px.
    # Presumably this makes the page render items that load lazily - verify.
    js_code = '''
        window.scrollTo(0,5000)
        '''
    num = 1  # running item number, continuous across pages
    pages_done = 0

    # Iterate instead of recursing: no growing call stack, and the loop has
    # explicit stop conditions rather than ending only through an exception.
    while True:
        time.sleep(5)
        driver.execute_script(js_code)
        time.sleep(5)

        good_list = driver.find_elements_by_class_name('gl-item')
        # Open the output file once per page rather than once per item.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            for good in good_list:
                # Item title.
                good_name = good.find_element_by_css_selector(
                    '.p-name em').text
                # Link to the item's detail page.
                good_url = good.find_element_by_css_selector(
                    '.p-name a').get_attribute('href')
                # Price text.
                good_price = good.find_element_by_class_name('p-price').text
                # Review/comment summary text.
                good_commit = good.find_element_by_class_name('p-commit').text

                # Same record layout as before, including the 12-space
                # indentation and trailing blank lines, spelled out
                # explicitly so it does not depend on code nesting depth.
                good_content = (
                    '\n'
                    f'            num={num}\n'
                    f'            商品名称:{good_name}\n'
                    f'            商品链接:{good_url}\n'
                    f'            商品价格:{good_price}\n'
                    f'            商品评价:{good_commit}\n'
                    '            \n\n'
                    '            '
                )
                print(good_content)
                f.write(good_content)
                num += 1

        print('商品写入完毕...')

        pages_done += 1
        if max_pages is not None and pages_done >= max_pages:
            break

        # The plural lookup returns an empty list instead of raising when
        # the button is missing, which gives a clean end-of-results check.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            break
        next_tag = next_tags[0]
        # NOTE(review): presumably JD marks the last page's button with an
        # extra "disabled" class - confirm against the live page.
        if 'disabled' in (next_tag.get_attribute('class') or '').split():
            break
        next_tag.click()

        # Pause after the click so the next page can start loading.
        time.sleep(5)

# Entry point: open JD, search for the keyword, then hand the result page
# over to get_good() for scraping.
if __name__ == '__main__':
    driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
    try:
        # Let element lookups wait up to 10 seconds before giving up.
        driver.implicitly_wait(10)
        # Open the JD home page.
        driver.get('http://www.jd.com/')

        # Type the keyword into the search box and submit it with ENTER.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)
        # Scrape the search results.
        get_good(driver)
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        driver.quit()

猜你喜欢

转载自www.cnblogs.com/changgeyimeng/p/11128262.html