day03: Crawling Jingdong (JD.com) product information

(A) Basic crawling:

import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num=1
try:
    driver.implicitly_wait(10)
    # Send a request to JD
    driver.get('http://www.jd.com/')

    input_tag=driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(3)

    good_list=driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        #print(good)
        # Product name
        good_name=good.find_element_by_css_selector('.p-name em').text
        #print(good_name)

        # Product detail link
        good_url=good.find_element_by_css_selector('.p-name a').get_attribute('href')
        #print(good_url)

        # Product price
        good_price=good.find_element_by_class_name('p-price').text
        #print(good_price)

        # Product reviews
        good_commit=good.find_element_by_class_name('p-commit').text

        good_content=f'''
        num = {num}
        Product name: {good_name}
        Product link: {good_url}
        Product price: {good_price}
        Product reviews: {good_commit}
        \n
        '''
        print(good_content)
        with open('jd.txt','a',encoding='utf-8') as f:
            f.write(good_content)
        num+=1
    print('Finished writing products...')

    next_tag=driver.find_element_by_class_name('pn-next')
    next_tag.click()

finally:
    driver.close()
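
The time.sleep(3) above just gives the results page a fixed amount of time to render. A more robust variant is an explicit wait: the sketch below is not part of the original script and assumes the same chromedriver path plus Selenium's standard WebDriverWait / expected_conditions API; it blocks only until the product list is actually present, then carries on exactly as before.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
try:
    driver.get('http://www.jd.com/')
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    # Wait up to 10 seconds for at least one product item to be present,
    # instead of sleeping for a fixed 3 seconds
    good_list = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'gl-item'))
    )
    print(f'{len(good_list)} products found')
    # ...loop over good_list and save the fields as in the script above...
finally:
    driver.close()

presence_of_all_elements_located returns the matched elements as soon as at least one '.gl-item' exists, so the parsing loop from the basic version can be reused unchanged.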

(B) Intermediate crawling:

import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num=1
try:
    driver.implicitly_wait(10)
    # Send a request to JD
    driver.get('http://www.jd.com/')

    input_tag=driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)

    time.sleep(5)
    # Scroll down 5000px
    js_code='''
    window.scrollTo(0,5000)
    '''
    driver.execute_script(js_code)

    time.sleep(3)

    good_list=driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        #print(good)
        # Product name
        good_name=good.find_element_by_css_selector('.p-name em').text
        #print(good_name)

        # Product detail link
        good_url=good.find_element_by_css_selector('.p-name a').get_attribute('href')
        #print(good_url)

        # Product price
        good_price=good.find_element_by_class_name('p-price').text
        #print(good_price)

        # Product reviews
        good_commit=good.find_element_by_class_name('p-commit').text

        good_content=f'''
        num = {num}
        Product name: {good_name}
        Product link: {good_url}
        Product price: {good_price}
        Product reviews: {good_commit}
        \n
        '''
        print(good_content)
        with open('jd.txt','a',encoding='utf-8') as f:
            f.write(good_content)
        num+=1
    print('Finished writing products...')

    next_tag=driver.find_element_by_class_name('pn-next')
    next_tag.click()

finally:
    driver.close()
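
Scrolling to a hard-coded 5000px works for this page, but JD loads part of every results page lazily, so a single jump can still miss items further down. Below is a minimal sketch of a step-by-step scroll; the helper name scroll_to_bottom and its pause/max_rounds parameters are illustrative, not from the original script. It keeps scrolling until the page height stops growing:

import time

def scroll_to_bottom(driver, pause=1, max_rounds=10):
    """Scroll down in steps so lazily loaded products get rendered."""
    last_height = driver.execute_script('return document.body.scrollHeight')
    for _ in range(max_rounds):
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        time.sleep(pause)
        new_height = driver.execute_script('return document.body.scrollHeight')
        if new_height == last_height:
            break    # page height stopped growing, nothing more to load
        last_height = new_height

# usage: after the search results have loaded
# scroll_to_bottom(driver)
# good_list = driver.find_elements_by_class_name('gl-item')

Calling scroll_to_bottom(driver) in place of the fixed window.scrollTo(0,5000) keeps the rest of the intermediate script unchanged and does not depend on guessing the page height.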

(C) Advanced crawling:

import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver

driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
def get_good(driver):
    num=1
    try:

        time.sleep(5)
        # Scroll down 5000px
        js_code = '''
        window.scrollTo(0,5000)
        '''
        driver.execute_script(js_code)

        time.sleep(5)

        good_list=driver.find_elements_by_class_name('gl-item')
        for good in good_list:
            #print(good)
            # Product name
            good_name=good.find_element_by_css_selector('.p-name em').text
            #print(good_name)

            # Product detail link
            good_url=good.find_element_by_css_selector('.p-name a').get_attribute('href')
            #print(good_url)

            # Product price
            good_price=good.find_element_by_class_name('p-price').text
            #print(good_price)

            # Product reviews
            good_commit=good.find_element_by_class_name('p-commit').text

            good_content=f'''
            num = {num}
            Product name: {good_name}
            Product link: {good_url}
            Product price: {good_price}
            Product reviews: {good_commit}
            \n
            '''
            print(good_content)
            with open('jd.txt','a',encoding='utf-8') as f:
                f.write(good_content)
            num+=1
        print('Finished writing products...')

        next_tag=driver.find_element_by_class_name('pn-next')
        next_tag.click()
        time.sleep(5)
        # Recursively call this function to crawl the next page
        get_good(driver)
    finally:
        driver.close()

if __name__ == '__main__':
    driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        # Send a request to JD
        driver.get('http://www.jd.com/')

        input_tag=driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)
        # Call the function to crawl the product information
        get_good(driver)
    finally:
        driver.close()
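
In this advanced version get_good() calls itself after clicking the next-page button, and driver.close() sits in two separate finally blocks, so the browser is closed from inside the recursion as well as from __main__. A plain loop does the same paging without recursion (Python caps recursion depth at about 1000 frames by default) and closes the browser exactly once. Below is a minimal sketch; max_pages and the crawl_pages name are illustrative, and the per-page parsing is the same code as the body of get_good() above:

import time
from selenium.common.exceptions import NoSuchElementException

def crawl_pages(driver, max_pages=5):
    """Crawl up to max_pages result pages with a loop instead of recursion."""
    for page in range(max_pages):
        time.sleep(5)
        driver.execute_script('window.scrollTo(0,5000)')
        time.sleep(5)
        # ...collect and save the products on this page,
        #    exactly as in the body of get_good() above...
        try:
            driver.find_element_by_class_name('pn-next').click()
        except NoSuchElementException:
            break    # no next-page button, so this was the last page

# usage: open the search results as in __main__ above, then:
# crawl_pages(driver)
# driver.close()    # close the browser once, after all pages are done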

 

Origin www.cnblogs.com/changgeyimeng/p/11128262.html