(A) Basic crawling:
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Basic crawl: search JD.com for one keyword, append every product found on
# the first result page to jd.txt, then click the "next page" button once.
#
# NOTE(review): the find_element_by_* helpers and the positional chromedriver
# path are Selenium 3 style calls - confirm the installed Selenium version.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1  # running index written in front of each product record
try:
    driver.implicitly_wait(10)
    # Send the request to JD.com.
    driver.get('http://www.jd.com/')

    # Type the keyword into the search box (id "key") and submit with Enter.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(3)

    good_list = driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        # Product name.
        good_name = good.find_element_by_css_selector('.p-name em').text
        # Link to the product detail page.
        link_tag = good.find_element_by_css_selector('.p-name a')
        good_url = link_tag.get_attribute('href')
        # Product price.
        good_price = good.find_element_by_class_name('p-price').text
        # Product reviews.
        good_commit = good.find_element_by_class_name('p-commit').text

        good_content = (
            f'num={num}\n'
            f'product name: {good_name}\n'
            f'product link: {good_url}\n'
            f'commodity prices: {good_price}\n'
            f'product reviews: {good_commit}\n'
            '\n'
        )
        print(good_content)
        # Append mode, so records from earlier runs are kept.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            f.write(good_content)
        num += 1

    print('商品写入完毕...')
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()
finally:
    # Always release the browser window, even if a lookup above failed.
    driver.close()
(B) Intermediate crawling:
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Intermediate crawl: same flow as a plain keyword search on JD.com, but the
# page is scrolled down before the product list is read.
#
# NOTE(review): the find_element_by_* helpers and the positional chromedriver
# path are Selenium 3 style calls - confirm the installed Selenium version.
driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
num = 1  # running index written in front of each product record
try:
    driver.implicitly_wait(10)
    # Send the request to JD.com.
    driver.get('http://www.jd.com/')

    # Type the keyword into the search box (id "key") and submit with Enter.
    input_tag = driver.find_element_by_id('key')
    input_tag.send_keys('墨菲定律')
    input_tag.send_keys(Keys.ENTER)
    time.sleep(5)

    # Scroll down 5000px (presumably so the rest of the result list gets
    # loaded before it is read - TODO confirm against the live page).
    js_code = '''
    window.scrollTo(0,5000)
    '''
    driver.execute_script(js_code)
    time.sleep(3)

    good_list = driver.find_elements_by_class_name('gl-item')
    for good in good_list:
        # Product name.
        good_name = good.find_element_by_css_selector('.p-name em').text
        # Link to the product detail page.
        link_tag = good.find_element_by_css_selector('.p-name a')
        good_url = link_tag.get_attribute('href')
        # Product price.
        good_price = good.find_element_by_class_name('p-price').text
        # Product reviews.
        good_commit = good.find_element_by_class_name('p-commit').text

        good_content = (
            f'num={num}\n'
            f'trade name: {good_name}\n'
            f'product link: {good_url}\n'
            f'Product Price: {good_price}\n'
            f'product reviews: {good_commit}\n'
            '\n'
        )
        print(good_content)
        # Append mode, so records from earlier runs are kept.
        with open('jd.txt', 'a', encoding='utf-8') as f:
            f.write(good_content)
        num += 1

    print('goods writing has been completed ...')
    next_tag = driver.find_element_by_class_name('pn-next')
    next_tag.click()
finally:
    # Always release the browser window, even if a lookup above failed.
    driver.close()
(C) Advanced crawling:
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# NOTE(review): the find_element(s)_by_* helpers and the positional
# chromedriver path are Selenium 3 style calls - confirm the installed
# Selenium version.


def get_good(driver):
    """Append the products of every result page to jd.txt.

    Starting from the result page currently shown in ``driver``, the page is
    scrolled down, each ``gl-item`` element is written to ``jd.txt`` as one
    record, and the ``pn-next`` button is clicked to move to the following
    page. The loop stops when no ``pn-next`` element is found.

    The driver is NOT closed here: the caller created it and closes it, so it
    is closed exactly once.

    Args:
        driver: a Selenium WebDriver already showing a JD.com search result
            page.
    """
    # Numbering runs across pages instead of restarting at 1 on each page.
    num = 1
    while True:
        time.sleep(5)
        # Scroll down 5000px (presumably so the rest of the result list gets
        # loaded before it is read - TODO confirm against the live page).
        driver.execute_script('window.scrollTo(0,5000)')
        time.sleep(5)

        for good in driver.find_elements_by_class_name('gl-item'):
            # Product name.
            good_name = good.find_element_by_css_selector('.p-name em').text
            # Link to the product detail page.
            link_tag = good.find_element_by_css_selector('.p-name a')
            good_url = link_tag.get_attribute('href')
            # Product price.
            good_price = good.find_element_by_class_name('p-price').text
            # Product reviews.
            good_commit = good.find_element_by_class_name('p-commit').text

            good_content = (
                f'num={num}\n'
                f'trade name: {good_name}\n'
                f'product link: {good_url}\n'
                f'commodity prices: {good_price}\n'
                f'product reviews: {good_commit}\n'
                '\n'
            )
            print(good_content)
            # Append mode, so records from earlier pages and runs are kept.
            with open('jd.txt', 'a', encoding='utf-8') as f:
                f.write(good_content)
            num += 1

        print('Goods writing has been completed ...')

        # The plural lookup returns an empty list instead of raising, which
        # gives a clean stop when there is no further page.
        # NOTE(review): if the site keeps a disabled 'pn-next' element on the
        # last page this loop will not terminate - confirm against the markup.
        next_tags = driver.find_elements_by_class_name('pn-next')
        if not next_tags:
            return
        next_tags[0].click()
        time.sleep(5)


if __name__ == '__main__':
    # The only place a browser is started, so no second instance is leaked.
    driver = webdriver.Chrome(r'D:\Python\Scripts\chromedriver.exe')
    try:
        driver.implicitly_wait(10)
        # Send the request to JD.com.
        driver.get('http://www.jd.com/')

        # Type the keyword ("Murphy's Law") into the search box and submit.
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)

        # Collect the product information from the result pages.
        get_good(driver)
    finally:
        driver.close()