1. Selenium
Selenium can locate page elements in the following ways:
# 1. find_element_by_id: find by id
# 2. find_element_by_link_text: find by link text (the text of an <a> tag)
# 3. find_element_by_partial_link_text: fuzzy match on link text (the text of an <a> tag)
# 4. find_element_by_tag_name: find by tag name
# 5. find_element_by_class_name: find by class name
# 6. find_element_by_name: find by the name attribute
# 7. find_element_by_css_selector: find by CSS selector
# 8. find_element_by_xpath: find by XPath
Example: logging in to Baidu
import time

from selenium import webdriver

# Log in to Baidu through a real Chrome window and print the session cookies.
bro = webdriver.Chrome()
try:
    bro.get("http://www.baidu.com")
    # Implicit wait: every element lookup below retries for up to 10 seconds.
    bro.implicitly_wait(10)

    # 1. find_element_by_id: find by id
    # 2. find_element_by_link_text: find by link text (the text of an <a> tag)
    # 3. find_element_by_partial_link_text: fuzzy match on link text (the text of an <a> tag)
    # 4. find_element_by_tag_name: find by tag name
    # 5. find_element_by_class_name: find by class name
    # 6. find_element_by_name: find by the name attribute
    # 7. find_element_by_css_selector: find by CSS selector
    # 8. find_element_by_xpath: find by XPath

    # Open the login dialog ("登录" is the link text of the login link).
    dl_button = bro.find_element_by_link_text("登录")
    dl_button.click()

    # Switch the dialog to username/password login.
    user_login = bro.find_element_by_id('TANGRAM__PSP_10__footerULoginBtn')
    user_login.click()
    time.sleep(1)

    # NOTE(review): credentials are hard-coded in the source; move them to
    # environment variables or a config file before sharing this script.
    input_name = bro.find_element_by_name('userName')
    input_name.send_keys("18861508055")
    input_password = bro.find_element_by_id("TANGRAM__PSP_10__password")
    # NOTE(review): literal reproduced exactly as found; the padding spaces and
    # commas look like formatting damage -- confirm the intended value.
    input_password.send_keys(" 87,765,396,094,165 ")

    submit_button = bro.find_element_by_id('TANGRAM__PSP_10__submit')
    time.sleep(1)
    submit_button.click()

    # Pause before reading the cookies (presumably to let the login finish).
    time.sleep(10)
    print(bro.get_cookies())
finally:
    # Always release the browser window, even if a lookup above failed.
    bro.close()

# Explicit vs. implicit waits
# Implicit wait: when looking up any element that has not loaded yet, wait up to 10 seconds.
# browser.implicitly_wait(10)  # applies to every element lookup
# Explicit wait: wait for one specific element to be loaded.
# wait = WebDriverWait(browser, 10)
# wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
2. Using Selenium to crawl product links from JD.com (Jingdong)
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # keyboard key operations


def get_goods(bro):
    """Print every product on the current results page, then keep paging.

    For each element with class ``gl-item`` the product link, image URL, name,
    price and comment count are printed. After a page is processed the
    "next page" link is clicked and the following page is handled the same way.

    The function never returns normally: it stops when the driver raises
    because an element (typically the next-page link on the last page) cannot
    be found. The caller is expected to catch that exception.

    Args:
        bro: A Selenium WebDriver already showing a JD search-results page.
    """
    # A loop instead of self-recursion, so a long result list cannot exhaust
    # the interpreter's recursion limit.
    while True:
        print("------------------------------------")
        goods_li = bro.find_elements_by_class_name('gl-item')
        for good in goods_li:
            img = good.find_element_by_css_selector('.p-img a img')
            img_url = img.get_attribute('src')
            if not img_url:
                # No 'src' yet: fall back to 'data-lazy-img', which holds a
                # scheme-less URL, hence the 'https:' prefix.
                img_url = 'https:' + img.get_attribute('data-lazy-img')
            url = good.find_element_by_css_selector('.p-img a').get_attribute('href')
            price = good.find_element_by_css_selector('.p-price i').text
            name = good.find_element_by_css_selector('.p-name em').text.replace('\n', '')
            commit = good.find_element_by_css_selector('.p-commit a').text
            print('''
            product link:%s
            product images:%s
            trade name:%s
            commodity prices:%s
            goods number of comments:%s
            ''' % (url, img_url, name, price, commit))

        # NOTE(review): the source had the link text "next", which looks like a
        # machine translation of the pager label; JD's pager is presumably
        # labelled "下一页" -- confirm against the live page.
        next_page = bro.find_element_by_partial_link_text("下一页")
        time.sleep(1)
        next_page.click()
        time.sleep(1)


bro = webdriver.Chrome()
try:
    bro.get("https://www.jd.com")
    bro.implicitly_wait(10)

    input_search = bro.find_element_by_id('key')
    input_search.send_keys("sexy lingerie")
    input_search.send_keys(Keys.ENTER)

    # Submitting the search navigates to the results page.
    try:
        get_goods(bro)
    except Exception:
        # get_goods signals "no more pages" by letting the lookup error escape.
        print("end")
finally:
    # Always release the browser window, even if the search step failed.
    bro.close()
3. Other operations: getting element attributes, etc.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By  # locator strategies: By.ID, By.CSS_SELECTOR, ...
from selenium.webdriver.common.keys import Keys  # keyboard key operations
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait  # wait for certain elements to load

# Open amazon.cn, wait for the image container, and inspect its <img> tag.
browser = webdriver.Chrome()
try:
    browser.get('https://www.amazon.cn/')

    # Explicit wait: block for up to 10 seconds until the container is in the DOM.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_element_located((By.ID, 'cc-lm-tcgShowImgContainer')))

    tag = browser.find_element(By.CSS_SELECTOR, '#cc-lm-tcgShowImgContainer img')

    # Get a tag attribute
    print(tag.get_attribute('src'))

    # Get the text content
    # tag.text

    # Get the tag's ID, location, tag name and size (for reference)
    print(tag.id)
    print(tag.location)
    print(tag.tag_name)
    print(tag.size)
finally:
    # Always release the browser window, even if the wait timed out.
    browser.close()

# Caption: getting tag attributes
Simulating the browser's back and forward buttons:
# Simulate browser back and forward
# browser.back()
# time.sleep(10)
# browser.forward()
Cookie management:
# Cookie management
# print(browser.get_cookies())  # get the cookies
# browser.add_cookie({'k1': 'xxx', 'k2': 'yyy'})  # set a cookie
# print(browser.get_cookies())
Running JavaScript, tab management, and action chains
# Run JS
# from selenium import webdriver
# import time
#
# bro = webdriver.Chrome()
# bro.get("http://www.baidu.com")
# bro.execute_script('alert("Hello World")')  # pop up an alert dialog
# time.sleep(5)

# Tab management
# import time
# from selenium import webdriver
#
# browser = webdriver.Chrome()
# browser.get('https://www.baidu.com')
# browser.execute_script('window.open()')
#
# print(browser.window_handles)  # get all the tabs
# browser.switch_to_window(browser.window_handles[1])
# browser.get('https://www.taobao.com')
# time.sleep(3)
# browser.switch_to_window(browser.window_handles[0])
# browser.get('https://www.sina.com.cn')
# browser.close()

# Action chains
# from selenium import webdriver
# from selenium.webdriver import ActionChains
#
# from selenium.webdriver.support.wait import WebDriverWait  # wait for certain elements to load
# import time
#
# driver = webdriver.Chrome()
# driver.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
# wait = WebDriverWait(driver, 3)
# # driver.implicitly_wait(3)  # use an implicit wait instead
#
# try:
#     driver.switch_to.frame('iframeResult')  # switch into the iframeResult frame
#     sourse = driver.find_element_by_id('draggable')
#     target = driver.find_element_by_id('droppable')
#
#     # Approach 1: queue the actions on one chain and run them serially
#     # actions = ActionChains(driver)  # get the action-chain object
#     # actions.drag_and_drop(sourse, target)  # queue the action on the chain, ready for serial execution
#     # actions.perform()
#
#     # Approach 2: separate chains, so each move can use a different offset
#     ActionChains(driver).click_and_hold(sourse).perform()
#     distance = target.location['x'] - sourse.location['x']
#
#     track = 0
#     while track < distance:
#         ActionChains(driver).move_by_offset(xoffset=2, yoffset=0).perform()
#         track += 2
#
#     ActionChains(driver).release().perform()
#
#     time.sleep(10)
#
# finally:
#     driver.close()