1. Crawling product information from JD.com
Function: searching for "Murphy's Law" will crawl all matching product information on JD.com; after one results page is finished, the crawler clicks through to the next page, until all information has been crawled.
"""Crawl product information from JD.com.

Searches for "Murphy's Law" (墨菲定律), scrapes name/link/price/comment
count for every product on the current results page, appends the records
to jd3.text, then clicks "next page" and repeats until no next page exists.
"""
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time


def get_good(driver):
    """Scrape all products on the current results page, write them to
    jd3.text, then recurse onto the next page.

    The driver is closed in the ``finally`` clause once crawling stops
    (e.g. when the "next page" element can no longer be found).
    """
    number = 1
    try:
        time.sleep(5)
        # Scroll down so JD's lazy-loaded items render before we read them.
        driver.execute_script('window.scrollTo(0, 5000)')
        time.sleep(5)
        good_list = driver.find_elements_by_class_name('gl-item')
        for good in good_list:
            good_name = good.find_element_by_css_selector('.p-name em').text
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            good_price = good.find_element_by_class_name('p-price').text
            good_commit = good.find_element_by_class_name('p-commit').text
            good_content = f'''
Number: {number}
Product name: {good_name}
Product link: {good_url}
Product price: {good_price}
Product reviews: {good_commit}
'''
            print(good_content)
            # Append each record immediately so partial progress is kept
            # even if the crawl is interrupted.
            with open('jd3.text', 'a', encoding='utf-8') as f:
                f.write(good_content)
            number += 1
        print("Product information written successfully!!")
        # Move on to the next results page and crawl it recursively.
        next_tag = driver.find_element_by_class_name('pn-next')
        next_tag.click()
        time.sleep(5)
        get_good(driver)
    finally:
        driver.close()


if __name__ == '__main__':
    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)
        driver.get('https://www.jd.com/')
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys('墨菲定律')
        input_tag.send_keys(Keys.ENTER)
        get_good(driver)
    finally:
        driver.close()
Screenshot of partial results:
2. Dragging an element to a specified location
2.1 Instant drag
# 2.1 Instant drag: pick up the source widget and drop it onto the target
# in a single ActionChains gesture.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
import time

browser = webdriver.Chrome()
try:
    browser.implicitly_wait(10)
    browser.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)
    # The demo page renders the draggable widgets inside an iframe.
    browser.switch_to.frame('iframeResult')
    time.sleep(1)
    drag_elem = browser.find_element_by_id('draggable')
    drop_elem = browser.find_element_by_id('droppable')
    ActionChains(browser).drag_and_drop(drag_elem, drop_elem).perform()
    time.sleep(10)
finally:
    browser.close()
2.2 Slow drag
# 2.2 Slow drag: hold the source widget and move it toward the target in
# 5-pixel steps so the motion is visible on screen.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
import time

browser = webdriver.Chrome()
try:
    browser.implicitly_wait(10)
    browser.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    time.sleep(5)
    # The demo page renders the draggable widgets inside an iframe.
    browser.switch_to.frame('iframeResult')
    time.sleep(1)
    drag_elem = browser.find_element_by_id('draggable')
    drop_elem = browser.find_element_by_id('droppable')
    # Horizontal distance between the two widgets.
    distance = drop_elem.location['x'] - drag_elem.location['x']
    ActionChains(browser).click_and_hold(drag_elem).perform()
    moved = 0
    while moved < distance:
        ActionChains(browser).move_by_offset(xoffset=5, yoffset=0).perform()
        moved += 5
        time.sleep(0.1)
    ActionChains(browser).release().perform()
    time.sleep(10)
finally:
    browser.close()
3.BeautifulSoup4
# BeautifulSoup basics: the commented-out lines below demonstrate the common
# ways to navigate a parsed document tree.
html_doc = """ <html><head><title>The Dormouse's story</title></head> <body> <p class="sister"><b>$37</b></p> <p class="story" id="p">Once upon a time there were three little sisters; and their names were <a href="http://example.com/elsie" class="sister" >Elsie</a>, <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>; and they lived at the bottom of a well.</p> <p class="story">...</p> """

from bs4 import BeautifulSoup

soup = BeautifulSoup(html_doc,'lxml')

# 1. Direct attribute access (returns the first matching tag)
# print(soup.html)
# print(type(soup.html))
# print(soup.a)
# print(soup.p)

# 2. Get a tag's name
# print(soup.a.name)

# 3. Get a tag's attributes
# print(soup.a.attrs)
# print(soup.a.attrs['href'])
# print(soup.a.attrs['class'])

# 4. Get a tag's text content
# print(soup.p.text)

# 5. Nested selection
# print(soup.html.body.p)

# 6. Child and descendant nodes
# print(soup.p.children)  # returns an iterator object
# print(list(soup.p.children))

# 7. Parent and ancestor nodes
# print(soup.b.parent)
# print(soup.b.parents)
# print(list(soup.b.parents))
Summary:
find: find the first match
find_all: find all matches
Search parameters:
    name --- match by tag name
    attrs --- match by attributes
    text --- match by text
Filter types:
    ------ string filter
        exact string matching
    ------ regex filter
        matching via the re module
    ------ list filter
        match any item in the list
    ------ bool filter
        True matches everything
    ------ method filter
        for attributes that need custom matching logic
Attributes:
    - class
    - id