Day 03: Selenium and BeautifulSoup4 principles and use

# Crawling Jingdong (JD) product data
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys


def get_good(driver):
    num = 1
    try:
        time.sleep(5)
        # scroll down 5000px so the lazy-loaded products appear
        js_code = '''
            window.scrollTo(0, 5000)
        '''
        driver.execute_script(js_code)
        # wait 5 seconds for the product data to load
        time.sleep(5)
        good_list = driver.find_elements_by_class_name('gl-item')
        for good in good_list:
            # print(good)
            # product name
            good_name = good.find_element_by_css_selector('.p-name em').text
            # print(good_name)
            # product link
            good_url = good.find_element_by_css_selector('.p-name a').get_attribute('href')
            # print(good_url)
            # product price
            good_price = good.find_element_by_class_name('p-price').text
            # print(good_price)
            # product reviews
            good_commit = good.find_element_by_class_name('p-commit').text
            good_content = f'''
            product name: {good_name}
            product link: {good_url}
            product price: {good_price}
            product reviews: {good_commit}
            \n
            '''
            print(good_content)
            with open('jd.txt', 'a', encoding='utf-8') as f:
                f.write(good_content)
            num += 1
        print('Product information written successfully!')
        # find the "next page" button and click it
        next_tag = driver.find_element_by_class_name('pn-next')
        next_tag.click()
        time.sleep(5)
        # the function calls itself recursively to crawl the next page
        get_good(driver)
    finally:
        driver.close()


if __name__ == '__main__':
    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(10)
        # send a request to JD's home page
        driver.get('http://www.jd.com/')
        # type "Murphy's Law" into the JD home-page search box and press Enter
        input_tag = driver.find_element_by_id('key')
        input_tag.send_keys("Murphy's Law")
        input_tag.send_keys(Keys.ENTER)

        # call the function that collects the product information
        get_good(driver)
    finally:
        driver.close()
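
Note that the find_element_by_* helpers used above come from Selenium 3; they are deprecated and have been removed in current Selenium 4 releases, which use By-based locators and explicit waits instead. Below is a minimal sketch of the same search flow written against that newer API, assuming chromedriver is on PATH; the selectors ('key', 'gl-item', '.p-name em', '.p-name a', 'p-price') are simply the ones from the tutorial above and may break whenever JD changes its page markup.

import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def crawl_first_page(driver):
    wait = WebDriverWait(driver, 10)
    driver.get('http://www.jd.com/')
    # wait for the search box, then search for the same keyword as above
    search_box = wait.until(EC.presence_of_element_located((By.ID, 'key')))
    search_box.send_keys("Murphy's Law", Keys.ENTER)

    # wait until at least one product card exists, then scroll to trigger lazy loading
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'gl-item')))
    driver.execute_script('window.scrollTo(0, 5000)')
    time.sleep(5)

    for good in driver.find_elements(By.CLASS_NAME, 'gl-item'):
        name = good.find_element(By.CSS_SELECTOR, '.p-name em').text
        url = good.find_element(By.CSS_SELECTOR, '.p-name a').get_attribute('href')
        price = good.find_element(By.CLASS_NAME, 'p-price').text
        print(name, url, price)

# usage: driver = webdriver.Chrome(); crawl_first_page(driver); driver.quit()
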
BeautifulSoup4 principles and use

from bs4 import BeautifulSoup

html_doc = '''
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="sister"><b>$37</b></p>

<p class="story" id="p">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" >Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
'''

# build a bs4 object (soup) from the document with the lxml parser
soup = BeautifulSoup(html_doc, 'lxml')
# soup = BeautifulSoup(html_doc, 'html.parser')  # Python's built-in parser also works

# print(soup)
# the bs4 type
# print(type(soup))
# pretty-print the document
# html = soup.prettify()
# print(html)


# 1. selecting a tag directly (a Tag object is returned) *****
print(soup.a)  # get the first <a> tag
print(soup.p)  # get the first <p> tag
print(type(soup.a))  # <class 'bs4.element.Tag'>
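
# Note (not covered in the snippet above): attribute access such as soup.a only
# returns the first match; soup.find_all('a') returns every matching tag as a list.
print(soup.find_all('a'))  # all three <a class="sister"> links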

# 2. getting a tag's name
print(soup.a.name)  # the name of the <a> tag: 'a'

# 3. getting a tag's attributes *****
print(soup.a.attrs)  # get all attributes of the <a> tag as a dict
print(soup.a.attrs['href'])  # get the href attribute of the <a> tag
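
# Shorthand (not shown above, but standard bs4 behaviour): a Tag supports
# dict-style indexing, so soup.a['href'] is equivalent to soup.a.attrs['href'].
print(soup.a['href'])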

# 4. getting a tag's text *****
print(soup.p.text)  # $37
# 5. nested tag selection
print(soup.p.b)  # get the <b> tag inside the first <p> tag
print(soup.p.b.text)  # print the text inside that <b> tag

# 6. child nodes and descendant nodes
# getting the child nodes
print(soup.p.children)  # all children of the first <p> tag, returned as an iterator
print(list(soup.p.children))  # convert the iterator into a list
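
# Companion to .children (not shown above): .descendants walks the whole subtree,
# so it also yields the nodes nested inside the children, e.g. the '$37' text.
print(list(soup.p.descendants))
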
# 7. parent and ancestor nodes
print(soup.b.parent)  # the direct parent of the <b> tag
print(soup.b.parents)  # a generator over all ancestors of the <b> tag
print(list(soup.b.parents))  # convert the generator into a list
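
# Putting the pieces together: a small sketch (assuming the same html_doc and
# soup as above) that uses only the .parent, .children, .name, .text and .attrs
# accessors shown in this section to collect the text and href of every link
# in the "story" paragraph.
from bs4.element import Tag

story_p = soup.a.parent  # the <p class="story"> that holds the three <a> tags
for child in story_p.children:
    if isinstance(child, Tag) and child.name == 'a':
        print(child.text, child.attrs['href'])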

