BeautifulSoup4: searching the document tree (Day3-8)

#!/usr/bin/env python
# coding: utf8
# python2
import re

from bs4 import BeautifulSoup

'''
Tag lookup and attribute lookup:

    Tags:
        - string filter: matches the whole string
            name   matches the tag name
            attrs  matches the tag attributes
            text   matches the tag text

        - regular-expression filter
            matches using the re module

        - list filter
            matches any item in the list

        - bool filter
            True matches everything

        - method filter
            used for lookups where some attributes are required
            and other attributes must be absent

    Properties:
        - class_
        - id
'''
# Sample HTML document that every lookup below searches.
html_doc = """
<html><head><title>The Dormouse's story</title></head><body><p class="sister"><b>$37</b></p><p class="story" id="p">Once upon a time there were three little sisters; and their names were<a href="http://example.com/elsie" class="sister" >Elsie</a><a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>and they lived at the bottom of a well.</p><p class="story">...</p>
"""

# Parse the sample document with the lxml parser.
soup = BeautifulSoup(html_doc, 'lxml')

'''
String filter
'''
# find and find_all search the document
# name   tag name
# attrs  attribute to find and match
# text   text to match

# name: look up <p> tags by tag name.
p = soup.find(name='p')
p_s = soup.find_all(name='p')

print(p)
print(p_s)

# name + attrs
p = soup.find(name='p', attrs={"id": "p"})
print(p)

# name + text
tag = soup.find(name='title', text="The Dormouse's story")
print(tag)

# name + attrs + text
# Tag and attribute values are lowercase to match html_doc.
tag = soup.find(name='a', attrs={"class": "sister"}, text="Elsie")
print(tag)

'''
Regular-expression filter
matches using the re module
'''

# name
# Use the re module to match nodes whose tag name contains "a".
a = soup.find(name=re.compile('a'))
print(a)

a_s = soup.find_all(name=re.compile('a'))
print(a_s)

# attrs
# The ids in html_doc are lowercase ("link2", "link3"), so the pattern is too.
a = soup.find(attrs={"id": re.compile('link')})
print(a)

# List filter
# A tag matches when it matches any item in the list.
print(soup.find(name=['a', 'p', 'html', re.compile('a')]))
print(soup.find_all(name=['a', 'p', 'html', re.compile('a')]))

# Bool filter
# True matches any tag name / any value of the given attribute.
print(soup.find(name=True, attrs={"id": True}))

# Method filter
# for lookups that require some attributes to be present and others to be absent

def have_id_not_class(tag):
    """Return True for a <p> tag that has an ``id`` attribute but no ``class``.

    Meant to be passed, uncalled, as a filter function to ``find`` /
    ``find_all``, which invoke it once per candidate tag.

    Args:
        tag: an element exposing ``name`` and ``has_attr(attribute_name)``.

    Returns:
        bool: True when the tag matches the rule above, otherwise False.
    """
    return (
        tag.name == 'p'
        and tag.has_attr("id")
        and not tag.has_attr("class")
    )

# print(soup.find_all(name=<function object>))
# Pass the function itself, not the result of calling it.
print(soup.find_all(name=have_id_not_class))

# Supplementary knowledge:
# id
a = soup.find(id='link2')
print(a)

# class (spelled class_ because "class" is a Python keyword)
p = soup.find(class_='sister')
print(p)

 

Guess you like

Origin www.cnblogs.com/zxdhahaha/p/11128335.html