#!/usr/bin/env python
# coding: utf8
# python2
"""BeautifulSoup cheat sheet: looking up tags and attributes.

Tag filters (values accepted by ``find`` / ``find_all``):
    - string filter: the string must match exactly
        name   matches the tag name
        attrs  matches tag attributes
        text   matches the tag's text
    - regex filter: matching is delegated to a compiled ``re`` pattern
    - list filter: matches any entry of the list
    - bool filter: ``True`` matches anything
    - function filter: a predicate, useful for "has these attributes but
      not those" lookups

Attribute shortcuts:
    - class_
    - id
"""
import re

# Sample markup that every lookup below runs against.  The literal is split
# across adjacent string constants purely for line length; no whitespace is
# inserted between the tags.
html_doc = (
    "<html><head><title>The Dormouse's story</title></head><body>"
    '<p class="sister"><b>$37</b></p>'
    '<p class="story" id="p">Once upon a time there were three little '
    'sisters; and their names were'
    '<a href="http://example.com/elsie" class="sister" >Elsie</a>'
    '<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>'
    ' and'
    '<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>'
    'and they lived at the bottom of a well.</p>'
    '<p class="story">...</p>'
)


def have_id_not_class(tag):
    """Return True for a ``<p>`` tag that has an ``id`` but no ``class``.

    Intended to be passed *uncalled* as a function filter, e.g.
    ``soup.find_all(name=have_id_not_class)``; the library then invokes it
    once per tag.

    Args:
        tag: an object exposing ``name`` and ``has_attr(key)``.

    Returns:
        bool: whether the tag satisfies all three conditions.
    """
    return (
        tag.name == 'p'
        and tag.has_attr("id")
        and not tag.has_attr("class")
    )


def main():
    """Run every lookup example against ``html_doc`` and print the results."""
    # Imported here rather than at module level so that the predicate above
    # can be imported and exercised without bs4 being installed.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html_doc, 'lxml')

    # --- String filter -----------------------------------------------------
    # find returns the first match, find_all returns every match.
    # name: tag name / attrs: attribute match / text: text match
    p = soup.find(name='p')
    p_s = soup.find_all(name='p')
    print(p)
    print(p_s)

    # name + attrs
    p = soup.find(name='p', attrs={"id": "p"})
    print(p)

    # name + text (newer bs4 releases call this argument ``string``)
    tag = soup.find(name='title', text="The Dormouse's story")
    print(tag)

    # name + attrs + text
    tag = soup.find(name='a', attrs={"class": "sister"}, text="Elsie")
    print(tag)

    # --- Regex filter ------------------------------------------------------
    # name: match every tag whose name contains "a"
    a = soup.find(name=re.compile('a'))
    print(a)
    a_s = soup.find_all(name=re.compile('a'))
    print(a_s)

    # attrs: match the first tag whose id contains "link"
    a = soup.find(attrs={"id": re.compile('link')})
    print(a)

    # --- List filter -------------------------------------------------------
    # A tag matches when it matches any entry of the list.
    print(soup.find(name=['a', 'p', 'html', re.compile('a')]))
    print(soup.find_all(name=['a', 'p', 'html', re.compile('a')]))

    # --- Bool filter -------------------------------------------------------
    # True matches any tag name / any value of the attribute.
    print(soup.find(name=True, attrs={"id": True}))

    # --- Function filter ---------------------------------------------------
    # Pass the function object itself; calling it here would raise TypeError
    # because the filter expects the tag as its argument.
    print(soup.find_all(name=have_id_not_class))

    # --- Extra: attribute keyword shortcuts --------------------------------
    # id
    a = soup.find(id='link2')
    print(a)

    # class (``class`` is a reserved word, hence the trailing underscore)
    p = soup.find(class_='sister')
    print(p)


if __name__ == "__main__":
    main()