#!/usr/bin/env python
# coding: utf8
# python2
"""BeautifulSoup selectors: walking the document tree via tag attributes.

Parses a small sample HTML document and prints the result of each
navigation style (direct tag access, name, attributes, text, nested
access, children, parents and siblings).
"""
from bs4 import BeautifulSoup

# Sample document used by every example below.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="sister"><b>$37</b></p>
<p class="story" id="p">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" >Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<p class="story">...</p>
"""

# The parser name must be exactly 'lxml' (no surrounding spaces); the lxml
# package has to be installed for this to work.
soup = BeautifulSoup(html_doc, 'lxml')

# Traversing the document tree

# 1. Direct access by tag name
print(soup.html)
print(type(soup.html))
print(soup.a)
print(soup.p)

# 2. Get the tag's name
print(soup.a.name)

# 3. Get the tag's attributes
print(soup.a.attrs)  # all attributes of the <a> tag
print(soup.a.attrs['href'])

# 4. Get the tag's text content
print(soup.a.text)

# 5. Nested selection
print(soup.html.body.p)

# 6. Child nodes
print(soup.p.children)  # returns an iterator object
print((list(soup.p.children)))

# 7. Parent node and ancestor nodes
print(soup.b.parent)
print(soup.b.parents)
print(list(soup.b.parents))

# 8. Sibling nodes
print(soup.a)
# Next sibling node
print(soup.a.next_sibling)
# All following sibling nodes; returns a generator
print(soup.a.next_siblings)
print(list(soup.a.next_siblings))
# Previous sibling node
print(soup.a.previous_sibling)
# All preceding sibling nodes; returns a generator
print(list(soup.a.previous_siblings))