XPath usage: starts-with() — matching the beginning of an attribute value

# Sample HTML document used as the parsing fixture for the XPath examples.
# It holds a title paragraph, a "story" paragraph with three
# <a class="sister"> links (ids link1..link3, hrefs under
# http://example.com/), a second "story" paragraph containing Chinese text,
# and a short unordered list.
html_doc = '''
 <html>
    <head>
        <title>
             The Dormouse's story
        </title>
    </head>
    <body>
        <p class="title">
            <b>
                The Dormouse's story
            </b>
        </p>
        <p class="story">
            Once upon a time there were three little sisters; and their names were
            <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
            <a class="sister" href="http://example.com/lacie" id="link2">Lacie</a>
            and
            <a class="sister" href="http://example.com/tillie" id="link3">Tillie</a>
            and they lived at the bottom of a well.
        </p>
        <p class="story">
            测试中文
        </p>
        <ul>
            <li> python </li>
            <li> C# </li>
            <li> java </li>
            <li> .net </li>
        </ul>
    </body>
 </html>
'''
# lxml supplies the HTML parser and the XPath engine used below.
from lxml import etree

# NOTE: the original article imported html_doc from a separate module; in this
# file html_doc is the sample document defined above, so no import is needed.

# Create the parsed HTML tree that the XPath queries run against.
etr = etree.HTML(html_doc)

# XPath breakdown:
#   //p[@class="story"]
#       locate every <p> whose class attribute equals "story", anywhere in
#       the document;
#   /a[starts-with(@href, "http://example.com/")]
#       among its direct <a> children, keep those whose href begins with
#       "http://example.com/";
#   /@href  or  /text()
#       take the link target or the link text of each match.
# starts-with() works on any attribute; for example
# p[starts-with(@class, "s")] selects <p> elements whose class begins with "s".
a_like = etr.xpath(
    '//p[@class="story"]/a[starts-with(@href,"http://example.com/")]/@href'
)
a_text = etr.xpath(
    '//p[@class="story"]/a[starts-with(@href,"http://example.com/")]/text()'
)

# Pair each link text with its href: {link text: href}.
link_text = dict(zip(a_text, a_like))
print(link_text)
# Look up one link by its text; '没有' ("none") is the fallback when absent.
print(link_text.get('Elsie', '没有'))


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325800548&siteId=291194637