Finding simple page elements in Python using XPath

from lxml import html

def parse():
    """Read the local HTML file and print pieces of it extracted with XPath.

    Loads ``./venv/static_/index.html``, parses it with ``lxml.html`` and
    prints, in order: the first ``<h3>`` text, the ``<li>`` count and each
    item's text, the text of the highlighted ``<li>`` items, the text and
    ``href`` of the link inside the container ``<div>``, the ``<p>`` count
    and first paragraph, and the first table header cell.

    Raises:
        OSError: If the HTML file cannot be opened.
        IndexError: If an expected element is missing, because ``xpath()``
            then returns an empty list and ``[0]`` fails.
    """
    # Read the file content; ``with`` closes the handle even if reading fails.
    with open('./venv/static_/index.html', 'r', encoding='utf-8') as f:
        s = f.read()

    selector = html.fromstring(s)

    # Parse the <h3> title.
    h3 = selector.xpath('/html/body/h3/text()')
    print(h3[0])  # xpath() returns a list, so take the first entry

    # Parse the contents inside <ul>.
    ul = selector.xpath('/html/body/ul/li')
    # ul = selector.xpath('//ul/li') may also be used
    print(len(ul))
    for li in ul:
        print(li.xpath('text()')[0])

    # Parse the contents inside <tr>.
    # tr = selector.xpath('/html/body/form/table/tr/td/text()')
    # print(tr)

    # Select only the <li> elements that carry a specific class value.
    # NOTE(review): attribute values are case-sensitive in XPath; "important"
    # and "container" below are assumed to be lowercase in the page — confirm
    # against index.html.
    ul2 = selector.xpath('/html/body/ul/li[@class="important"]/text()')
    print(ul2)

    # Text of the <a> element inside the container <div>.
    a = selector.xpath('//div[@id="container"]/a/text()')
    print(a[0])

    # The href attribute of that same link.
    alink = selector.xpath('//div[@id="container"]/a/@href')
    print(alink[0])

    # Parse the <p> tags.
    p = selector.xpath('/html/body/p/text()')
    # p = selector.xpath('/html/body/p[last()]/text()')  # get the last one
    print(len(p))
    print(p[0])

    # XPath taken from the browser's XPath builder. The browser reports
    # /html/body/form/table/tbody/tr[1]/th; the tbody step is dropped here,
    # presumably because the source file has no explicit <tbody> — confirm.
    test = selector.xpath('/html/body/form/table/tr[1]/th/text()')
    print(test[0])


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    parse()

 

Guess you like

Origin www.cnblogs.com/zsjlovewm/p/11106458.html