Python interface automation: parsing HTML with lxml

 

 1 from lxml import etree
 2 import urllib3
 3 import requests
 4 urllib3.disable_warnings()
 5 url="https://www.cnblogs.com/mvc/blog/news.aspx?blogApp=xiaoyujuan"
 6 
 7 r = requests.get(url,verify=False)
 8 # print(r.text)
 9 
10 dom = etree.HTML(r.content.decode("utf-8"))
11 block = dom.xpath("//*[@id='profile_block']")
12etree.tostring = T (Block [0], encoding = ' UTF-. 8 ' , pretty_print = True)
 13 is  Print (t.decode ( " UTF-. 8 " ))
 14  
15 T1 = Block [0] .xpath ( " text () " ) # Get the current text element node 
16  Print (T1)
 . 17 T2 = Block [0] .xpath ( ' a ' ) # positioning a label 
18 is  for I, J in ZIP (T1, T2):
 . 19      Print ( " S% S% " % (I, j.text))

 

 1 from lxml import etree
 2 htmldemo = ''' 
 3 <meta charset="UTF-8"> <!-- for HTML5 -->
 4 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 5 <html><head><title>yoyo ketang</title></head><body><b><!--Hey, this in comment!--></b>
 6 <p class="title"><b>yoyoketang</b></p><p class="yoyo">这里是我的微信公众号:yoyoketang <br>
 7 <a href="http://www.cnblogs.com/yoyoketang/tag/fiddler/" class="sister" id="link1">fiddler教程</a><br>
 8 <a href="http://www.cnblogs.com/yoyoketang/tag/python/" class="sister" id="link2">python笔记</a><br>
 9 <a href="http://www.cnblogs.com/yoyoketang/tag/selenium/" class="sister" id="link3"> selenium document <br> </a>
 10  Come attention now! </ P>
 . 11  <P class = "Story"> ... </ P>
 12 is  ' '' 
13 is  # etree.HTMLz parsed html content 
14 Demo = etree.HTML (htmldemo)
 15  # html content after the printing resolution, the method can be used etree.tosting 
16  # encoding = "UTF-. 8" normal output parameter inside the html content in Chinese 
. 17  # pretty_print = True in a standard format of the output 
18 is T = etree.tostring (Demo, encoding = ' UTF-. 8 ' , = pretty_print True)
 . 19  Print (T.

 

Guess you like

Origin www.cnblogs.com/xiaoyujuan/p/11304355.html
Recommended