# Fetch a cnblogs "news" fragment, locate the element whose id is
# "profile_block", pretty-print it, then print each of its direct text
# nodes next to the text of the corresponding child <a> element.
from lxml import etree
import urllib3
import requests

# The request below is sent with verify=False, which makes urllib3 emit an
# InsecureRequestWarning; silence it so the output stays readable.
urllib3.disable_warnings()
url = "https://www.cnblogs.com/mvc/blog/news.aspx?blogApp=xiaoyujuan"

# NOTE(review): verify=False disables TLS certificate verification; kept
# because the original script does so, but confirm this is acceptable.
r = requests.get(url, verify=False)
# print(r.text)

# Decode the raw response bytes explicitly as UTF-8 before parsing.
dom = etree.HTML(r.content.decode("utf-8"))
block = dom.xpath("//*[@id='profile_block']")

# tostring() returns bytes when an encoding is given, so decode for printing.
t = etree.tostring(block[0], encoding="utf-8", pretty_print=True)
print(t.decode("utf-8"))

t1 = block[0].xpath("text()")  # text nodes that are direct children of the element
print(t1)
t2 = block[0].xpath("a")  # direct child <a> elements

# zip() stops at the shorter of the two sequences.
for i, j in zip(t1, t2):
    print("%s%s" % (i, j.text))
1 from lxml import etree 2 htmldemo = ''' 3 <meta charset="UTF-8"> <!-- for HTML5 --> 4 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 5 <html><head><title>yoyo ketang</title></head><body><b><!--Hey, this in comment!--></b> 6 <p class="title"><b>yoyoketang</b></p><p class="yoyo">这里是我的微信公众号:yoyoketang <br> 7 <a href="http://www.cnblogs.com/yoyoketang/tag/fiddler/" class="sister" id="link1">fiddler教程</a><br> 8 <a href="http://www.cnblogs.com/yoyoketang/tag/python/" class="sister" id="link2">python笔记</a><br> 9 <a href="http://www.cnblogs.com/yoyoketang/tag/selenium/" class="sister" id="link3"> selenium document <br> </a> 10 Come attention now! </ P> . 11 <P class = "Story"> ... </ P> 12 is ' '' 13 is # etree.HTMLz parsed html content 14 Demo = etree.HTML (htmldemo) 15 # html content after the printing resolution, the method can be used etree.tosting 16 # encoding = "UTF-. 8" normal output parameter inside the html content in Chinese . 17 # pretty_print = True in a standard format of the output 18 is T = etree.tostring (Demo, encoding = ' UTF-. 8 ' , = pretty_print True) . 19 Print (T.