Web crawler: using the urllib library to fetch the Baidu home page without any anti-anti-crawling measures

"""
Fetch the Baidu home page.
chardet: library used to detect the character encoding of raw bytes.
"""

from urllib import request

import chardet

# Fetch the page source as raw bytes. The response is used as a context
# manager so the underlying connection is closed once the body is read,
# and a timeout keeps the script from hanging on an unresponsive server.
url = "https://www.baidu.com/"
with request.urlopen(url, timeout=10) as response:
    byteHtml = response.read()
print(byteHtml)

# Detect the encoding of the raw bytes. chardet.detect returns a dict such as
# {'encoding': 'ascii', 'confidence': 1.0, 'language': ''}
result = chardet.detect(byteHtml)
print(result)
print(type(result))

# Decode the raw bytes with the detected encoding to get the page source as
# text. chardet reports an encoding of None when it cannot decide (for
# example on an empty body), so fall back to UTF-8 instead of passing None
# to bytes.decode, which would raise TypeError.
# NOTE(review): the page obtained this way is short and incomplete —
# presumably because the request carries no browser-like headers; confirm.
resHtml = byteHtml.decode(result["encoding"] or "utf-8")
print(resHtml)




Guess you like

Source: www.cnblogs.com/YangQingHong/p/10968898.html