Python requests 中文乱码

1 问题出现:

<title>个è¡ç¹ç _ ä¸æ¹è´¢å¯ç½</title>

2 问题解决:

确认网页内容的实际编码,先按该编码解码,再重新编码为 UTF-8

def assert_encoding(response):
    """Return the encoding that should be used to decode ``response.content``.

    A declared encoding other than ISO-8859-1 is trusted and returned
    unchanged. ISO-8859-1 (in any letter case) or a missing encoding is
    treated as unreliable, because that is what garbles non-Latin pages
    whose HTTP headers carry no usable charset. In that case the encoding
    declared inside the document is preferred, then the detected
    ``apparent_encoding``.

    Args:
        response: A ``requests`` response, or any object exposing
            ``encoding``, ``text`` and ``apparent_encoding``.

    Returns:
        The name of the encoding to decode the body with. Falls back to
        ``'utf-8'`` when nothing could be determined.
    """
    declared = response.encoding
    # Previously this path left the result unassigned and raised
    # UnboundLocalError for every page that declared a real charset.
    if declared and declared.upper() != 'ISO-8859-1':
        return declared

    # Look for a charset declared in the markup itself (meta tags / XML prolog).
    encodings = requests.utils.get_encodings_from_content(response.text)
    if encodings:
        return encodings[0]

    # Last resort: the detected encoding; it may be None, so default to
    # UTF-8 to keep the caller's bytes.decode(...) call valid.
    return response.apparent_encoding or 'utf-8'

def test_content():
    """Fetch a sample page and print its reported encodings and decoded HTML.

    Prints the encoding taken from the response, the detected
    (``apparent``) encoding, the encodings declared in the page content,
    and the encoding chosen by ``assert_encoding``; then decodes the raw
    body with the chosen encoding and prints the first 2000 characters of
    the parsed document.
    """
    mainurl = 'http://stock.eastmoney.com/news/cggdj.html'
    # Without a timeout requests may block indefinitely on a stalled server.
    response = requests.get(url=mainurl, timeout=10)
    print("response encoding=\t{}".format(response.encoding))
    print("response html encoding =\t{}".format(response.apparent_encoding))
    print("response content encoding =\t{}".format(requests.utils.get_encodings_from_content(response.text)))
    encoding = assert_encoding(response)
    print("assert encoding=\t{}".format(encoding))
    # Decode the raw bytes once and hand BeautifulSoup the resulting text.
    # Re-encoding to UTF-8 bytes would force the parser to guess the
    # encoding again, possibly trusting a <meta charset> that no longer
    # matches the bytes.
    html_text = response.content.decode(encoding, 'replace')
    soup = BeautifulSoup(html_text, 'html.parser')
    print(str(soup)[:2000])

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test_content()



猜你喜欢

转载自blog.csdn.net/zn505119020/article/details/80081660