1 问题出现:
<title>个è¡ç¹ç _ ä¸æ¹è´¢å¯ç½</title>
2 问题解决:
确认网页内容的实际编码,先按该编码解码,再重新编码为 UTF-8:
def assert_encoding(response): if response.encoding == 'ISO-8859-1': encodings = requests.utils.get_encodings_from_content(response.text) if encodings: encoding = encodings[0] else: encoding = response.apparent_encoding return encoding def test_content(): mainurl = 'http://stock.eastmoney.com/news/cggdj.html' response = requests.get(url=mainurl ) print("response encoding=\t{}".format(response.encoding)) print("response html encoding =\t{}".format(response.apparent_encoding)) print("response content encoding =\t{}".format(requests.utils.get_encodings_from_content(response.text))) encoding = assert_encoding(response) print("assert encoding=\t{}".format(encoding)) encode_content = response.content.decode(encoding, 'replace').encode('utf-8', 'replace') soup = BeautifulSoup(encode_content, 'html.parser') print(str(soup)[:2000]) if __name__ == '__main__': test_content()