1. 使用requests库
import requests

# Download a single page and dump its decoded body to stdout.
url = "http://www.starbaby.cn/zhinan/609987"
req = requests.get(url)

# Explicitly set the encoding used to decode the page; usually this
# step is not needed.
req.encoding = 'utf-8'

print(req.text)
2. 使用BeautifulSoup
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
import requests


def run():
    """Fetch the listing page and print the link text of each table cell.

    Downloads http://zy.upln.cn/gongshi2014/index.html, parses it with
    the ``html.parser`` backend and prints the text of the ``<a>``
    element inside every ``<td>`` of the first ``<tbody>``.

    Nothing is printed when the page contains no ``<tbody>``; cells
    that contain no ``<a>`` element are skipped.
    """
    r = requests.get('http://zy.upln.cn/gongshi2014/index.html')

    # Pass the raw bytes to BeautifulSoup instead of re-encoding r.text:
    # r.encoding may be None (str.encode(None) raises TypeError), and the
    # decode/encode round trip can replace undecodable bytes. Given bytes,
    # BeautifulSoup works out the document encoding (GBK, UTF-8, ...)
    # on its own.
    document = BeautifulSoup(r.content, 'html.parser')

    table_body = document.find('tbody')
    if table_body is None:
        # find() returns None when there is no match; there are no rows
        # to walk in that case.
        return

    for row in table_body.find_all('tr'):
        for cell in row.find_all('td'):
            link = cell.a
            if link is None:
                # Cell without a link: nothing to print for it.
                continue
            print(link.text)


if __name__ == "__main__":
    run()