# Course source: http://www.icourse163.org/learn/BIT-1001870001?tid=1002781006
import bs4
from bs4 import BeautifulSoup
import requests


def getText(url):
    """Download *url* and return the response body as text.

    The response encoding is replaced by the encoding detected from the
    content (``apparent_encoding``) before the text is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` if the request fails (connection
        error, timeout after 20 seconds, or a non-success HTTP status). A
        failure message is printed in that case.
    """
    try:
        r = requests.get(url, timeout=20)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are treated as "fetch failed"; anything
        # else (e.g. KeyboardInterrupt, programming errors) propagates.
        print("爬取失败")
        return ""


def doData(ulist, demo):
    """Append one ``[col0, col1, col3]`` entry to *ulist* per table row.

    Parses *demo* as HTML, looks up the first ``<tbody>`` and, for each
    direct child tag, collects the ``.string`` of its 1st, 2nd and 4th
    ``<td>`` cells.

    Args:
        ulist: List that is extended in place with the extracted rows.
        demo: HTML text to parse. May be empty.

    Returns:
        None. If the document has no ``<tbody>`` nothing is appended; rows
        with fewer than four ``<td>`` cells are skipped.
    """
    soup = BeautifulSoup(demo, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Happens for an empty/failed download or an unexpected page layout.
        return
    for child in tbody.children:
        # Children also include whitespace text nodes; only tags are rows.
        if not isinstance(child, bs4.element.Tag):
            continue
        tds = child('td')
        if len(tds) < 4:
            continue
        ulist.append([tds[0].string, tds[1].string, tds[3].string])


def printResult(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Each row is printed as three centred, tab-separated columns of width 10.
    The middle column is padded with the full-width space ``chr(12288)``
    instead of an ASCII space.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed. ``None`` items are printed as empty cells.
        num: Maximum number of rows to print. If *ulist* holds fewer rows,
            only those are printed; a non-positive value prints the header
            only.
    """
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    fill = chr(12288)
    print(tplt.format("排名", "学校名称", "总分", fill))
    # max(..., 0) keeps a negative num meaning "no rows" rather than letting
    # the slice count from the end of the list.
    for u in ulist[:max(num, 0)]:
        # None cannot be formatted with an alignment spec, so show it as "".
        rank = "" if u[0] is None else u[0]
        name = "" if u[1] is None else u[1]
        score = "" if u[2] is None else u[2]
        print(tplt.format(rank, name, score, fill))


def main():
    """Fetch the ranking page, extract its table rows and print the top 20."""
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    ulist = []
    txt = getText(url)
    doData(ulist, txt)
    printResult(ulist, 20)


if __name__ == "__main__":
    main()