Python 爬虫：中国最好大学排名

课程来源:http://www.icourse163.org/learn/BIT-1001870001?tid=1002781006

import bs4
from bs4 import BeautifulSoup
import requests
def getText(url):
    """Download the page at ``url`` and return its body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response text, or an empty string if the request
        failed (a failure message is printed in that case).
    """
    try:
        r = requests.get(url, timeout=20)
        # Turn HTTP error statuses into exceptions so they reach the handler.
        r.raise_for_status()
        # Decode with the encoding detected from the body rather than the
        # one declared in the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures (connection, timeout, HTTP status) are
        # treated as "fetch failed"; a bare ``except`` would also swallow
        # KeyboardInterrupt and genuine programming errors.
        print("爬取失败")
        return ""
def doData(ulist, demo):
    """Extract table rows from an HTML page and append them to ``ulist``.

    For every tag that is a direct child of the first ``<tbody>``, the
    ``string`` of its 1st, 2nd and 4th ``<td>`` cells is appended to
    ``ulist`` as a three-item list.

    Args:
        ulist: List that receives one ``[cell0, cell1, cell3]`` entry per row;
            it is mutated in place.
        demo: HTML text of the page to parse.

    Returns:
        None. ``ulist`` is left untouched when the page has no ``<tbody>``.
    """
    soup = BeautifulSoup(demo, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # No table body (e.g. empty text after a failed download):
        # nothing to extract, and ``None.children`` would raise.
        return
    for child in tbody.children:
        # ``children`` also yields whitespace/text nodes between the rows.
        if not isinstance(child, bs4.element.Tag):
            continue
        tds = child('td')
        if len(tds) < 4:
            # Rows without at least four cells cannot supply index 3.
            continue
        ulist.append([tds[0].string, tds[1].string, tds[3].string])
def printResult(ulist, num):
    """Print a header line followed by at most ``num`` rows of ``ulist``.

    Args:
        ulist: Sequence of rows; items 0, 1 and 2 of each row are printed
            under the headings "排名", "学校名称" and "总分".
        num: Maximum number of rows to print. If ``ulist`` holds fewer rows,
            only the available ones are printed; values <= 0 print the
            header only.
    """
    # Each column is centred in 10 characters. The middle column takes its
    # fill character from argument 3: chr(12288) is U+3000, the full-width
    # (ideographic) space, presumably so padding matches the width of the
    # CJK school names.
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    fill = chr(12288)
    print(tplt.format("排名", "学校名称", "总分", fill))
    # Slicing never runs past the end of the list; max() keeps a negative
    # ``num`` from being read as "all but the last rows".
    for u in ulist[:max(num, 0)]:
        print(tplt.format(u[0], u[1], u[2], fill))


def main():
    """Fetch the 2016 ranking page, parse it and print the first 20 rows.

    Returns without printing a table when the download yields no text.
    """
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    txt = getText(url)
    if not txt:
        # getText returns "" (after printing its failure message) when the
        # request fails; there is nothing to parse in that case.
        return
    ulist = []
    doData(ulist, txt)
    printResult(ulist, 20)
# Script entry point: the scraper runs as soon as this module is loaded
# (there is no ``__main__`` guard).
main()

猜你喜欢

转载自blog.csdn.net/m0_38015368/article/details/80099229