版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Yonggie/article/details/89792102
Beautiful Soup 库的小实践
首先需要观察目标网页的结构特点,然后据此编写爬虫进行实践。
import requests
import time
from bs4 import BeautifulSoup
import bs4
# Page that is fetched and parsed when this file is run as a script.
RANKING_URL = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"


def extract_rows(soup):
    """Collect the first four cell texts of every row in the first <tbody>.

    Args:
        soup: A parsed BeautifulSoup document.

    Returns:
        A list of 4-item lists of strings, one per usable table row. The
        list is empty when the document has no <tbody>.
    """
    tbody = soup.find("tbody")
    if tbody is None:
        return []
    rows = []
    for tr in tbody.children:
        # .children also yields whitespace text nodes between the rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all("td")
        if len(tds) < 4:
            # Not a full data row; indexing it would raise IndexError.
            continue
        cells = []
        for td in tds[:4]:
            text = td.string
            if text is None:
                # .string is None when a cell has several children;
                # fall back to the concatenated text of the cell.
                text = td.get_text()
            cells.append(text)
        rows.append(cells)
    return rows


def format_ranking(rows, limit=100):
    """Render the header plus at most ``limit`` rows as aligned text lines.

    Args:
        rows: Sequence of rows; the first four items of each row are used.
            A ``None`` item is rendered as an empty cell.
        limit: Maximum number of data rows to render.

    Returns:
        A list of strings: the header line followed by one line per row.
    """
    # chr(12288) is the full-width (ideographic) space. Padding the school
    # name column with it keeps CJK text aligned in the output.
    pad = chr(12288)
    tplt = "{0:^10}\t{1:{4}^10}\t{2:^10}\t{3:^10}"
    # NOTE(review): header labels are kept verbatim; confirm that they match
    # the meaning of the first four columns of the scraped table.
    lines = [tplt.format("排名", "学校名称", "城市", "新生高考分数得分:", pad)]
    for row in rows[:limit]:
        # Formatting None with an alignment spec raises TypeError, so map
        # missing cells to the empty string first.
        cells = ["" if cell is None else str(cell) for cell in row[:4]]
        lines.append(tplt.format(*cells, pad))
    return lines


if __name__ == "__main__":
    # A timeout keeps the script from hanging forever on a dead server.
    response = requests.get(RANKING_URL, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    for line in format_ranking(extract_rows(soup)):
        print(line)