Targeted web crawling: university rankings

Target URL:

http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html

Result:

 

 

# coding=utf-8
import requests
from bs4 import BeautifulSoup
import bs4
def getHTML(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, invalid URL, or an HTTP 4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Must be *called*: the bare attribute reference is a no-op and lets
        # HTTP error pages through as if they were valid content.
        r.raise_for_status()
        # Decode using the encoding detected from the body rather than the
        # one declared in the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing downloaded".
        return ""
def getUnlist(unifo, html):
    """Extract ranking rows from *html* and append them to *unifo*.

    Every ``<tr>`` inside the first ``<tbody>`` contributes one entry of the
    form ``[cell 0, cell 1, cell 3]`` (the ``.string`` of each ``<td>``).
    Callers in this file print them as rank, university name and score.

    Args:
        unifo: List that receives the extracted rows; mutated in place.
        html: Page markup to parse. May be empty.

    Returns:
        None. Results are delivered through *unifo*.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to add.
        return
    for tr in tbody.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) < 4:
            # Not a data row; index 3 would be out of range.
            continue
        # NOTE: .string is None when a cell holds more than one child node.
        unifo.append([tds[0].string, tds[1].string, tds[3].string])
        
def printUnlist(unifo, num):
    """Print a header line followed by at most *num* rows of *unifo*.

    Columns are tab-separated and centred: rank (width 10), university name
    (width 15) and score (width 10). The name column is padded with the
    full-width space U+3000 so that CJK names line up.

    Args:
        unifo: Sequence of rows; the first three items of each row are
            printed. ``None`` items are printed as empty cells.
        num: Maximum number of rows to print. If fewer rows are available,
            only those are printed.
    """
    fill = chr(12288)  # full-width (CJK) space used as the padding character
    tplt = "{0:^10}\t{1:{3}^15}\t{2:^10}"
    print(tplt.format("rank", "university name", "score", fill))
    # Slice instead of indexing range(num) so a short list cannot raise.
    for row in unifo[:max(num, 0)]:
        rank, name, score = ("" if cell is None else cell for cell in row[:3])
        print(tplt.format(rank, name, score, fill))
def main():
    """Fetch the 2016 ranking page, parse it and print the first 20 rows."""
    rows = []
    page = getHTML("http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html")
    getUnlist(rows, page)
    printUnlist(rows, 20)
# Script entry point: run the crawl when this file is executed.
main()

 

Guess you like

Origin www.cnblogs.com/ww123/p/11616852.html