基于requests-bs4(BeautifulSoup)形式爬取长安大学信息工程学院分流信息

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/peter_xiazhen/article/details/82467364
import re
import bs4
import requests
from bs4 import BeautifulSoup

def getHTMLText(url, timeout=30):
    """Fetch *url* and return the decoded response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The page text, decoded with the encoding detected from the content,
        or the sentinel string ``"error !"`` if the request failed
        (connection problem, timeout, invalid URL, or HTTP error status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
        # Pages often omit or mis-declare their charset; trust the
        # content-based guess instead of the header.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel; programming errors
        # and KeyboardInterrupt are no longer silently swallowed.
        return "error !"

def _cell_text(td):
    """Return the text of a table cell, preferring its first <p>'s string."""
    paragraph = td.p
    if paragraph is not None and paragraph.string is not None:
        return paragraph.string
    # No <p>, or the <p> has several children (``.string`` is None then):
    # fall back to the cell's concatenated text so callers always get a str.
    return td.get_text(strip=True)


def fill_list(ulist, html):
    """Append the first two columns of every table row in *html* to *ulist*.

    The first ``<tbody>`` in the document is scanned; for each child tag, the
    text of its first two ``<td>`` cells is appended to *ulist* as a
    two-element list. The first cell's text is also printed as progress
    output.

    Args:
        ulist: List that receives one ``[first, second]`` entry per row.
            It is modified in place.
        html: HTML markup to parse.

    Rows with fewer than two cells are skipped, and nothing is appended when
    the document contains no ``<tbody>`` (for example when *html* is an error
    placeholder rather than a real page).
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        return
    for tr in tbody.children:
        # .children also yields whitespace/text nodes between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 2:
            continue
        first = _cell_text(tds[0])
        print(first)
        ulist.append([first, _cell_text(tds[1])])

def print_list(ulist):
    """Write *ulist* to ``信息工程学院学生信息.txt`` as tab-separated lines.

    Args:
        ulist: Iterable of rows; each row is an iterable of cell values.
            ``None`` cells are written as empty fields.

    The file is created (or truncated) in the current working directory and
    is always encoded as UTF-8, so the output does not depend on the
    platform's default encoding.
    """
    # ``with`` guarantees the handle is closed even if a write fails.
    with open("信息工程学院学生信息.txt", 'w', encoding="utf-8") as f:
        for u in ulist:
            f.write('\t'.join("" if item is None else str(item) for item in u))
            f.write('\n')


def main():
    """Download the department's student list page and save it as text.

    Fetches the hard-coded page, extracts the table rows, and writes them to
    the output file. If the download fails, a message is printed and no file
    is written.
    """
    url = "http://it.chd.edu.cn/info/1064/7271.htm"
    html = getHTMLText(url)
    # getHTMLText signals failure with this sentinel instead of raising;
    # parsing it as a page would only produce a confusing downstream error.
    if html == "error !":
        print("failed to fetch " + url)
        return
    rows = []
    fill_list(rows, html)
    print_list(rows)

# Run the scraper only when executed as a script, so that importing this
# module does not trigger a network request and a file write.
if __name__ == "__main__":
    main()



猜你喜欢

转载自blog.csdn.net/peter_xiazhen/article/details/82467364