python爬取表单 #AttributeError: 'NoneType' object has no attribute 'children' 错误

我看的是一个mooc老师的python爬虫课
然后我照本宣科地写了以下代码
结果报错

#CrawUnivRankingA.py
import requests
from bs4 import BeautifulSoup
import bs4
 
def getHTMLText(url):
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        return ""
 
def fillUnivList(ulist, html):
    soup = BeautifulSoup(html, "html.parser")
    for tr in soup.find('tbody').children:
        if isinstance(tr, bs4.element.Tag):
            tds = tr('td')
            ulist.append([tds[0].string, tds[1].string, tds[3].string])
 
def printUnivList(ulist, num):
    print("{:^10}\t{:^6}\t{:^10}".format("排名","学校名称","总分"))
    for i in range(num):
        u=ulist[i]
        print("{:^10}\t{:^6}\t{:^10}".format(u[0],u[1],u[2]))
     
def main():
    uinfo = []
    url = 'https://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20) # 20 univs
main()

错的原因

  • URL是错的 2016的不能用了 改成2019 或2018
  • 整块代码好像本身也有问题

先看看框架吧
在这里插入图片描述

以下是改正后的

# -*- coding: utf-8 -*-

import requests
from bs4 import BeautifulSoup as bs   #简写
import bs4


def getHTMLText(url):
    try:
        r = requests.get(url, timeout=30)
        r.encoding = "utf-8"
    except Exception as e:
        print(e)
        return "error"
    return r


def fillUnivList(ulist, html):
    soup = bs(html.text, 'html.parser')
    for tr in soup.find('tbody').children:
        if isinstance(tr, bs4.element.Tag):
            tds = tr('td')
            ulist.append([tds[0].string, tds[1].string, tds[2].string])


def printUnivList(ulist, num):
    for i in range(num):
        u = ulist[i]
        print("学校编号:%s\t学校名称:%s\t学校地点:%s\t" % (u[0], u[1], u[2]))


if __name__ == "__main__":
    uinfo = []
    url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html'
    html = getHTMLText(url)
    if html != "error":
        fillUnivList(uinfo, html)
        printUnivList(uinfo, 20)

发布了60 篇原创文章 · 获赞 18 · 访问量 5247

猜你喜欢

转载自blog.csdn.net/szuwaterbrother/article/details/105186431