Python爬取网页title

from urllib.request import urlopen
from urllib.error import HTTPError,URLError
from bs4 import BeautifulSoup

以上代码引入了urlopen、HTTPError、URLError以及BeautifulSoup

下面是getTitle函数：它抓取给定URL的页面，并返回<body>中的第一个<h1>标签；请求失败时返回None。

def getTitle(url):
    """Fetch *url* and return the first ``<h1>`` tag inside its ``<body>``.

    Args:
        url: Address of the page to download.

    Returns:
        The BeautifulSoup tag for the first ``<h1>`` under ``<body>``, or
        ``None`` when the page cannot be fetched (``HTTPError`` /
        ``URLError``), when the document has no ``<body>``, or when the
        body contains no ``<h1>``.
    """
    try:
        # The context manager closes the response even if read() fails.
        with urlopen(url) as response:
            markup = response.read()
    except (HTTPError, URLError):
        return None
    try:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        bsObj = BeautifulSoup(markup, "html.parser")
        title = bsObj.body.h1
    except AttributeError:
        # bsObj.body is None when the document has no <body> element.
        return None
    return title
# Demo run: fetch the sample page and print its heading, or a notice when
# getTitle() reports that nothing could be retrieved.
title = getTitle("http://www.pythonscraping.com/pages/page1.html")
if title is None:  # identity check is the idiomatic test for None
    print("Title could not be found!")
else:
    print(title)


猜你喜欢

转载自blog.csdn.net/HurryRabbit/article/details/79147910