from urllib.request import urlopen
from urllib.error import HTTPError,URLError
from bs4 import BeautifulSoup
# BeautifulSoup is imported above.
# The getTitle function is defined below.
def getTitle(url):
    """Fetch *url* and return the first ``<h1>`` element inside its ``<body>``.

    Args:
        url: Address of the page to download, passed straight to ``urlopen``.

    Returns:
        The value of ``body.h1`` from the parsed document, or ``None`` when
        the page cannot be fetched (``HTTPError`` / ``URLError``) or the
        parsed document has no ``<body>`` to look in. Callers should also
        expect ``None`` when the body contains no ``<h1>``.
    """
    try:
        response = urlopen(url)
    except (HTTPError, URLError):
        # Server returned an error status, or the host/resource is unreachable.
        return None

    # Read the payload and release the connection before parsing.
    with response:
        markup = response.read()

    try:
        # Name the parser explicitly so results do not depend on which
        # optional parser libraries happen to be installed.
        title = BeautifulSoup(markup, "html.parser").body.h1
    except AttributeError:
        # ``body`` was None (no <body> tag), so ``.h1`` could not be resolved.
        return None
    return title
# Look up the heading of the sample page and report the outcome.
title = getTitle("http://www.pythonscraping.com/pages/page1.html")
# Identity test: None is a singleton, and `is` bypasses any custom __eq__
# defined by the returned object.
if title is None:
    print("Title could not be found!")
else:
    print(title)