"""Solve level 0 of the Heibanke ("blackboard") crawler challenge.

Challenge URL: http://www.heibanke.com/lesson/crawler_ex00/

The first <h3> heading of every page is read and its digits are appended to
the base URL to form the address of the next page.  The script keeps
following that chain until a heading yields no digits, then prints the
elapsed wall-clock time in seconds.
"""
import time

import requests
from lxml import etree

START_URL = "http://www.heibanke.com/lesson/crawler_ex00/"

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10


def extract_digits(text: str) -> str:
    """Return every digit character of *text*, concatenated in order.

    An empty string is returned when *text* contains no digits.
    """
    return "".join(ch for ch in text if ch.isdigit())


def fetch_number(session: requests.Session, url: str) -> str:
    """Download *url* and return the digits found in its first <h3> heading.

    Args:
        session: Session used to issue the request, so the underlying
            connection can be reused across pages.
        url: Address of the page to fetch.

    Returns:
        The digits of the first ``<h3>`` text node, or ``""`` when the page
        has no such heading or the heading contains no digits.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            4xx/5xx response status.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error status instead of scraping an error page.
    response.raise_for_status()

    # Raw bytes are handed to lxml, so the charset has to be given to the
    # parser; setting ``response.encoding`` only affects ``response.text``.
    parser = etree.HTMLParser(encoding="utf-8")
    document = etree.HTML(response.content, parser=parser)
    if document is None:
        return ""

    headings = document.xpath("//h3/text()")
    if not headings:
        return ""
    return extract_digits(headings[0])


def main() -> None:
    """Follow the chain of numbered pages, printing each number and URL."""
    started = time.perf_counter()

    with requests.Session() as session:
        number = fetch_number(session, START_URL)
        print(number)

        # An empty string means the current page names no further number.
        while number:
            url = f"{START_URL}{number}/"
            print(url)
            number = fetch_number(session, url)
            print(number)

    print(time.perf_counter() - started)


if __name__ == "__main__":
    main()