如何分页爬取数据--BeautifulSoup

'''本次爬取讲历史网站'''
#!usr/bin/env python

#-*- coding:utf-8 _*-
"""
@author:Hurrican
@file: 分页爬取数据.py
@time: 2018/11/03 9:30

"""
from bs4 import BeautifulSoup
import requests

def get_urls():
    """Build the list of paging URLs for the jianglishi.cn 'jiemi' section.

    Page 1 uses the bare section URL; pages 2-20 follow the
    ``page_{n}.html`` pattern.

    Returns:
        list[str]: 20 URLs with page 1 first, then pages 2-20 in order.
    """
    # str.format() converts ints itself; the original's str(i) was redundant.
    urls = ['http://www.jianglishi.cn/jiemi/page_{}.html'.format(i)
            for i in range(2, 21)]
    urls.insert(0, 'http://www.jianglishi.cn/jiemi/')
    return urls

def get_title():
    """Fetch every listing page and print each article title.

    Iterates over the URLs from ``get_urls()``, downloads each page with
    requests, parses it with BeautifulSoup (html5lib parser), and prints
    the link text of every ``<div class="title">`` block.

    Side effects:
        Performs HTTP GET requests and writes titles to stdout.
    """
    for url in get_urls():
        response = requests.get(url)
        # The site serves UTF-8; set it explicitly so .text decodes correctly
        # instead of relying on requests' charset guess.
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html5lib')
        # find_all is the modern BS4 spelling of the deprecated findAll.
        for block in soup.find_all('div', attrs={'class': 'title'}):
            anchor = block.a
            # Guard: a title div without an <a> child would otherwise raise
            # AttributeError on .string.
            if anchor is not None:
                print(anchor.string)

if __name__ == '__main__':
    # Run the scraper only when executed as a script, not on import.
    get_title()
运行结果:
 

猜你喜欢

转载自www.cnblogs.com/wujf-myblog/p/9906858.html