Python之路__爬虫篇:新浪新闻爬取回顾(三)

获取首页分页:代码中的 range(1, 2) 只会抓取第 1 页,可以把范围调大一点以抓取更多分页。

#3- 获取首页新闻分页 并拿出所有分页的新闻URL 并取得对应新闻的内容
import json

# URL template for the paged JSON news listing on api.roll.news.sina.com.cn.
# The trailing ``{}`` placeholder is filled with the page number via
# ``str.format`` before each request.
commonPage = (
    'http://api.roll.news.sina.com.cn/zt_list'
    '?channel=news&cat_1=gnxw'
    '&cat_2==gdxw1||=gatxw||=zs-pl||=mtjj'
    '&level==1||=2'
    '&show_ext=1&show_all=1&show_num=22'
    '&tag=1&format=json&page={}'
)

def getNewLists(commonPage, startPage=1, endPage=2, timeout=10):
    """Collect the details of every news item listed on a range of index pages.

    For each page number the listing URL is built from ``commonPage``, fetched,
    and parsed as JSON. Every entry under ``result -> data`` has its ``url``
    handed to ``getNewsDetail``, and the returned values are accumulated.

    Paging stops early at the first response whose HTTP status is not 200.

    Relies on ``requests`` and ``getNewsDetail`` being available at module
    scope; neither is defined in this part of the file.

    Args:
        commonPage: URL template with one ``{}`` placeholder that receives the
            page number.
        startPage: First page number to fetch (inclusive).
        endPage: Page number to stop before (exclusive, as in ``range``). The
            defaults ``1`` and ``2`` fetch only page 1.
        timeout: Seconds to wait for each listing request before ``requests``
            raises a timeout error instead of blocking indefinitely.

    Returns:
        A list holding the result of ``getNewsDetail`` for each listed news
        URL, in the order the entries appear in the listings.
    """
    newsList = []

    for pageNo in range(startPage, endPage):
        response = requests.get(commonPage.format(pageNo), timeout=timeout)
        # Force UTF-8 so ``response.text`` is decoded consistently.
        response.encoding = 'utf-8'

        if response.status_code != 200:
            # A non-200 status is treated as the end of pagination.
            print('分页结束******')
            break

        jsonData = json.loads(response.text)
        newsList.extend(
            getNewsDetail(newDic['url'])
            for newDic in jsonData['result']['data']
        )

    return newsList

猜你喜欢

转载自blog.csdn.net/idlehand/article/details/79069643