Python：爬取豆瓣热门国产电视剧并保存为文件

# -*- coding: utf-8 -*-
__author__ = 'Frank Li'
import requests
import json

class HotSpider(object):
    """Crawl Douban's ``filter_tv_domestic_hot`` collection and save it to a file.

    Pages are requested one after another from the mobile API and every
    returned item is appended to the output file as one JSON document per line.
    """

    # Step between consecutive ``start`` offsets; must stay in sync with the
    # ``count=18`` query parameter baked into the URL template below.
    PAGE_SIZE = 18
    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block the crawl indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # ``{}`` is replaced with the ``start`` offset of the page to fetch.
        self.url = "https://m.douban.com/rexxar/api/v2/subject_collection/filter_tv_domestic_hot/items?os=android&for_mobile=1&start={}&count=18&loc_id=108288"
        self.session = requests.Session()
        # Sent with every request so it looks like the mobile web page's own call.
        self.headers = {"Referer": "https://m.douban.com/tv/chinese",
                        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Mobile Safari/537.36"}

    def parse_2_list_from_str(self, url):
        """Fetch *url* and return the ``subject_collection_items`` list of its JSON body.

        Args:
            url: Fully formatted page URL.

        Returns:
            The value stored under ``subject_collection_items`` in the response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            KeyError: If the body has no ``subject_collection_items`` key.
        """
        response = self.session.get(url, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        payload = json.loads(response.content.decode())
        return payload['subject_collection_items']

    def save_as_file(self, content_list, file):
        """Append every entry of *content_list* to *file*, one JSON document per line.

        Args:
            content_list: Iterable of JSON-serialisable objects.
            file: Path of the output file; it is opened in append mode.
        """
        with open(file, 'a', encoding='utf-8') as f:
            for content in content_list:
                # ensure_ascii=False keeps non-ASCII text readable in the file.
                f.write(json.dumps(content, ensure_ascii=False))
                f.write('\n')

    def run(self, total=500, file='hot.json'):
        """Crawl page by page and append the results to *file*.

        Crawling stops as soon as a page comes back empty, or once the
        ``start`` offset reaches ``total + PAGE_SIZE`` (the original bound,
        which requests one page past ``total``).

        Args:
            total: Approximate number of items to crawl.
            file: Path of the output file.
        """
        start = 0
        while start < total + self.PAGE_SIZE:
            url = self.url.format(start)
            print(url)
            items = self.parse_2_list_from_str(url)
            self.save_as_file(items, file)
            if not items:
                # An empty page means the collection is exhausted; further
                # requests would only waste time and bandwidth.
                break
            start += self.PAGE_SIZE

if __name__ == '__main__':
    # Script entry point: build a spider and start crawling with its defaults.
    HotSpider().run()

猜你喜欢

转载自www.cnblogs.com/Frank99/p/9610069.html