# -*- coding: utf-8 -*-
__author__ = 'Frank Li'
import requests
import json
class HotSpider(object):
    """Scraper for the Douban mobile ``filter_tv_domestic_hot`` collection.

    Pages through the endpoint in ``self.url`` and appends every returned
    item to a file as one JSON document per line.
    """

    # Step between consecutive ``start`` offsets. Keep in sync with the
    # ``count=18`` query parameter embedded in ``self.url``.
    PAGE_SIZE = 18

    # Seconds to wait on the server. Without a timeout a stalled connection
    # can block the whole run indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set up the URL template, the HTTP session and the request headers."""
        # ``{}`` is replaced by the ``start`` offset of the page to fetch.
        self.url = "https://m.douban.com/rexxar/api/v2/subject_collection/filter_tv_domestic_hot/items?os=android&for_mobile=1&start={}&count=18&loc_id=108288"
        self.session = requests.Session()
        self.headers = {
            "Referer": "https://m.douban.com/tv/chinese",
            "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Mobile Safari/537.36",
        }

    def parse_2_list_from_str(self, url):
        """Fetch *url* and return the ``subject_collection_items`` of its JSON body.

        Args:
            url: Fully formatted page URL.

        Returns:
            The value stored under ``subject_collection_items``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            KeyError: If the payload has no ``subject_collection_items`` key.
        """
        response = self.session.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        # Fail with a clear HTTP error instead of an obscure KeyError or
        # JSON decode error further down.
        response.raise_for_status()
        payload = json.loads(response.content.decode())
        return payload['subject_collection_items']

    def save_as_file(self, content_list, file):
        """Append each item of *content_list* to *file*, one JSON document per line.

        The file is opened in append mode with UTF-8 encoding; non-ASCII
        characters are written as-is (``ensure_ascii=False``).

        Args:
            content_list: Iterable of JSON-serializable items.
            file: Path of the output file.
        """
        with open(file, 'a', encoding='utf-8') as f:
            f.writelines(
                json.dumps(content, ensure_ascii=False) + '\n'
                for content in content_list
            )

    def run(self, total=500, file='hot.json'):
        """Download the collection page by page and append the items to *file*.

        Paging stops as soon as a page comes back empty, or once the
        ``start`` offset reaches *total*, whichever happens first.

        Args:
            total: Exclusive upper bound for the ``start`` offset.
            file: Path of the output file (appended to, never truncated).
        """
        start = 0
        while start < total:
            url = self.url.format(start)
            print(url)
            items = self.parse_2_list_from_str(url)
            if not items:
                # Empty page: nothing left to fetch, so stop instead of
                # requesting every remaining offset up to ``total``.
                break
            self.save_as_file(items, file)
            start += self.PAGE_SIZE
if __name__ == '__main__':
    # Script entry point: build a spider and run the full scrape.
    HotSpider().run()
# python -- scrape Douban's hot domestic TV series and save them to a file
# You may also like
# Reposted from www.cnblogs.com/Frank99/p/9610069.html
# Weekly ranking