# Fetch street-snap (街拍) images from Toutiao (今日头条).

import requests,time
from urllib.parse import  urlencode
import os
from hashlib import md5

class ToutiaoSpider:
    """Download street-snap (街拍) images from Toutiao search results.

    The spider pages through the ``search_content`` Ajax endpoint, yields one
    record per image referenced by each result, and stores every image in a
    directory named after the title of the result it belongs to.
    """

    # Seconds to wait on any single HTTP request, so that one stalled
    # connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 10

    # Characters that cannot appear in a Windows file name; the path
    # separators among them are unusable in a directory name on POSIX too.
    _UNSAFE_PATH_CHARS = '\\/:*?"<>|'

    def __init__(self):
        """Set up the query parameters and HTTP headers shared by all requests."""
        self.params = {
            'offset': None,  # filled in per page by get_page()
            'format': 'json',
            'keyword': '街拍',
            'autoload': 'true',
            'count': '20',
            'cur_tab': '1',
            'from': 'search_tab',
            'pd': 'synthesis'
        }
        self.ua = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
        self.headers = {
            'user-agent': self.ua,
            # Header values must be latin-1 encodable, so the keyword is
            # percent-encoded instead of being embedded as raw Chinese text.
            'referer': 'https://www.toutiao.com/search/?'
                       + urlencode({'keyword': self.params['keyword']}),
            'content-type': 'application/x-www-form-urlencoded'
        }

    def get_page(self, offset):
        """Fetch one page of search results from the Ajax endpoint.

        Args:
            offset: Index of the first result to request; sent as the
                ``offset`` query parameter.

        Returns:
            The decoded JSON body when the server answers with HTTP 200 and
            the body is valid JSON, otherwise ``None``.
        """
        self.params['offset'] = offset
        url = 'https://www.toutiao.com/search_content/?' + urlencode(self.params)
        print(url)
        try:
            r = requests.get(url, headers=self.headers,
                             timeout=self.REQUEST_TIMEOUT)
            if r.status_code != 200:
                print('Failed to fetch page, status:', r.status_code)
                return None
            r.encoding = 'utf-8'
            return r.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print('Failed to fetch page:', e)
            return None

    def get_images(self, json_data):
        """Yield one record per image referenced by a page of results.

        Args:
            json_data: Decoded JSON as returned by ``get_page``; ``None`` or
                an empty mapping yields nothing.

        Yields:
            dict: ``{'image': <image url>, 'title': <result title>}``.
        """
        if not json_data:
            # get_page() returns None when the request failed.
            return
        for item in json_data.get('data') or []:
            title = item.get('title')
            # Results without an 'image_list' (or with a null one) carry
            # no images and are skipped.
            for image in item.get('image_list') or []:
                yield {'image': image.get('url'), 'title': title}

    @staticmethod
    def _safe_dirname(title):
        """Return ``title`` with characters unsafe in a directory name replaced."""
        cleaned = ''.join(
            '_' if ch in ToutiaoSpider._UNSAFE_PATH_CHARS or ord(ch) < 32 else ch
            for ch in str(title)
        ).strip().rstrip('. ')
        return cleaned or 'untitled'

    def save_image(self, item):
        """Download one image into a directory named after its title.

        The file name is the MD5 digest of the image bytes, so an image
        that was already saved is detected and not written again.

        Args:
            item: A record as yielded by ``get_images``, with the keys
                ``'image'`` (image URL) and ``'title'``.
        """
        title = item.get('title')
        url = item.get('image')
        if not title or not url:
            print('Failed to save Image')
            return

        # URLs without a scheme get "http:" prepended (e.g. "//host/path");
        # complete URLs are used unchanged.
        if url.startswith(('http://', 'https://')):
            full_url = url
        else:
            full_url = 'http:' + url

        directory = self._safe_dirname(title)
        try:
            # exist_ok lets every image of a title be saved, not only the
            # one that happened to create the directory.
            os.makedirs(directory, exist_ok=True)
            print('download running:', full_url)
            r = requests.get(full_url, headers=self.headers,
                             timeout=self.REQUEST_TIMEOUT)
            if r.status_code != 200:
                print('页面下载失败!')
                return
            file_path = os.path.join(
                directory, '{}.{}'.format(md5(r.content).hexdigest(), 'jpg'))
            if os.path.exists(file_path):
                print('Already Download,', file_path)
                return
            with open(file_path, 'wb') as fp:
                fp.write(r.content)
            print('download finished')
        except (requests.RequestException, OSError) as e:
            print(e)
            print('Failed to save Image')

    def run(self, offsets):
        """Crawl every page in ``offsets`` and save all images found.

        Args:
            offsets: Iterable of result offsets, one per page to fetch.
        """
        for offset in offsets:
            json_data = self.get_page(offset)
            for item in self.get_images(json_data):
                print(item)
                self.save_image(item)
            # Pause between pages.
            time.sleep(1)

if __name__ == '__main__':
    # Crawl 15 pages of 20 results each: offsets 0, 20, ..., 280.
    spider = ToutiaoSpider()
    spider.run(list(range(0, 15 * 20, 20)))

# 猜你喜欢 (You may also like)

# 转载自 (reposted from) www.cnblogs.com/wl443587/p/10295157.html