Python + requests 实现爬取百度图片

利用 Python + requests 实现爬取百度图片


#!/usr/bin/python
# -*- coding:utf-8 -*-
import requests
import json
import re
import os


class BaiduImage(object):
    """Crawl Baidu image-search results and save the ``.jpg`` files locally.

    ``request()`` repeatedly queries the ``avatarjson`` endpoint (search word
    and ``rn=60`` are hard-coded in the URL), and hands each returned ``imgs``
    list to ``download()``, which stores the pictures under ``./image``.
    """

    # Captures the file stem: the text between a '/' and the following '.jpg'.
    _JPG_NAME = re.compile(r'.*/(.*?)\.jpg', re.S)
    # Seconds to wait on any single HTTP request before giving up on it.
    _TIMEOUT = 10
    # Bytes per chunk when streaming an image body to disk.
    _CHUNK_SIZE = 8192

    def __init__(self):
        """Initialise the paging counter and make sure ``./image`` exists."""
        super(BaiduImage, self).__init__()

        # Sent as the ``pn`` query parameter and advanced by 60 after every
        # listing request (the URL asks for rn=60 results per request).
        # NOTE(review): starting at 60 presumably skips the first 60 results
        # if ``pn`` is a zero-based offset -- confirm whether that is intended.
        self.page = 60
        if not os.path.exists(r'./image'):
            os.mkdir(r'./image')

    def request(self):
        """Fetch result listings page by page and download their images.

        The loop stops when the server answers with a non-200 status, when a
        listing contains no ``imgs`` entries, or when an exception occurs
        (the exception is printed, not re-raised). Returns ``None``.
        """
        # NOTE(review): 'test/html' looks like a typo for 'text/html'; it is
        # kept unchanged because the header is sent to the server as-is.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0',
                   'Content-type': 'test/html'}
        try:
            while True:
                request_url = 'http://image.baidu.com/search/avatarjson?tn=resultjsonavatarnew&ie=utf-8&word=%E7%BE%8E%E5%A5%B3&cg=girl&rn=60&pn=' + str(
                    self.page)

                response = requests.get(request_url, headers=headers,
                                        timeout=self._TIMEOUT)
                # Close every listing response, not just the last one, and
                # only after it has actually been created.
                try:
                    if response.status_code != 200:
                        print('stopping: HTTP %s for pn=%s'
                              % (response.status_code, self.page))
                        break
                    # Parse the JSON body into a dict and pull the image list.
                    imgs = json.loads(response.text).get('imgs')
                finally:
                    response.close()

                # An empty listing means there is nothing further to fetch;
                # without this check the loop would never terminate.
                if not imgs:
                    break

                self.download(imgs)
                self.page += 60

        except Exception as e:
            # Best-effort crawl: report the problem and stop.
            print(e)

    def download(self, data):
        """Save every ``.jpg`` referenced by *data* into ``./image``.

        *data* is an iterable of dicts; each dict's ``objURL`` value is the
        image address. Entries without ``objURL`` or whose URL does not match
        the ``.jpg`` pattern are skipped, as are images whose request fails
        or does not return status 200.

        Returns the number of images written.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}
        saved = 0

        for d in data:
            url = d.get('objURL')
            if not url:
                continue

            # Decide the file name before touching the network so that
            # unusable URLs cost nothing.
            match = self._JPG_NAME.search(url)
            if match is None:
                continue
            file_name = os.path.join('image', match.group(1) + '.jpg')

            try:
                response = requests.get(url, headers=headers, stream=True,
                                        timeout=self._TIMEOUT)
                try:
                    # Do not store an error page under a .jpg name.
                    if response.status_code != 200:
                        continue
                    with open(file_name, "wb") as op:
                        for chunk in response.iter_content(self._CHUNK_SIZE):
                            op.write(chunk)
                finally:
                    # A streamed response holds its connection until closed.
                    response.close()
            except (requests.RequestException, IOError) as e:
                # One bad image must not abort the rest of the batch.
                print(e)
                continue

            saved += 1

        return saved


if __name__ == '__main__':
    # Script entry point: build the crawler and start fetching right away.
    BaiduImage().request()


本人亲测成功。如有不懂的地方,欢迎咨询。

猜你喜欢

转载自blog.csdn.net/q1454739828/article/details/60593920