利用 Python + requests 实现爬取百度图片
#!/usr/bin/python
# -*- coding:utf-8 -*-
import requests
import json
import re
import os
class BaiduImage(object):
    """Download images listed by a Baidu image-search JSON endpoint.

    Result pages are requested one after another; every result whose
    ``objURL`` contains a ``.jpg`` file name is streamed into ``./image``.
    """

    # Search endpoint; the current value of ``self.page`` is appended as ``pn``.
    SEARCH_URL = ('http://image.baidu.com/search/avatarjson?tn=resultjsonavatarnew'
                  '&ie=utf-8&word=%E7%BE%8E%E5%A5%B3&cg=girl&rn=60&pn=')
    PAGE_STEP = 60     # added to ``self.page`` after each page (URL asks for rn=60)
    TIMEOUT = 10       # seconds; without a timeout requests may block forever
    CHUNK_SIZE = 8192  # bytes read per iteration while streaming an image
    IMAGE_DIR = 'image'
    # Captures the file name (without extension) that precedes ".jpg" in a URL.
    _JPG_NAME = re.compile(r'.*/(.*?)\.jpg', re.S)

    def __init__(self):
        """Initialise the paging state and make sure the output directory exists."""
        super(BaiduImage, self).__init__()
        # Value sent as the ``pn`` query parameter; starts at 60.
        self.page = 60
        if not os.path.isdir(self.IMAGE_DIR):
            os.mkdir(self.IMAGE_DIR)

    def request(self):
        """Fetch result pages in sequence and download the images on each one.

        Crawling stops when a request fails, the server answers with a status
        other than 200, the body is not valid JSON, or a page carries no
        ``imgs`` entries.  Errors are printed, not raised.
        """
        # NOTE(review): 'test/html' looks like a typo for 'text/html' - confirm
        # before changing, it is sent to the server as-is.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0',
                   'Content-type': 'test/html'}
        while True:
            # Bound before the try so the finally clause never hits an
            # unassigned name when requests.get itself raises.
            response = None
            try:
                response = requests.get(self.SEARCH_URL + str(self.page),
                                        headers=headers, timeout=self.TIMEOUT)
                if response.status_code != 200:
                    print('unexpected status %s for pn=%s'
                          % (response.status_code, self.page))
                    break
                decoded = json.loads(response.text)  # parse the JSON body
            except (requests.RequestException, ValueError) as e:
                # ValueError covers malformed JSON.
                print(e)
                break
            finally:
                if response is not None:
                    response.close()

            images = decoded.get('imgs') if isinstance(decoded, dict) else None
            if not images:
                # An empty page means there is nothing more to fetch; without
                # this check the loop would request pages forever.
                break
            self.download(images)
            self.page += self.PAGE_STEP

    def download(self, data):
        """Save every ``.jpg`` image referenced by *data* into ``./image``.

        *data* is an iterable of dicts as found under ``imgs`` in the search
        response.  Entries without an ``objURL`` or whose URL has no ``.jpg``
        file name are skipped; a failure on one image is printed and does not
        stop the remaining downloads.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}
        for entry in data:
            url = entry.get('objURL')
            if not url:
                continue
            # The name is resolved before any network traffic so URLs that
            # cannot be named are skipped cheaply.
            match = self._JPG_NAME.search(url)
            if match is None:
                continue
            file_name = os.path.join(self.IMAGE_DIR, match.group(1) + '.jpg')
            try:
                self._save_image(url, file_name, headers)
            except (requests.RequestException, IOError, OSError) as e:
                print(e)

    def _save_image(self, url, file_name, headers):
        """Stream *url* into *file_name*; responses other than 200 are ignored."""
        response = requests.get(url, headers=headers, stream=True,
                                timeout=self.TIMEOUT)
        try:
            if response.status_code != 200:
                # Avoid storing an error page under a .jpg name.
                return
            with open(file_name, "wb") as out:
                for chunk in response.iter_content(self.CHUNK_SIZE):
                    out.write(chunk)
        finally:
            # A streamed response holds its connection until it is closed.
            response.close()
if __name__ == '__main__':
    # Script entry point: build the crawler, then start fetching pages.
    crawler = BaiduImage()
    crawler.request()