import requests
from urllib.parse import urlencode
import os
from hashlib import md5
from multiprocessing.pool import Pool
def get_page(offset, timeout=10):
    """Fetch one page of Toutiao search results for the keyword '街拍'.

    Args:
        offset: Value sent as the ``offset`` query parameter of the search
            API (the script entry point passes multiples of 20, matching
            the ``count`` of '20' sent with every request).
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled
            connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, network failure, or a body
        that is not valid JSON).
    """
    params = {
        'aid': '24',
        'app_name': 'web_search',
        'offset': offset,
        'format': 'json',
        'keyword': '街拍',
        'autoload': 'true',
        'count': '20',
        'cur_tab': '3',
    }
    url = 'https://www.toutiao.com/api/search/content/?' + urlencode(params)
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/72.0.3626.109 Safari/537.36'
        ),
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError):
        # RequestException covers connection errors and timeouts alike;
        # ValueError covers a 200 response whose body is not valid JSON.
        print("连接失败")
    return None
def get_images(json):
    """Yield one ``{'image': url, 'title': title}`` dict per image found.

    Args:
        json: Decoded search response (a dict), or ``None`` when the page
            could not be fetched. The parameter name is kept for
            compatibility even though it shadows the stdlib module name.

    Yields:
        A dict with the image's ``url`` under ``'image'`` and the title of
        the entry it belongs to under ``'title'``. Entries without an
        ``image_list`` are skipped.
    """
    # A failed fetch hands us None; treat it like an empty result instead
    # of raising AttributeError on ``None.get``.
    if not json:
        return
    for entry in json.get('data') or []:
        title = entry.get('title')
        for image in entry.get('image_list') or []:
            yield {
                'image': image.get('url'),
                'title': title,
            }
def save_images(item):
    """Download one image into a folder named after its title.

    Args:
        item: Dict with the image URL under ``'image'`` and the entry
            title under ``'title'`` (the shape yielded by ``get_images``).

    The file is stored as ``<folder>/<md5 of content>.jpg`` relative to
    the current working directory, so identical images are written only
    once. Items without a usable title or URL are skipped. Network errors
    are reported with a message and do not raise.
    """
    title = item.get('title')
    image_url = item.get('image')
    if not title or not image_url:
        # Nothing to name the folder after, or nothing to download.
        return

    # The title comes from the remote API and is used as a directory name:
    # drop spaces (as the file path always did), path separators and
    # characters that are invalid in Windows file names, then strip dots
    # so the result can never be '.' or '..'.
    folder = title.translate(str.maketrans('', '', ' \\/:*?"<>|')).strip('.')
    if not folder:
        return

    # exist_ok avoids the check-then-create race between pool workers, and
    # the same sanitized name is used for both the folder and the file.
    os.makedirs(folder, exist_ok=True)
    try:
        response = requests.get(image_url, timeout=10)
    except requests.RequestException:
        print('Failed to Save Image')
        return
    if response.status_code != 200:
        return

    # Name the file after the MD5 of its bytes to de-duplicate downloads.
    file_path = '{0}/{1}.{2}'.format(
        folder, md5(response.content).hexdigest(), 'jpg')
    if os.path.exists(file_path):
        print('Already Download', file_path)
        return
    with open(file_path, 'wb') as f:
        f.write(response.content)
def main(offset):
    """Fetch the search page at ``offset`` and save every image it lists.

    Args:
        offset: Passed unchanged to ``get_page``.
    """
    page = get_page(offset)
    if not page:
        # get_page returns None when the request fails; nothing to save.
        return
    for item in get_images(page):
        save_images(item)
# Inclusive range of page groups to crawl; each group index is multiplied
# by 20 to obtain the ``offset`` handed to ``main``.
# NOTE(review): with GROUP_START = 1 the offset-0 page is never requested
# -- confirm this is intended.
GROUP_START = 1
GROUP_END = 1

if __name__ == '__main__':
    # All images are written relative to this folder; create it on first
    # run instead of failing with FileNotFoundError.
    os.makedirs('jiepai', exist_ok=True)
    os.chdir('jiepai')
    groups = [x * 20 for x in range(GROUP_START, GROUP_END + 1)]
    pool = Pool()  # download pages in parallel worker processes
    try:
        pool.map(main, groups)
    finally:
        # Always release the worker processes, even if a task raised.
        pool.close()
        pool.join()
# Python: crawl Toutiao ("Today's Headlines")
# You may also like
# Origin: blog.csdn.net/qq_43078427/article/details/115182378
# Recommended
# Ranking