我觉得我写的爬图片的代码看着很舒服

闲着也是闲着:
目标网站:https://image.so.com
代码:

# -*- coding: utf-8 -*-

import os
import requests
from time import sleep
from urllib.parse import urlencode

# Request headers shared by every HTTP call in this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3325.162 Safari/537.36'
    ),
    # The page the request claims to have been navigated from; sites
    # commonly check this header.
    'Referer': 'https://image.so.com/z?ch=beauty',
    # The host being addressed.
    'Host': 'image.so.com'
}


def parse_cover_page(page):
    """Fetch one page of the gallery listing and download each gallery on it.

    Every entry of the ``list`` array in the JSON response is turned into a
    gallery-detail URL and handed to ``parse_one_group_pic`` together with
    the entry's ``group_title``.

    Args:
        page: Value sent as the ``sn`` query parameter of the listing
            endpoint (presumably a result offset -- confirm against the API).
    """
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept from the original; confirm it is really required.
    html = requests.get(
        url='https://image.so.com/zj?ch=beauty&sn={page}&listtype=new&temp=1'.format(page=page),
        headers=headers,
        verify=False,
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=10,
    )
    sleep(2)  # pause between listing requests
    json_res = html.json()
    if 'list' not in json_res:
        return

    for cover_image in json_res.get('list') or []:
        # Title of the gallery; later used as the folder name.
        group_title = cover_image.get('group_title')
        # Identifier of the gallery, needed to build its detail URL.
        gro_id = cover_image.get('id')
        if gro_id is None:
            # urlencode would emit the literal text "id=None"; skip the entry
            # instead of requesting a URL that cannot be valid.
            continue
        params = urlencode({'ch': 'beauty', 'id': gro_id})
        url = 'https://image.so.com/zvj?' + params
        print(url)
        parse_one_group_pic(url, group_title)



def parse_one_group_pic(url,group_title):
    """Download every picture of one gallery into ``D:/pictures/<group_title>/``.

    Args:
        url: Gallery-detail endpoint; its JSON response is expected to carry a
            ``list`` array whose items have ``pic_url`` and ``index`` keys.
        group_title: Gallery title, used verbatim as the folder name.
    """
    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept from the original; confirm it is really required.
    html = requests.get(url=url, headers=headers, verify=False, timeout=10)
    sleep(0.5)
    # Decode the body once instead of re-parsing it for every access.
    data = html.json()
    if 'list' not in data:
        return
    pictures = data.get('list') or []
    if not pictures:
        return

    # Create the target folder once, not on every loop iteration.
    folder = 'D:/pictures/{nam}'.format(nam=group_title)
    os.makedirs(folder, exist_ok=True)

    # The shared headers pin "Host: image.so.com". pic_url may point at a
    # different host, so let requests derive Host from the URL instead.
    image_headers = {
        key: value for key, value in headers.items() if key.lower() != 'host'
    }

    for pic in pictures:
        pic_url = pic.get('pic_url')
        if not pic_url:
            continue
        pic_index = pic.get('index')
        # Download before opening the file so that a failed request does not
        # leave an empty or truncated .jpg behind.
        image = requests.get(
            url=pic_url, headers=image_headers, verify=False, timeout=10
        )
        sleep(0.2)
        if not image.ok:
            # Do not store an HTTP error page under an image file name.
            print('skip {url}: HTTP {code}'.format(url=pic_url, code=image.status_code))
            continue
        target = '{folder}/{pic_index}.jpg'.format(folder=folder, pic_index=pic_index)
        with open(target, 'wb') as f:
            f.write(image.content)

if __name__ == "__main__":
    # Crawl four listing pages; each value is passed to parse_cover_page
    # (presumably a result offset with 30 entries per page -- confirm).
    for offset in (0, 30, 60, 90):
        parse_cover_page(offset)

猜你喜欢

转载自blog.csdn.net/killeri/article/details/80403745
今日推荐