# python爬取知乎话题:"日常穿jk制服是怎样一种体验?"下的所有图片

'''
第一次尝试python爬取图片,请多指正
回答者的答案单独存放一个文件夹
'''
from urllib import request
import math
import requests
import json
import re
import os

def _safe_name(name):
    """Return *name* with characters that are illegal in file names replaced."""
    # Path separators and the Windows-reserved characters would otherwise
    # make directory creation fail for some author nicknames.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(name)).strip().rstrip(".")
    return cleaned or "_"


def getPicture(total_answers=569, page_size=20,
               base_dir="C:/Users/23504/Desktop/Python知乎数据/知乎话题:日常穿JK制服是一种怎样的体验?/",
               timeout=30):
    """Download the images embedded in the answers of Zhihu question 29814297.

    The answers are fetched page by page from the Zhihu v4 answers API.
    Each answer gets its own folder ``<answer index>-<author name>`` under
    *base_dir*; images are saved inside it as
    ``<index within answer>-<global image index>.jpg``.

    Args:
        total_answers: Number of answers to page through. The default (569)
            is the figure the original author read off the page by hand; it
            is not fetched automatically.
        page_size: Value sent as the ``limit`` query parameter, i.e. how many
            answers are requested per call. Must be positive.
        base_dir: Directory under which the per-answer folders are created.
            It is created if missing.
        timeout: Timeout in seconds applied to every HTTP request.

    Raises:
        ValueError: If *page_size* is not positive.

    Network, JSON and file-system errors are reported with ``print`` and the
    affected page / answer / image is skipped; they do not abort the run.
    """
    # Local import: the surrounding file does not import http.client.
    from http.client import HTTPException

    if page_size <= 0:
        raise ValueError("page_size must be a positive integer")

    headers = {'user-agent': 'Mozillar/5.0'}
    # In the API URL, ``limit`` is how many rows are returned per call and
    # ``offset`` is the index of the first answer to load.
    url_prefix = "https://www.zhihu.com/api/v4/questions/29814297/answers?include=data%5B%2A%5D.is_normal%2Cadmin_closed_comment%2Creward_info%2Cis_collapsed%2Cannotation_action%2Cannotation_detail%2Ccollapse_reason%2Cis_sticky%2Ccollapsed_by%2Csuggest_edit%2Ccomment_count%2Ccan_comment%2Ccontent%2Ceditable_content%2Cvoteup_count%2Creshipment_settings%2Ccomment_permission%2Ccreated_time%2Cupdated_time%2Creview_info%2Crelevant_info%2Cquestion%2Cexcerpt%2Crelationship.is_authorized%2Cis_author%2Cvoting%2Cis_thanked%2Cis_nothelp%2Cis_labeled%3Bdata%5B%2A%5D.mark_infos%5B%2A%5D.url%3Bdata%5B%2A%5D.author.follower_count%2Cbadge%5B%2A%5D.topics&limit="
    url_suffix = "&platform=desktop&sort_by=default"

    count = 0     # total number of images downloaded so far
    filcount = 0  # number of answer folders created so far

    for i in range(math.ceil(total_answers / page_size)):
        url = url_prefix + str(page_size) + "&offset=" + str(page_size * i) + url_suffix
        try:
            r = requests.get(url, headers=headers, timeout=timeout)
            answers = json.loads(r.text)["data"]
        except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
            # Only a failed or unparsable page request is reported as an
            # invalid URL; later failures get their own messages.
            print("url链接无效:" + str(exc))
            continue

        # Iterate over what the page actually returned: the last page can
        # hold fewer than ``page_size`` answers.
        for answer in answers:
            try:
                jpgurl = re.findall(r'data-original="(.*?)"', answer["content"])
                name = answer["author"]["name"]
                ID = answer["id"]
                question = answer["question"]["title"]
            except (KeyError, TypeError) as exc:
                print("回答数据格式异常,已跳过:" + str(exc))
                continue

            folder = os.path.join(base_dir, str(filcount) + "-" + _safe_name(name))
            try:
                # exist_ok lets the script be re-run over an existing tree.
                os.makedirs(folder, exist_ok=True)
            except OSError as exc:
                print("文件夹创建失败:" + folder + "--" + str(exc))
                continue
            filcount = filcount + 1

            jpgcount = 0  # image index inside this folder, reset per answer
            # The original takes every second match; presumably each image's
            # data-original attribute appears twice in the answer HTML.
            # NOTE(review): confirm against a live response.
            for img_url in jpgurl[::2]:
                target = os.path.join(folder, str(jpgcount) + "-" + str(count) + ".jpg")
                try:
                    with request.urlopen(img_url, timeout=timeout) as resp:
                        picture = resp.read()
                    with open(target, 'wb') as file:
                        file.write(picture)
                except (OSError, ValueError, HTTPException) as exc:
                    # One broken image must not abort the rest of the page.
                    print("图片下载失败:" + img_url + "--" + str(exc))
                    continue
                print("正在下载第"+str(filcount)+"个回答--回答者昵称:"+name+"--回答者ID:"+str(ID)+"--"+"问题:"+question+"--第" + str(count) + "张图片下载完成")
                jpgcount = jpgcount + 1
                count = count + 1
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    getPicture()


# 猜你喜欢

# 转载自blog.csdn.net/YiXiao1997/article/details/86655584