Crawling the Duowan Gallery with Python

As a practice exercise, I crawled the pictures, GIFs, and videos from the Duowan gallery.
Once I found the site's API endpoint the job became much easier, so here is the code directly:

import urllib.request
import os
import json
import requests
import time
import re

# Id of the gallery to crawl (today's funny-picture / gif / video gallery id).
# It is appended to the API URL below as the "gid" query parameter.
jinristr = "138622" 
# File names have to be sanitized before any file is created
def validateTitle(title):
    """Return *title* with every forbidden file-name character replaced by "_".

    The forbidden characters are the nine listed in ``forbidden`` below;
    all other characters are passed through unchanged.
    """
    # Forbidden set: / \ : * ? " < > |
    forbidden = '/\\:*?"<>|'
    cleaned = []
    for ch in title:
        if ch in forbidden:
            cleaned.append("_")  # swap the offending character for an underscore
        else:
            cleaned.append(ch)
    return "".join(cleaned)
# API endpoint that returns a gallery's data as JSON
url = "http://tu.duowan.com/index.php?r=show/getByGallery&gid="
# Build the full request URL: gallery id plus the current time in milliseconds
# as the "_" parameter (presumably a cache-buster -- confirm against the site).
url = url + jinristr + "&_=" + str(int(round(time.time() * 1000)))
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() fails loudly on an HTTP error instead of trying to
# parse an error page as JSON.
resp = requests.get(url, timeout=30)
resp.raise_for_status()
dataJson = json.loads(resp.content)
listdaa = dataJson['picInfo']
# Order the entries by the integer value of their 'ding' field, ascending
listdaa.sort(key=lambda x: int(x['ding']))
# Print what was found
for i in listdaa:
    print(i['ding'])
    print(i['add_intro'])
# One caption per line. Built with join rather than repeated "+", and stored
# under a name that does not shadow the builtin ``str``.
intro_text = ''.join(i['add_intro'] + "\n" for i in listdaa)
# Target directory for the downloads. The gallery title is sanitized the same
# way as the file names, because it may contain characters that are not
# allowed in a directory name.
path1 = 'G:\\多玩爬取图片\\' + validateTitle(dataJson['gallery_title'])
os.makedirs(path1, exist_ok=True)
# Download every entry that is not already on disk
for i in listdaa:
    # NOTE: the extension is hard-coded to ".gif"; it should really follow
    # the type of the file behind i['source'].
    picpath = path1 + '\\' + validateTitle(i['add_intro'] + '.gif')
    if not os.path.exists(picpath):
        urllib.request.urlretrieve(i['source'], picpath)
# Write a text file listing the captions (not strictly needed).
# An explicit UTF-8 encoding avoids UnicodeEncodeError when the platform's
# default encoding cannot represent a caption; ``with`` closes the file even
# if the write fails.
txtpath = path1 + '\\1.txt'
with open(txtpath, 'w', encoding='utf-8') as f:
    f.write(intro_text)

The crawled result looks like this:
Here Insert Picture Description

Published 163 original articles · won praise 117 · views 210 000 +

Guess you like

Origin blog.csdn.net/u010095372/article/details/88706163
Recommended