# 这也是刚开始学习时写的代码；学会了 requests 之后方便多了。

import requests
import time
import re
import os
# Crawl a short chain of duitang.com posts: download the picture shown on
# each post, then follow that post's "next image" link to the following one.

# --- Crawl configuration ---------------------------------------------------
BASE_URL = "https://www.duitang.com"
START_PATH = "/blog/?id=800563506"  # path + query string of the first post
PAGE_COUNT = 2  # maximum number of posts to visit
SAVE_DIR = "F://"  # prefix prepended verbatim to every saved file name
REQUEST_TIMEOUT = 10  # seconds; without it requests.get can block forever

# --- Page patterns (compiled once, not on every loop iteration) ------------
# The <div class="de-img"> block that holds the picture and its metadata.
IMAGE_BLOCK_RE = re.compile(r'<div class="de-img"(.*?)</div>', re.S)
# Path of the next post, taken from the "shownext" anchor.
NEXT_LINK_RE = re.compile(r'<a class="shownext" href="(.*?)" data-nextid=', re.S)
# Picture title, taken from the <img> alt attribute.
TITLE_RE = re.compile(r'<img alt="(.*?)" id=', re.S)
# Picture address, taken from the <img> src attribute.
IMAGE_SRC_RE = re.compile(r'src="(.*?)" style=', re.S)
# Characters that cannot appear in a Windows file name, plus control chars.
UNSAFE_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def first_group(pattern, text):
    """Return group 1 of the first match of *pattern* in *text*, or None."""
    found = pattern.search(text)
    return found.group(1) if found is not None else None


def parse_post(html):
    """Extract ``(next_path, title, image_url)`` from a post page.

    ``next_path`` is None when the page has no "shownext" link, so the caller
    can still save the current picture before stopping.

    Raises:
        ValueError: if the picture title or picture URL cannot be found.
    """
    # Join every de-img block so the field patterns search only that markup.
    block = "".join(IMAGE_BLOCK_RE.findall(html))
    title = first_group(TITLE_RE, block)
    image_url = first_group(IMAGE_SRC_RE, block)
    if title is None or image_url is None:
        raise ValueError("could not find the image title/URL in the page")
    return first_group(NEXT_LINK_RE, block), title, image_url


def safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced by '_'."""
    return UNSAFE_FILENAME_CHARS_RE.sub("_", name)


def build_save_path(save_dir, title, index):
    """Return the target path ``<save_dir><title><index>.png``.

    The running index is appended to the title so that two pictures sharing
    a title do not overwrite each other.
    """
    return save_dir + safe_filename(title) + str(index) + ".png"


def fetch(url):
    """GET *url* with a timeout and raise ``requests.HTTPError`` on a 4xx/5xx reply."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of parsing (or saving) an error page.
    response.raise_for_status()
    return response


def crawl(start_path=START_PATH, page_count=PAGE_COUNT, save_dir=SAVE_DIR):
    """Download up to *page_count* pictures, starting at *start_path*.

    Each picture is written to *save_dir*; the crawl ends early when a post
    has no link to a following picture.
    """
    path = start_path
    for index in range(1, page_count + 1):
        page = fetch(BASE_URL + path)
        next_path, title, image_url = parse_post(page.text)
        if next_path is not None:
            print(next_path)
        print(title)
        print(image_url)

        image = fetch(image_url)
        # .content is the raw bytes of the response body, which is what lets
        # binary files such as pictures or music be written out unchanged.
        with open(build_save_path(save_dir, title, index), "wb") as out:
            out.write(image.content)

        if next_path is None:
            print("No next-image link found; stopping.")
            break
        path = next_path


if __name__ == "__main__":
    crawl()

# 猜你喜欢

# 转载自 www.cnblogs.com/cwkcwk/p/9461382.html