After studying for a while, I have gained some experience that I would like to share:
Where: function open1() fetches the web page at the given URL;
function getp() reads the current page number from the page tags of the girl-picture board that is being crawled;
function find() collects the URLs of the .jpg pictures found on a page;
function save() downloads the pictures at the crawled URLs into the specified folder;
the main function is down().
import urllib.request
import os
def open1(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop Firefox ``User-Agent`` header
    instead of urllib's default one.

    Args:
        url: Address to request.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    request = urllib.request.Request(url)
    request.add_header('User-Agent', ' Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0')
    # The context manager closes the response (and its connection) even when
    # read() raises, so repeated calls do not leak sockets.
    with urllib.request.urlopen(request) as response:
        return response.read()
def getp(url):
    """Return the current comment-page number shown on the page at *url*.

    The page is downloaded with ``open1`` and decoded as UTF-8. The value is
    the text that starts 23 characters after the beginning of the
    ``current-comment-page`` marker and ends just before the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The page number as a string (callers convert it with ``int``).

    Raises:
        ValueError: If the marker or the closing ``]`` is not present, so a
            changed page layout is reported instead of returning a
            meaningless slice of the document.
    """
    html = open1(url).decode('utf-8')
    marker = "current-comment-page"
    marker_at = html.find(marker)
    if marker_at == -1:
        raise ValueError('marker %r not found in %s' % (marker, url))
    # 23 = the 20-character marker plus 3 characters before the number
    # (presumably the `">[` that follows the class name -- verify against
    # the live page markup).
    start = marker_at + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError('closing "]" of the page number not found in %s' % url)
    return html[start:end]
def find(url):
    """Collect the ``.jpg`` image addresses referenced on the page at *url*.

    The page is downloaded with ``open1``, decoded as UTF-8 and scanned for
    every ``img src=`` occurrence. For each one, the text after the opening
    quote up to and including the first ``.jpg`` is recorded.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of the extracted addresses, in document order. They are
        returned exactly as written in the page (no scheme is added here).
    """
    html = open1(url).decode('utf-8')
    marker = 'img src='
    images = []
    tag_at = html.find(marker)
    while tag_at != -1:
        # Skip the 8-character marker plus the opening quote character.
        value_at = tag_at + 9
        # Never look further than 255 characters past the start of the tag.
        window_end = tag_at + 255
        # When the attribute is quoted, stop at its closing quote so that a
        # ".jpg" belonging to a *later* tag is not glued onto this one.
        quote = html[value_at - 1:value_at]
        if quote in ('"', "'"):
            closing = html.find(quote, value_at, window_end)
            if closing != -1:
                window_end = closing
        ext_at = html.find('.jpg', value_at, window_end)
        if ext_at != -1:
            images.append(html[value_at:ext_at + 4])
            resume_at = ext_at
        else:
            resume_at = value_at
        tag_at = html.find(marker, resume_at)
    return images
def save(f, image):
    """Download every address in *image* into the current working directory.

    Each file is named after the last ``/``-separated component of its
    address.

    Args:
        f: Unused by this function; kept so existing callers keep working.
            Files are written relative to the current working directory.
        image: Iterable of image addresses. Addresses without a scheme
            (for example ``//host/pic.jpg``) get ``http:`` prepended;
            addresses that already contain ``://`` are used unchanged.
    """
    for link in image:
        filename = link.split('/')[-1]
        address = link if '://' in link else 'http:' + link
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        data = open1(address)
        with open(filename, 'wb') as out:
            out.write(data)
def down(f='爬虫', p=15):
    """Download the pictures of *p* pages into the folder *f*.

    The folder is created when missing and becomes the process's current
    working directory (it stays that way after the function returns). For
    each page index ``i`` the page address is built from the base URL, a
    fixed path token and the letter ``chr(97 + i)``; the pictures found on
    that page by ``find`` are then stored by ``save``.

    Args:
        f: Name of the target folder.
        p: Number of pages to process.
    """
    # exist_ok lets the script be re-run without failing on an existing folder.
    os.makedirs(f, exist_ok=True)
    os.chdir(f)
    url = 'http://jandan.net/ooxx'
    for i in range(p):
        # NOTE(review): the path token is hard-coded; it presumably encodes a
        # specific date/page on the site -- confirm it is still valid.
        purl = url + '/MjAyMDAyMjktMjA' + chr(97 + i) + '#comments'
        image = find(purl)
        save(f, image)
# Start the crawl with the default folder and page count, but only when this
# file is executed as a script (not when it is imported).
if __name__ == '__main__':
    down()
在这里插入代码片