A Python web crawler for scraping "little sister" photos

After studying for a while, I have some experience to share.
In the code below: the function open1() fetches the web page at the specified URL;
the function getp() visits the "little sister" board page and scrapes the page number from the tag highlighted in dark blue in the picture
[Insert picture description here]
the function find() scrapes the URLs of the photos on a page;
the function save() downloads the photos at the scraped URLs into the specified folder;
down() is the main function.

import urllib.request
import os

def open1(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop Firefox ``User-Agent`` header rather
    than urllib's default one.

    Args:
        url: URL string to fetch.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    rep = urllib.request.Request(url)
    rep.add_header('User-Agent',' Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:71.0) Gecko/20100101 Firefox/71.0')
    # Close the response deterministically; the crawler calls this once per
    # page and once per image, so leaked connections would pile up.
    with urllib.request.urlopen(rep) as r:
        return r.read()

def getp(url):
    """Return the current page number shown on the page at *url*, as text.

    The page is downloaded with ``open1`` and decoded as UTF-8. The number
    is taken to be the text between the ``current-comment-page`` marker
    (plus a fixed three-character skip) and the next ``]``.

    Args:
        url: URL string of the page to inspect.

    Returns:
        The extracted text as ``str``; it is not converted to ``int`` here.

    Raises:
        ValueError: If the marker or the closing ``]`` is not present in the
            page. Previously this case returned an arbitrary slice.
    """
    h = open1(url).decode('utf-8')
    marker = "current-comment-page"
    start = h.find(marker)
    if start == -1:
        raise ValueError("'current-comment-page' marker not found in page: " + url)
    # 23 == len(marker) + 3: skips the three characters that follow the
    # marker (presumably the `">[` of the surrounding tag -- confirm against
    # the live markup).
    a = start + 23
    b = h.find(']', a)
    if b == -1:
        raise ValueError("closing ']' of the page number not found in page: " + url)
    return h[a:b]
    

def find(url):
    """Collect the ``.jpg`` image addresses referenced on the page at *url*.

    The page is downloaded with ``open1``, decoded as UTF-8 and scanned for
    every ``img src=`` occurrence. For each one, a ``.jpg`` is looked for
    within the following 255 characters; when found, the text from just
    after the opening quote up to and including ``.jpg`` is collected.

    Args:
        url: URL string of the page to scan.

    Returns:
        A list of the collected address strings, in page order.
    """
    page = open1(url).decode('utf-8')
    marker = 'img src='
    # Skip the marker itself plus the one quote character that follows it.
    skip = len(marker) + 1
    links = []
    pos = page.find(marker)
    while pos != -1:
        end = page.find('.jpg', pos, pos + 255)
        if end == -1:
            # No .jpg close enough: resume scanning right after this tag start.
            resume = pos + skip
        else:
            links.append(page[pos + skip:end + 4])
            resume = end
        pos = page.find(marker, resume)
    return links
    

    
def save(f,image):
    """Download every image in *image* into the current working directory.

    Each file is named after the last ``/``-separated component of its
    address.

    Args:
        f: Unused; kept so existing callers keep working. Files are written
            relative to the current working directory, not to *f*.
        image: Iterable of image address strings. Addresses without a scheme
            (e.g. ``//host/pic.jpg``) get ``http:`` prepended; addresses that
            already contain ``://`` are used unchanged.

    Raises:
        urllib.error.URLError: Propagated from ``open1`` when a download
            fails; remaining images are then not processed.
    """
    for i in image:
        a=i.split('/')[-1]
        # Only prepend a scheme when there is none, otherwise an absolute
        # address would become the invalid 'http:http://...'.
        src=i if '://' in i else 'http:'+i
        # Download first so a failed request does not leave an empty file.
        img=open1(src)
        with open(a,'wb') as f1:
            f1.write(img)
            
      
        

def down(f='爬虫',p=15):
    """Crawl *p* pages of the board and save their images into folder *f*.

    The folder is created if needed and made the current working directory,
    which stays changed after the function returns. Each page's images are
    then collected with ``find`` and stored with ``save``.

    Args:
        f: Name of the target folder. An existing folder is reused, so
            re-running no longer fails with ``FileExistsError``.
        p: Number of pages to crawl.
    """
    os.makedirs(f, exist_ok=True)
    os.chdir(f)
    url='http://jandan.net/ooxx'
    # The original also fetched the current page number here but never used
    # it; that extra request (and its possible parse failure) was dropped.
    for i in range(p):
        # NOTE(review): page addresses come from a fixed token plus one
        # letter ('a', 'b', ...); presumably this encodes a date/page pair
        # valid only for the original crawl date -- confirm against the site.
        purl=url+'/MjAyMDAyMjktMjA'+chr(97+i)+'#comments'
        image=find(purl)
        save(f,image)


if __name__=='__main__':
    down()

    

(Insert code snippet here)
Published 130 original articles · 16 likes · 30,000+ views

Guess you like

Origin blog.csdn.net/feiqipengcheng/article/details/104578975