# 煎蛋网妹子图网址: 'http://jandan.net/ooxx/'
# 版本: python 3.x
import urllib.request
import os
import random


def url_open(url: str) -> bytes:
    """Fetch *url* and return the raw response body as bytes.

    The request carries a desktop Firefox User-Agent header.
    Any error raised by ``urllib.request.urlopen`` propagates to the caller.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) '
        'Gecko/20100101 Firefox/56.0',
    )
    # NOTE: to route traffic through a proxy, build an opener from
    # urllib.request.ProxyHandler({'http': 'host:port'}) and install it
    # with urllib.request.install_opener() before calling urlopen.
    # Open the prepared Request (not the bare URL) so the header is sent,
    # and close the response deterministically.
    with urllib.request.urlopen(req) as response:
        return response.read()


def get_page(url: str) -> str:
    """Return the text of the current page number found in the page at *url*.

    The number is located by fixed offsets around the first occurrence of
    ``#comments">`` in the HTML and the following `` </a>``.

    Raises:
        ValueError: if either marker is missing from the page.
    """
    html = url_open(url).decode('UTF-8')
    marker_pos = html.find('#comments">')
    if marker_pos == -1:
        raise ValueError('page-number marker not found in ' + url)
    # Offset inherited from the original script: 33 characters from the
    # start of the marker. Presumably matches the site's markup at the
    # time of writing -- TODO confirm against the live page.
    start = marker_pos + 33
    end = html.find(' </a>', start)
    if end == -1:
        raise ValueError('end of page number not found in ' + url)
    return html[start:end]


def find_imgs(url: str) -> list:
    """Return the ``.jpg`` image addresses referenced by the page at *url*.

    Every ``img src=`` attribute is inspected.  The attribute value is cut
    at its closing quote (so a non-jpg image can never swallow the next
    tag) and is still limited to the 255-character window used by the
    original scan.  Values are truncated just after the first ``.jpg``.
    Addresses that are not already absolute ``http(s)://`` URLs get an
    ``http:`` prefix, which turns protocol-relative ``//host/x.jpg`` links
    into full URLs.
    """
    html = url_open(url).decode('UTF-8')
    marker = 'img src='
    window = 255
    img_addrs = []

    start = html.find(marker)
    while start != -1:
        value_start = start + len(marker) + 1  # +1 skips the opening quote
        candidate = html[value_start:start + window]

        quote = html[value_start - 1:value_start]
        if quote in ('"', "'"):
            closing = candidate.find(quote)
            if closing != -1:
                candidate = candidate[:closing]

        ext = candidate.find('.jpg')
        if ext != -1:
            address = candidate[:ext + 4]
            if not address.startswith(('http://', 'https://')):
                address = 'http:' + address
            img_addrs.append(address)

        start = html.find(marker, value_start)

    return img_addrs


def save_imgs(folder, img_addrs) -> None:
    """Download every address in *img_addrs* into the current directory.

    *folder* is accepted for interface compatibility but is not used:
    files are written relative to the current working directory, which
    ``download_mm`` has already switched to the target folder.  Each file
    is named after the last path segment of its address.
    """
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Download first so a failed request does not leave an empty file.
        img = url_open(each)
        with open(filename, 'wb') as f:
            f.write(img)


def download_mm(folder: str = 'OOXX', pages: int = 100) -> None:
    """Download images from the newest *pages* pages into *folder*.

    Creates *folder* if needed, makes it the current working directory,
    reads the newest page number from the index page and then walks
    backwards one page at a time, stopping early at page 1.
    """
    os.makedirs(folder, exist_ok=True)  # tolerate re-runs
    os.chdir(folder)

    url = 'http://jandan.net/ooxx/'
    latest_page = int(get_page(url))

    for i in range(pages):
        # Step back exactly one page per iteration (the original
        # ``page_num -= i`` subtracted cumulatively and skipped pages).
        page_num = latest_page - i
        if page_num < 1:
            break
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)


if __name__ == '__main__':
    download_mm()