# Python 爬取 www.mzitu.com 性感页面的图片

import os

import time

import urllib.request

import urllib.parse

from bs4 import BeautifulSoup

def opener_hander(request):
    """Open *request* through the fixed HTTP proxy and return the response.

    A fresh opener carrying a ``ProxyHandler`` is built on every call. The
    proxy mapping only contains an entry for the ``http`` scheme.

    Args:
        request: The request object handed to the opener's ``open`` method.

    Returns:
        Whatever the opener's ``open`` call returns for *request*.
    """
    proxy_handler = urllib.request.ProxyHandler(
        proxies={'http': '119.27.177.169:80'}
    )
    opener = urllib.request.build_opener(proxy_handler)
    return opener.open(request)

def request_header(url,page):
    """Build the request object for one listing page.

    Page 1 always targets the fixed listing root URL and ignores *url*;
    every other page number is substituted into *url* via ``str.format``.

    Args:
        url: URL template containing a ``{}`` placeholder for the page number.
        page: Page number to request.

    Returns:
        A ``urllib.request.Request`` carrying the Referer and User-Agent
        headers.
    """
    target = 'http://www.mzitu.com/xinggan/' if page == 1 else url.format(page)
    return urllib.request.Request(
        url=target,
        headers={
            'Referer': 'http://mzitu.com/',
            'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
        },
    )

def request_img(url):
    """Build the request object used to fetch a single image.

    Args:
        url: Address of the image.

    Returns:
        A ``urllib.request.Request`` for *url* carrying the Referer and
        User-Agent headers.
    """
    image_request = urllib.request.Request(url=url)
    image_request.add_header('Referer', 'http://mzitu.com/')
    image_request.add_header(
        'User-Agent',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
    )
    return image_request

def _safe_image_filename(alt, imgurl, index):
    """Return a filesystem-safe file name for one image.

    Args:
        alt: The image's ``alt`` text (may be ``None`` or empty).
        imgurl: The image URL; its path supplies the file extension.
        index: 1-based position of the image, used when *alt* yields no name.

    Returns:
        The sanitized name followed by the extension found in the URL path
        (no extension is appended when the path has none).
    """
    # Path separators and these punctuation characters would either break
    # open() or let the page content choose a location outside the folder.
    forbidden = '\\/:*?"<>|'
    stem = ''.join(
        '_' if ch in forbidden or ord(ch) < 32 else ch
        for ch in (alt or '')
    ).strip(' .')
    if not stem:
        stem = str(index)
    # Take the extension from the URL *path* only, so a query string or a
    # dot in the host name cannot end up in the file name.
    last_segment = urllib.parse.urlsplit(imgurl).path.rsplit('/', 1)[-1]
    return stem + os.path.splitext(last_segment)[1]


def tupian_xiazai(content,page):
    """Download every image listed on one page into the '性感图片' folder.

    The page HTML is parsed with BeautifulSoup, each matching ``<img>`` tag's
    ``data-original`` URL is fetched via ``request_img`` / ``opener_hander``
    and the bytes are written to a file named after the tag's ``alt`` text.
    After the page is done the function sleeps for one second.

    Args:
        content: HTML text of the listing page.
        page: Page number, used only in the progress messages.
    """
    soup = BeautifulSoup(content, 'lxml')
    img_list = soup.select('.postlist > ul > li > a > img ')
    dirname = '性感图片'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dirname, exist_ok=True)
    print("正在下载第 %s 页..... " % page)
    for index, img in enumerate(img_list, start=1):
        imgurl = img.get('data-original')
        if not imgurl:
            # A tag without the lazy-load attribute must not abort the page.
            print("第%s张没有图片地址,已跳过" % index)
            continue
        wenjian = _safe_image_filename(img.get('alt'), imgurl, index)
        imgrequest = request_img(imgurl)
        # Read the whole body before touching the disk so a failed download
        # does not leave an empty file behind, and close the response.
        with opener_hander(imgrequest) as response1:
            print("开始下载第%s张" % index)
            data = response1.read()
        filefath = os.path.join(dirname, wenjian)
        with open(filefath, 'wb') as fp:
            fp.write(data)
    time.sleep(1)
    print(" %s 页下载完毕 " % page)

def main():
    """Prompt for a page range and download the images of every page in it.

    Both bounds are read from standard input and converted with ``int``; the
    range is inclusive of the end page. Each page is fetched, decoded as
    UTF-8 and handed to ``tupian_xiazai``.

    Raises:
        ValueError: If either input cannot be converted to an integer.
    """
    start_page = int(input("请输入开始下载的页数:"))
    end_page = int(input("请输入结束下载的页数:"))

    url = 'http://www.mzitu.com/xinggan/page/{}/'

    for page in range(start_page, end_page + 1):
        request2 = request_header(url, page)
        # Release the connection as soon as the page body has been read
        # instead of leaving the response open for the rest of the run.
        with opener_hander(request2) as response2:
            content = response2.read().decode('utf8')
        tupian_xiazai(content, page)

# Start the downloader only when the file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

# 猜你喜欢
#
# 转载自 blog.csdn.net/LoveL_T/article/details/83514861
# 今日推荐