import os
import time
import urllib.request
import urllib.parse
from bs4 import BeautifulSoup
def opener_hander(request, proxies=None):
    """Open *request* through an HTTP proxy and return the response.

    Args:
        request: a ``urllib.request.Request`` (or URL string) to open.
        proxies: optional mapping of scheme -> proxy address. Defaults to
            the original hard-coded HTTP proxy so existing callers are
            unaffected; pass your own dict to use a different proxy.

    Returns:
        The response object produced by the opener's ``open()`` call.
    """
    if proxies is None:
        # Original behavior: a single fixed HTTP proxy.
        proxies = {
            'http': '119.27.177.169:80'
        }
    handler = urllib.request.ProxyHandler(proxies=proxies)
    opener = urllib.request.build_opener(handler)
    return opener.open(request)
def request_header(url, page):
    """Build a Request for listing page *page* with browser-like headers.

    Page 1 uses the bare section URL; later pages are formatted into the
    ``{}`` placeholder of *url*.
    """
    target = 'http://www.mzitu.com/xinggan/' if page == 1 else url.format(page)
    # Referer + User-Agent make the request look like a normal browser visit.
    headers = {
        'Referer': 'http://mzitu.com/',
        'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
    }
    return urllib.request.Request(url=target, headers=headers)
def request_img(url):
    """Wrap an image URL in a Request carrying Referer/User-Agent headers."""
    return urllib.request.Request(
        url=url,
        headers={
            'Referer': 'http://mzitu.com/',
            'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0',
        },
    )
def tupian_xiazai(content, page):
    """Parse one listing page's HTML and download every thumbnail on it.

    Args:
        content: decoded HTML of a listing page.
        page: page number, used only for progress messages.

    Side effects:
        Creates the output directory if needed and writes one file per
        image (named from the thumbnail's ``alt`` text plus the URL's
        extension), sleeping 1s between downloads to stay polite.
    """
    soup = BeautifulSoup(content, 'lxml')
    img_list = soup.select('.postlist > ul > li > a > img ')
    dirname = '性感图片'
    # makedirs(exist_ok=True) is idempotent — no exists()/mkdir() race.
    os.makedirs(dirname, exist_ok=True)
    print("正在下载第 %s 页..... " % page)
    for i, img in enumerate(img_list):
        filename = img['alt']
        # Real image URL is lazy-loaded via the data-original attribute.
        imgurl = img['data-original']
        response1 = opener_hander(request_img(imgurl))
        wenjian = filename + '.' + imgurl.split('.')[-1]
        print("开始下载第%s张" % (i + 1))
        filefath = os.path.join(dirname, wenjian)
        with open(filefath, 'wb') as fp:
            fp.write(response1.read())
        time.sleep(1)  # throttle to avoid hammering the server
    print(" %s 页下载完毕 " % page)
def main():
    """Prompt for a page range and download each listing page's images."""
    first = int(input("请输入开始下载的页数:"))
    last = int(input("请输入结束下载的页数:"))
    url_template = 'http://www.mzitu.com/xinggan/page/{}/'
    page = first
    while page <= last:
        page_request = request_header(url_template, page)
        page_response = opener_hander(page_request)
        html = page_response.read().decode('utf8')
        tupian_xiazai(html, page)
        page += 1
# Bug fix: the original `if name == 'main':` raises NameError — the
# standard guard compares the dunder module name against '__main__'.
if __name__ == '__main__':
    main()
# python爬取www.mzitu.com性感页面的图片
# (Scrapes images from the "xinggan" section of www.mzitu.com.)
# 转载自 blog.csdn.net/LoveL_T/article/details/83514861