练手代码(提供的只是框架,需要修改后才能使用)

import urllib.request
import urllib.parse
import re
import os
import time

def handle_request(url, page):
    """Build the request object for a single listing page.

    Args:
        url: Base URL prefix; the page number is appended directly to it.
        page: Page number to request (converted with ``str``).

    Returns:
        A ``urllib.request.Request`` for ``<url><page>.html/`` that carries
        a mobile-browser ``User-Agent`` header.
    """
    url = url + str(page) + '.html' + '/'
    headers = {"User-Agent":"Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Mobile Safari/537.36"}
    # The original bound this object to the misspelled name ``requrst`` and
    # then returned the undefined ``request``, raising NameError on every call.
    request = urllib.request.Request(url=url, headers=headers)
    return request


def download_image(content):
    """Download every image found inside a ``<div class="center">`` block.

    Each captured ``src`` value is saved into the ``糗图`` directory (created
    on first need) under the last path segment of its URL. The function
    pauses one second after each download.

    Args:
        content: Decoded HTML text of one page.

    Returns:
        None. When the page contains no matching image nothing is created
        or downloaded.
    """
    # re.S lets '.' span newlines, since the <img> tag may sit on a different
    # line than the opening <div>. The original pattern looked for the
    # misspelled attribute ``scr`` and therefore never matched a real tag.
    pattern = re.compile(r'<div class="center">.*?<img src="(.*?)".*?></div>', re.S)
    image_srcs = pattern.findall(content)
    if not image_srcs:
        return

    # Create the target directory once instead of re-checking per image.
    dirname = '糗图'
    os.makedirs(dirname, exist_ok=True)

    for image_src in image_srcs:
        # Only protocol-relative URLs ("//host/path") need a scheme; blindly
        # prefixing would corrupt a src that already starts with http(s)://.
        if image_src.startswith('//'):
            image_src = 'https:' + image_src
        # The file name is the last path segment of the image URL.
        filename = image_src.split('/')[-1]
        filepath = os.path.join(dirname, filename)
        print('%s图片正在下载....'% filename)
        urllib.request.urlretrieve(image_src, filepath)
        print('%s图片结束下载....'% filename)
        time.sleep(1)


def main():
    """Prompt for a page range and download the images of every page in it.

    Reads the first and last page numbers from standard input, then for each
    page (inclusive) builds the request, fetches the HTML decoded as GBK,
    and passes it to ``download_image``. Sleeps two seconds between pages.
    """
    # NOTE: left empty in this skeleton; it must be set to the site's page
    # URL prefix before the script can fetch anything.
    url = ''
    start_page = int(input('请输入起始页:'))
    end_page = int(input('请输入结束页码:'))
    for page in range(start_page, end_page + 1):
        print('第%s页开始下载...'% page)
        # Build the request object for this page.
        request = handle_request(url, page)
        # Close the HTTP response deterministically once the body is read.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode('gbk')
        # Parse the HTML, extract the image links and download them.
        download_image(content)
        print('第%s页结束下载'% page)
        print()
        print()
        time.sleep(2)




# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()


发布了23 篇原创文章 · 获赞 0 · 访问量 516

猜你喜欢

转载自blog.csdn.net/weixin_46244909/article/details/104214573