"""Download image galleries from the listing pages of www.mzitu.com.

For every listing page in a user-supplied range, the script extracts the
galleries linked from the page (title + URL), then walks each gallery's
numbered photo pages and saves the main image of every page under
``./images/<listing page>/<gallery title>/``.
"""
import urllib.request
import urllib.parse
import urllib.error
import re
import os
import ssl

# SECURITY NOTE(review): this turns off TLS certificate verification for every
# HTTPS request urllib makes in this process. It is kept because the script
# relied on it, but it should be removed once certificate checks succeed.
ssl._create_default_https_context = ssl._create_unverified_context

# Root folder that receives one sub-folder per listing page.
path = "./images"

# Headers sent with every request (listing pages, photo pages and images).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
    "referer": "https://www.mzitu.com/xinggan/"
}

# Patterns are compiled once. They are intentionally NOT compiled with re.S:
# the greedy ``.*`` in the alt pattern must stay confined to a single line.
# Each capture group still includes the surrounding quote characters.
_ALT_PATTERN = re.compile(r'<li>.*?<img .* alt=(.*?) .*? />.*?')
_HREF_PATTERN = re.compile(r'<li><a href=(.*?) .*?>.*?')
_MAIN_IMAGE_PATTERN = re.compile(
    r'<div class="main-image"><p>.*?<img src=(.*?) .*? />.*?')


def handler_request(url, pageIndex):
    """Build the request object for one listing page.

    :param url: listing URL prefix; the page number is appended to it
    :param pageIndex: listing page number
    :return: a ``urllib.request.Request`` carrying the module-level headers
    """
    url = url + str(pageIndex)
    # Build the request object.
    request = urllib.request.Request(url=url, headers=headers)
    return request


def get_images_url(content, basePath):
    """Extract every gallery on a listing page and download each of them.

    :param content: decoded HTML of a listing page
    :param basePath: folder in which one sub-folder per gallery is created
    """
    # Pattern.findall's second positional argument is ``pos``, not ``flags``;
    # passing re.S there skipped the first 16 characters of the page.
    alts = _ALT_PATTERN.findall(content)
    hrefs = _HREF_PATTERN.findall(content)

    # Strip the surrounding quotes from each title. zip() stops at the shorter
    # list, so a title/link count mismatch can no longer raise IndexError.
    image_map = {alt[1:-1]: href for alt, href in zip(alts, hrefs)}

    for item in image_map.items():
        image_category_response(item, basePath)


def image_category_response(item, basePath):
    """Download the main image of every photo page of one gallery.

    Photo pages are requested as ``<gallery url>/<index>`` until a request
    fails or a page yields no image, at which point the gallery is finished.

    :param item: ``(title, quoted_url)`` pair produced by ``get_images_url``
    :param basePath: folder in which the gallery folder is created
    """
    alt, quoted_url = item
    save_folder = os.path.join(basePath, alt)
    os.makedirs(save_folder, exist_ok=True)

    # The captured href still carries its quote characters.
    baseurl = quoted_url[1:-1]
    pageCount = 1000  # upper bound on the number of photo pages probed

    for pageIndex in range(pageCount):
        page_url = baseurl + "/" + str(pageIndex)
        try:
            # Build the request object.
            request = urllib.request.Request(url=page_url, headers=headers)
            # Send the request; the response is closed as soon as it is read.
            with urllib.request.urlopen(request) as response:
                content = response.read().decode()
            imgUrl = _MAIN_IMAGE_PATTERN.findall(content)
            download_images(imgUrl[0], save_folder)
        except urllib.error.URLError:
            # A failed request marks the end of the gallery.
            print("最大页面数{0}".format(pageIndex - 1))
            break
        except Exception as e:
            # Any other failure (e.g. no image found on the page) also ends
            # this gallery; report it and move on to the next one.
            print(e)
            break


def download_images(url, save_path):
    """Fetch one image and store it under its original file name.

    :param url: image URL still wrapped in its quote characters
    :param save_path: existing folder that receives the file
    """
    url = url[1:-1]
    print(url)
    # Build the request object.
    request = urllib.request.Request(url=url, headers=headers)
    filename = url.split('/')[-1]
    # Read the body before opening the target file so that a failed transfer
    # does not leave an empty file behind.
    with urllib.request.urlopen(request) as response:
        data = response.read()
    with open(os.path.join(save_path, filename), 'wb') as fb:
        fb.write(data)


def parse_pages(content):
    """Print the given page content."""
    print(content)


def main():
    """Prompt for a listing-page range and download every page in it."""
    url = 'https://www.mzitu.com/xinggan/page/'
    start_page = int(input("Please enter the starting page number:"))
    end_page = int(input("Please enter an end page number:"))

    # Create the root folder.
    os.makedirs(path, exist_ok=True)

    for pageIndex in range(start_page, end_page + 1):
        print("...........start the download page {0}".format(pageIndex))
        # Create the folder for this listing page.
        save_path = create_folder(pageIndex)
        # Build the request object.
        request = handler_request(url, pageIndex)
        # Send the request and read the page content.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode()
        # Parse the content, extract the pictures and download them.
        get_images_url(content, save_path)
        print("...........the end of the download page {0}".format(pageIndex))


def create_folder(pageIndex):
    """Create the folder for one listing page and return its path.

    :param pageIndex: listing page number, used as the folder name
    :return: the folder path with forward slashes and a trailing ``/``
    """
    save_path = os.path.join(path, str(pageIndex))
    os.makedirs(save_path, exist_ok=True)
    return save_path.replace("\\", "/") + "/"


if __name__ == "__main__":
    main()