1. Download the webpage: OpenHtml.py
import urllib.request
from urllib.parse import quote


class HtmlLoader(object):
    """Fetch the raw body of a web page with a browser-like User-Agent."""

    def Open(self, chaper_url):
        """Download ``chaper_url`` and return the response body.

        Args:
            chaper_url: URL of the page to fetch, or ``None``.

        Returns:
            The response body as ``bytes``, or ``None`` when ``chaper_url``
            is ``None`` or the response code is not 200.

        Any exception raised by ``urllib.request.urlopen`` is propagated
        unchanged to the caller.
        """
        if chaper_url is None:
            return None

        # Identify as a desktop Firefox browser instead of the default
        # urllib agent (presumably because some sites reject the latter).
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
        req = urllib.request.Request(url=chaper_url, headers=headers)

        # The context manager guarantees the connection is closed on every
        # path, including the early return for a non-200 response.
        with urllib.request.urlopen(req) as response:
            if response.getcode() != 200:
                return None
            return response.read()
2. Parse the page and extract the image URLs: ImageAnalysis.py
import OpenHtml
from urllib.parse import quote
import lxml.html


class Imager:
    """Extract image URLs from a downloaded HTML page."""

    def Analysis(self, url):
        """Return the ``src`` values of all ``img.BDE_Image`` elements.

        Args:
            url: Address of the page to scan. It may contain non-ASCII
                characters (e.g. Chinese search terms).

        Returns:
            A list of ``src`` attribute strings; an empty list when the
            page could not be downloaded or has no content.
        """
        # Percent-encode non-ASCII characters so the URL can be requested,
        # while leaving the URL delimiters and existing %-escapes intact.
        root_url = quote(url, safe='/:?=&%#')

        openhtml = OpenHtml.HtmlLoader()
        # Fetch the encoded URL; the raw one fails for non-ASCII addresses.
        html = openhtml.Open(root_url)
        if not html:
            # Open() returns None on failure; fromstring() would crash on it.
            return []

        # Parse the HTML into an element tree.
        tree = lxml.html.fromstring(html)
        # Collect the src attribute of every matching <img> via XPath.
        img = tree.xpath(' //img [@class="BDE_Image"]/@src ')
        return img
3. Download image: LoadFile.py
import urllib.request


class Loader:
    """Download a remote file to disk while printing progress."""

    def callback(self, a, b, c):
        """Print download progress; usable as a ``urlretrieve`` report hook.

        Args:
            a: Number of data blocks downloaded so far.
            b: Size of one data block, in bytes.
            c: Total size of the remote file, in bytes. A value of zero or
                less means the size is unknown or the file is empty.
        """
        if c <= 0:
            # No usable total size: a percentage cannot be computed (and
            # dividing would fail or go negative), so report bytes instead.
            print('%d bytes downloaded' % (a * b))
            return
        # The last block is usually only partly filled, so cap at 100.
        per = min(100.0 * a * b / c, 100)
        print(' % .2f %% ' % per)

    def Down(self, url, filename):
        """Download ``url`` to the local path ``filename``.

        Args:
            url: Address of the remote file.
            filename: Local path the data is written to.

        Returns:
            The ``(filename, headers)`` tuple returned by ``urlretrieve``.
        """
        return urllib.request.urlretrieve(url, filename, self.callback)
4. Entry point (test script): main.py
#!/usr/bin/env python
# coding=utf-8
import os

import ImageAnalysis
import LoadFile


def downimge(url="https://tieba.baidu.com/p/5475267611",
             save_dir='C:\\Users\\luffy\\Desktop\\img'):
    """Download every image found on ``url`` into ``save_dir``.

    Images are saved as ``0.jpg``, ``1.jpg``, ... in the order the page
    analysis returns them.

    Args:
        url: Page to scan for images.
        save_dir: Directory the images are written to; created if missing.
    """
    imganalysis = ImageAnalysis.Imager()
    img = imganalysis.Analysis(url)

    # urlretrieve cannot write into a directory that does not exist.
    os.makedirs(save_dir, exist_ok=True)

    download = LoadFile.Loader()
    for x, i in enumerate(img):
        download.Down(i, os.path.join(save_dir, '%s.jpg' % x))


if __name__ == '__main__':
    downimge()
    # To download a single file directly instead:
    #   download = LoadFile.Loader()
    #   download.Down('http://www.python.org/ftp/python/2.7.5/Python-2.7.5.tar.bz2',
    #                 'C:\\Users\\luffy\\Desktop\\img\\Python-2.7.5.tar.bz2')
The urllib.request module provides the urlretrieve() function, which downloads remote data directly to a local file.
urlretrieve(url, filename=None, reporthook=None, data=None)
- The parameter filename specifies the local path to save to. (If it is not specified, urllib generates a temporary file to hold the data.)
- The parameter reporthook is a callback function that is called once when the connection to the server is established and again after each data block is transferred. We can use this callback to display the current download progress.
- The parameter data is the data to send to the server in a POST request. urlretrieve() returns a two-element tuple (filename, headers): filename is the local path the data was saved to, and headers holds the server's response headers.