First, preparation
1. the original address
2. Inspecting the page HTML shows that, on each page, the large picture is an `img` tag whose class is `pic-large`.
Second, the code
# Crawl a win4000 wallpaper gallery: visit each gallery page, find the large
# picture (the <img> tag with class "pic-large") and save it to a local folder.
import os

import requests
from bs4 import BeautifulSoup

url = 'http://www.win4000.com/wallpaper_detail_157712.html'
imgmkdir = 'D://Download//ghost_1//'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 10


# Get the page URLs
def getUrlList():
    """Return the list of gallery page URLs to crawl.

    The first entry is the base ``url`` itself; the remaining nine are built
    by inserting ``_1`` .. ``_9`` before the ``.html`` suffix.
    """
    base = url.split('.html')[0]
    imgUrlList = [url]
    for i in range(1, 10):
        imgUrlList.append(base + '_' + str(i) + '.html')
    return imgUrlList


# Download an image
def downImg(imgUrl):
    """Download ``imgUrl`` into ``imgmkdir`` unless the file is already there.

    The local file name is the last path segment of the URL. Network and
    file-system errors are reported on stdout and do not propagate, so one
    failed image does not abort the whole crawl.
    """
    filename = imgUrl.split('/')[-1]
    imgpath = os.path.join(imgmkdir, filename)

    # Check the local target path (not the URL) to decide whether to skip.
    if os.path.exists(imgpath):
        print(filename + ' [file already exists]')
        return

    try:
        # makedirs also creates missing parent directories.
        os.makedirs(imgmkdir, exist_ok=True)
        r = requests.get(imgUrl, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        # 'wb': the image payload is binary; `with` closes the file for us.
        with open(imgpath, 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as e:
        print('crawling failed ' + str(e))
    else:
        print(imgUrl + ' [crawling finished]')


# Get the image tags from the parsed page
def getContent(soup):
    """Download every ``<img class="pic-large">`` image found in ``soup``.

    Only absolute http/https sources are downloaded; tags without a ``src``
    attribute or with any other kind of source are skipped.
    """
    for img in soup.find_all('img', class_='pic-large'):
        imgsrc = img.get('src', '')
        if imgsrc.startswith(('http://', 'https://')):
            downImg(imgsrc)


# Get the parsed HTML for a URL
def getHtmlByUrl(htmlUrl):
    """Fetch ``htmlUrl`` and return it parsed as a ``BeautifulSoup`` tree.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an error status.
    """
    response = requests.get(htmlUrl, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Parse with the lxml parser, as the original script did.
    return BeautifulSoup(response.content, 'lxml')


def main():
    """Crawl every gallery page and download the large picture on each."""
    for pageUrl in getUrlList():
        try:
            soup = getHtmlByUrl(pageUrl)
        except requests.RequestException as e:
            # A page that cannot be fetched is reported and skipped so the
            # remaining pages are still crawled.
            print('crawling failed ' + str(e))
            continue
        getContent(soup)


if __name__ == '__main__':
    main()
Third, the results
Fourth, summary
This is a crude, brute-force way of fetching the images; it is just a first attempt to test the waters.