Small crawler exercise: scraping images from Duitang

#菠萝tang
#coding:utf-8
import urllib2
import urllib
import os
import time
import json
import jsonpath

def handle_request(url, sort, page):
    """Build the urllib2 request for one page of search results.

    The final URL is ``url`` + ``sort`` + a fixed query string +
    ``start=<24 * page>``.

    Args:
        url: Base URL that the search keyword is appended to.
        sort: Search keyword, appended to ``url`` verbatim (not URL-encoded).
        page: Zero-based page index; the ``start`` offset sent to the server
            is ``24 * page``.

    Returns:
        A ``urllib2.Request`` carrying a browser-like User-Agent header.
        The request is only constructed here, not sent.
    """
    # Fixed part of the query string expected by the search endpoint.
    fixed_query = '&type=feed&include_fields=top_comments%2Cis_root%2Csource_link%2Citem%2Cbuyable%2Croot_id%2Cstatus%2Clike_count%2Clike_id%2Csender%2Calbum%2Creply_count%2Cfavorite_blog_id&_type=&'
    offset = str(24 * page)
    full_url = ''.join([url, sort, fixed_query, 'start=', offset])

    # Present the request as coming from a desktop Chrome browser.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
    return urllib2.Request(url=full_url, headers=browser_headers)

def download_image(content):
    """Download every image referenced in a JSON search response.

    Every value stored under a ``path`` key anywhere in the decoded JSON is
    treated as an image URL and saved into the local ``DuiTang`` directory,
    named after the last path segment of the URL.

    Args:
        content: JSON text returned by the search endpoint.
    """
    data = json.loads(content)
    # jsonpath.jsonpath returns False (not an empty list) when nothing
    # matches, so normalise that case before iterating.
    url_list = jsonpath.jsonpath(data, "$..path") or []
    if not url_list:
        return

    # Create the target directory once, not on every iteration.
    dirname = 'DuiTang'
    if not os.path.exists(dirname):
        os.mkdir(dirname)

    for li in url_list:
        # The last URL segment is used as the local file name.
        filename = li.split('/')[-1]
        filepath = os.path.join(dirname, filename)
        urllib.urlretrieve(li, filepath)
        # Pause between downloads to avoid hammering the server.
        time.sleep(1)


def main():
    """Prompt for a page range and keyword, then download each page.

    Pages are entered 1-based by the user and converted to the 0-based
    index that ``handle_request`` expects. For each page the JSON response
    is fetched, printed, and passed to ``download_image``.
    """
    # No surrounding spaces: they would make the URL invalid.
    url = 'https://www.duitang.com/napi/blog/list/by_search/?kw='
    # raw_input (not input): Python 2's input() evaluates the typed text.
    start_page = int(raw_input(" Please input initial gripping position (24 a portion of the graph): "))
    end_page = int(raw_input(" Please enter an end gripping position: "))
    sort = raw_input(" Please enter the type of query: ")

    for page in range(start_page - 1, end_page):
        print(' % s of section begins Download ...... ' % (page + 1))
        request = handle_request(url, sort, page)
        response = urllib2.urlopen(request)
        try:
            content = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        print(content)
        # Parse the content, extract all image links, download the pictures.
        download_image(content)
        # '%s' (lowercase): '%S' is not a valid format character.
        print('%s of partially downloaded ' % (page + 1))
        # Pause between pages to avoid hammering the server.
        time.sleep(2)


# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()

# Use python2.7

# Duitang serves its image listings as JSON; the page numbers are only a facade. The parameters that matter are kw (the keyword) and start (the offset).

# Extracting data from JSON is something you need to learn!

#unicodestr = json.loads(content)

#url_list = jsonpath.jsonpath(unicodestr, "$..path")

Small crawler exercise: scraping images from Duitang

Guess you like