#菠萝tang
#coding:utf-8
import urllib2
import urllib
import os
import time
import json
import jsonpath
def handle_request(url, sort, page):
    """Build a urllib2.Request for one page of Duitang search results.

    Args:
        url:  base search URL, ending with '?kw='.
        sort: search keyword to append to the kw parameter.
        page: zero-based page index; each page holds 24 items.

    Returns:
        A urllib2.Request carrying a browser User-Agent header (the site
        rejects the default Python user agent).
    """
    # Fixed query parameters the Duitang feed API expects (already URL-encoded).
    query_string = '&type=feed&include_fields=top_comments%2Cis_root%2Csource_link%2Citem%2Cbuyable%2Croot_id%2Cstatus%2Clike_count%2Clike_id%2Csender%2Calbum%2Creply_count%2Cfavorite_blog_id&_type=&'
    # 24 results per page, so the start offset is 24 * page.
    url_use = url + sort + query_string + 'start=' + str(24 * page)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
    return urllib2.Request(url=url_use, headers=headers)
def download_image(content):
    """Parse a Duitang JSON response and download every referenced image.

    Args:
        content: raw JSON text of one search-results page.

    Side effects:
        Creates a local 'DuiTang' directory if needed and saves each image
        found under a "path" key into it, pausing 0.1s between downloads
        to avoid hammering the server.
    """
    unicodestr = json.loads(content)
    # Every image URL in the response lives under a "path" key.
    url_list = jsonpath.jsonpath(unicodestr, "$..path")
    dirname = 'DuiTang'
    # Create the target directory once, before the download loop.
    if not os.path.exists(dirname):
        os.mkdir(dirname)
    for li in url_list:
        # Use the last URL path segment as the local file name.
        filename = li.split('/')[-1]
        filepath = dirname + '/' + filename
        urllib.urlretrieve(li, filepath)
        time.sleep(0.1)
def main():
    """Interactive entry point: prompt for a page range and a keyword,
    then fetch each result page from Duitang and download its images.
    """
    url = 'https://www.duitang.com/napi/blog/list/by_search/?kw='
    # raw_input (not input) so Python 2 does not eval the typed text.
    start_page = int(raw_input('Please enter the first page to fetch (24 images per page): '))
    end_page = int(raw_input('Please enter the last page to fetch: '))
    sort = raw_input('Please enter the search keyword: ')
    # Pages are 1-based for the user; the API offset passed on is 0-based.
    for page in range(start_page - 1, end_page):
        print('Download of page %s starting......' % (page + 1))
        request = handle_request(url, sort, page)
        content = urllib2.urlopen(request).read()
        print(content)
        # Parse the response, extract all image links, download the pictures.
        download_image(content)
        print('Download of page %s finished' % (page + 1))
        # Brief pause between pages to be polite to the server.
        time.sleep(2)

if __name__ == '__main__':
    main()
# Use python2.7
# Duitang serves its image listings as JSON; the pagination is just a facade —
# the parameters that matter are kw (keyword) and start (offset).
# Fetching and parsing JSON data like this is worth learning!
#unicodestr = json.loads(content)
#url_list = jsonpath.jsonpath(unicodestr, "$..path")