Crawl scenery pictures from ivsky.com.

# Reference URL: https://www.ivsky.com/
from requests_html import HTMLSession

session = HTMLSession()
BASE_URL = 'https://www.ivsky.com'


def get_page_url():
    """Yield the listing-page URLs for pages 1..20 of the scenery category.

    Lazily generated so the caller can start downloading before all
    page URLs are built.
    """
    for i in range(1, 21):
        yield 'https://www.ivsky.com/tupian/ziranfengguang/index_{}.html'.format(i)

# Test snippet (kept from the original notes):
# r = session.get(url='https://www.ivsky.com/tupian/ziranfengguang/index_1.html')








 




 BASE_URL = 'https://www.ivsky.com'
# element_list = r.html.find('.il_img a')
# for element in element_list:
#     # print(element.attrs.get('href'))
#     a_url = BASE_URL + element.attrs.get('href')
#     print(a_url)
#     title = element.attrs.get('title')
#     # 进入到具体的图片内部
#     h = session.get(url=a_url)
#     element_list = h.html.find('.il_img img')
#     for element in element_list:
#         url = element.attrs.get('src')[15:]
#         url_detail = BASE_URL + url
#         print(url_detail)

def get_url_page(url):
    """Fetch one listing page, follow each album link and save its images.

    *url* is a listing-page URL produced by ``get_page_url``.  Uses the
    module-level ``session`` and ``BASE_URL`` and delegates each download
    to ``save``.  Cells missing an href/src are skipped instead of raising.
    """
    r = session.get(url=url)
    # Each thumbnail cell (.il_img) wraps an <a> pointing at the album page.
    for link in r.html.find('.il_img a'):
        href = link.attrs.get('href')
        if not href:
            # Malformed cell — skip rather than crash on None concatenation.
            continue
        album_url = BASE_URL + href
        title = link.attrs.get('title') or 'untitled'
        # Enter the album page and collect the actual <img> elements.
        # (Distinct loop variables here: the original shadowed
        # element_list/element from the outer loop.)
        album_page = session.get(url=album_url)
        for img in album_page.html.find('.il_img img'):
            src = img.attrs.get('src')
            if not src:
                continue
            # src carries a 15-character prefix that is stripped before
            # re-joining onto BASE_URL.
            # NOTE(review): assumes that prefix is always 15 chars — confirm.
            save(BASE_URL + src[15:], title)

import os


def save(url, title):
    """Download *url* and write it as '<title>.png' in the target folder.

    Creates the destination directory on first use, so a fresh checkout
    does not die with FileNotFoundError on the first open().
    """
    target_dir = '风景图片'  # "scenery pictures"
    os.makedirs(target_dir, exist_ok=True)
    file_path = os.path.join(target_dir, title + '.png')
    r = session.get(url=url)
    with open(file_path, 'wb') as f:
        f.write(r.content)
    print('{} saved successfully'.format(title))

if __name__ == '__main__':
    # Walk every listing page and download all of its images.
    for page_url in get_page_url():
        get_url_page(page_url)


# TODO: consider printing a single-line progress bar while downloading.

 

Guess you like

Origin www.cnblogs.com/changwenjun-666/p/11323262.html