# Crawl the free resume templates listed on the Chinaz webmaster-material
# site (sc.chinaz.com/jianli) and save each one locally as a .rar archive.
import random

import requests
from lxml import etree

headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
    ),
    # Ask for the connection to be closed right after each request so the
    # connection pool is not left occupied by finished requests.
    "Connection": "close",
}

# URL of the first listing page.
url_page_one = "http://sc.chinaz.com/jianli/free.html"
# URL pattern shared by every other listing page (filled with the page number).
url_demo = "http://sc.chinaz.com/jianli/free_%d.html"


def _listing_url(page):
    """Return the listing-page URL for the 1-based page number *page*."""
    return url_page_one if page == 1 else url_demo % page


def _download_template(entry):
    """Download the template described by one listing entry.

    *entry* is an lxml element for a single item of the listing page. The
    detail-page link and the template name are read from its ``./p/a``
    anchor, the detail page is fetched, one download mirror is picked at
    random, and the payload is written to ``<name>.rar`` in the current
    working directory.

    Returns the file name that was written, or ``None`` when the entry (or
    its detail page) does not contain the expected links.
    """
    detail_links = entry.xpath("./p/a/@href")
    titles = entry.xpath("./p/a/text()")
    if not detail_links or not titles:
        # Not a template entry (no anchor/title) -- nothing to download.
        return None

    # Request the detail page, which lists the download mirrors.
    detail_page_text = requests.get(url=detail_links[0], headers=headers).text
    detail_tree = etree.HTML(detail_page_text)

    # Every <li> holds an <a> whose href is one download address.
    # NOTE(review): class value/case reconstructed from garbled source --
    # confirm against the live page markup.
    mirrors = detail_tree.xpath('//div[@class="clearfix mt20 downlist"]/ul/li')
    if not mirrors:
        return None

    # Pick one mirror at random rather than always using the same one.
    download_links = random.choice(mirrors).xpath("./a/@href")
    if not download_links:
        return None

    # .content yields the raw bytes of the archive.
    data = requests.get(url=download_links[0], headers=headers).content

    name = titles[0] + ".rar"
    with open(name, "wb") as fp:
        fp.write(data)
    return name


def main():
    """Prompt for a page range and download every template on those pages."""
    start_page = int(input('enter a start page num:'))
    end_page = int(input('enter a end page num:'))

    for page in range(start_page, end_page + 1):
        response = requests.get(url=_listing_url(page), headers=headers)
        response.encoding = "utf-8"
        listing_tree = etree.HTML(response.text)

        # Each child <div> of the container is one template entry.
        # NOTE(review): id value/case reconstructed from garbled source --
        # confirm against the live page markup.
        for div in listing_tree.xpath('//div[@id="container"]/div'):
            name = _download_template(div)
            if name is not None:
                print(name, "Download successful")


if __name__ == "__main__":
    main()