import requests
from lxml import etree
import re
import os
def url_values():
    """Collect the detail-page link of every entry on the listing page.

    Parses the module-level ``response`` (HTML text of the listing page) and
    appends the first ``href`` found directly under each entry container to
    the module-level ``jump_url`` list. Returns nothing.
    """
    tree = etree.HTML(response)
    entries = tree.xpath(
        "//div[@class='bggray clearfix pt20']/div[3]//div[@id='container']/div"
    )
    for entry in entries:
        hrefs = entry.xpath('./a/@href')
        # A container without a direct <a href> yields an empty list; indexing
        # it with [0] would raise IndexError and abort the whole crawl.
        if hrefs:
            jump_url.append(hrefs[0])
def Download_page():
    """Visit every detail page and record its archive URL and file name.

    For each URL in the module-level ``jump_url`` list, fetches the page with
    the module-level ``headers``, takes the first ``http://....rar`` link in
    the HTML and the text of the title ``<h1>``, and appends them to the
    module-level ``downlaod_url`` and ``file_name`` lists respectively.

    Pages missing either the link or the title are skipped as a whole, so the
    two lists always stay the same length and index-aligned.
    """
    # Raw string with an escaped dot: the extension must be a literal ".rar",
    # not "any character followed by rar".
    rar_link = re.compile(r"http://.*?\.rar")
    for page_url in jump_url:
        html = requests.get(url=page_url, headers=headers).text
        links = rar_link.findall(html)
        titles = etree.HTML(html).xpath('//div[@class="ppt_tit clearfix"]/h1/text()')
        if not links or not titles:
            # Append nothing for this page; a partial append would pair later
            # file names with the wrong download URLs.
            print(f"跳过页面{page_url}: 未找到下载链接或标题")
            continue
        title = titles[0]
        try:
            # Presumably the page is UTF-8 but was decoded as ISO-8859-1, so
            # the round trip restores the real text -- TODO confirm.
            title = title.encode("iso-8859-1").decode("utf-8")
        except UnicodeError:
            # The text was not ISO-8859-1 mojibake; keep it unchanged.
            pass
        downlaod_url.append(links[0])
        file_name.append(title + '.rar')
def get_file():
    """Download every collected archive into the ``简历模板`` directory.

    Walks the module-level ``file_name`` and ``downlaod_url`` lists in
    lockstep, requests each URL with the module-level ``headers`` and writes
    the response body to ``简历模板/<name>``. A response whose status code is
    not 200 is reported and skipped. After each successful write, prints the
    path and the running count of saved files.
    """
    saved = 0
    for name, url in zip(file_name, downlaod_url):
        # Build the path first so the failure message below can refer to the
        # file that is actually being processed.
        filename = "简历模板/" + name
        # One request serves both the status check and the payload.
        reply = requests.get(url=url, headers=headers)
        if reply.status_code != 200:
            print(f"文件{filename}下载失败")
            continue
        saved += 1
        with open(filename, 'wb') as fp:
            fp.write(reply.content)
        print("下载完成", filename, saved)
if __name__ == "__main__":
    # Module-level accumulators that the crawl functions append to.
    jump_url, downlaod_url, file_name = [], [], []

    # Request settings: the listing page and a browser-like User-Agent.
    url = "http://sc.chinaz.com/jianli/free.html"
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
    }

    # Create the output directory on first run.
    output_dir = './简历模板'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Fetch the listing page; url_values() reads its HTML from `response`.
    listing = requests.get(url=url, headers=headers)
    response = listing.text

    url_values()
    Download_page()
    # NOTE(review): get_file() is defined but never called here, so nothing is
    # actually downloaded -- confirm whether that is intentional.
    print(len(downlaod_url))
# 笔记思路入门参考 (Notes: an introductory reference for the approach.)