# 只是为练习字体反爬,代码垃圾的一笔仅供参考。
#!/usr/bin/env python
# encoding: utf-8
from requests_html import HTMLSession
import re
import os
from fontTools.ttLib import TTFont
# Glyph lookup table shared by the whole script: create_font() fills it with
# "&#x...." entity prefixes (no trailing ';') mapped to the character each
# one stands for, and the main script reads it to decode the page text.
f_dict = dict()
def create_font(font_url):
    """Load the font at *font_url* and register its glyphs in ``f_dict``.

    The font is cached under ``./fonts`` using the last path segment of the
    URL as its file name, and is downloaded through the module-level
    ``session`` only when no cached copy exists.  Every glyph name after the
    first entry of the font's glyph order is lower-cased, has ``uni``
    replaced by ``&#x``, and is mapped to the character at the same position
    in a fixed table.  Keys already present in ``f_dict`` are left untouched.

    Args:
        font_url: Absolute URL of the font file to load.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    font_dir = "./fonts"
    font_file = font_url.split('/')[-1]
    font_path = os.path.join(font_dir, font_file)

    # exist_ok avoids the separate "does it exist?" check (and its race).
    os.makedirs(font_dir, exist_ok=True)

    if not os.path.exists(font_path):
        # Not cached yet: fetch the font and store it for later runs.
        print('不在字体库中, 下载:', font_file)
        response = session.get(font_url)
        # Fail before writing, so an HTTP error page is never cached as a
        # font file that would break every subsequent run.
        response.raise_for_status()
        with open(font_path, 'wb') as f:
            f.write(response.content)

    # Loading is identical whether the file was just downloaded or cached.
    font = TTFont(font_path)
    # Skip the first glyph-order entry (presumably ".notdef" -- TODO confirm).
    gly_list = font.getGlyphOrder()[1:]

    # Characters assigned to the glyphs by position.
    # NOTE(review): assumes the font's glyph order matches this table;
    # verify for fonts other than the one this table was built from.
    hanzi = ['不', '了', '呢', '更', '是', '四', '小', '七', '三', '多',
             '得', '一', '着', '下', '十', '少', '长', '二', '六', '远',
             '左', '地', '短', '九', '五', '上', '坏', '很', '右', '低',
             '高', '矮', '八', '近', '大', '好', '的', '和']

    # zip() stops at the shorter sequence, so a font with more glyphs than
    # table entries no longer raises IndexError half-way through the update.
    for gly, char in zip(gly_list, hanzi):
        f_dict.setdefault(gly.lower().replace('uni', '&#x'), char)
# Shared HTTP session; create_font() uses it as well to download the font.
session = HTMLSession()
req = session.get("https://club.autohome.com.cn/bbs/thread/bb8c36ced93ce182/74203500-1.html")
source = req.text

# Raw strings keep the regex escapes (\( and \)) from being parsed as
# invalid string escape sequences; the compiled patterns are unchanged.
plat = re.compile(r"'\),url\('(.*?)'\)")
# The captured URL is used with an explicit "http:" scheme prepended.
font_url = "http:" + plat.findall(source)[0]
create_font(font_url)

plt = re.compile(r'<div class="tz-paragraph">(.*?)</div>')
# Take the second matched paragraph and drop the <span> wrappers that
# select the custom font, leaving only text and "&#x....;" entities.
f_string = plt.findall(source)[1].replace("<span style='font-family: myfont;'>", '').replace('</span>', '')

# Substitute every known entity (table key plus trailing ';') with its
# mapped character.
for k, v in f_dict.items():
    f_string = f_string.replace(k + ';', v)

print(f_string.strip().replace(' ', ''))