汽车之家反爬

本文只是为了练习字体反爬,代码写得比较粗糙,仅供参考。

#!/usr/bin/env python  
# encoding: utf-8  
from requests_html import HTMLSession
import re
import os
from fontTools.ttLib import TTFont
# Glyph-derived entity prefix (e.g. '&#xed8f') -> decoded character.
# Populated by create_font() and read by the decoding loop at the bottom.
f_dict = dict()
def create_font(font_url):
    """Load the font at *font_url* and record its glyph mapping in ``f_dict``.

    The font is cached under ``./fonts`` using the last path segment of the
    URL as file name, and is downloaded through the module-level ``session``
    only when that file is not present yet.  Every entry of the font's glyph
    order except the first one is lower-cased, has ``'uni'`` replaced by
    ``'&#x'`` and is stored as a key of the module-level ``f_dict``; the value
    is the character at the same position in the hard-coded table below.
    Keys already present in ``f_dict`` are left untouched.

    Args:
        font_url: URL of the font file to load.

    Raises:
        requests.HTTPError: if the download returns an HTTP error status.
    """
    font_dir = './fonts'
    font_file = font_url.split('/')[-1]
    font_path = font_dir + '/' + font_file

    # exist_ok avoids the separate "does it exist?" check before creating.
    os.makedirs(font_dir, exist_ok=True)

    if not os.path.exists(font_path):
        # Not cached yet: download it once.
        print('不在字体库中, 下载:', font_file)
        response = session.get(font_url)
        # Fail before writing, so an error page is never cached as a font
        # (a cached bad file would break every later run as well).
        response.raise_for_status()
        with open(font_path, 'wb') as f:
            f.write(response.content)

    # The first glyph-order entry is skipped (presumably '.notdef' -- confirm
    # against the actual font).
    gly_list = TTFont(font_path).getGlyphOrder()[1:]

    # NOTE(review): this table assumes the font lists its glyphs in exactly
    # this order; verify against the downloaded font.
    hanzi = ['不', '了', '呢', '更', '是', '四', '小', '七', '三', '多',
             '得', '一', '着', '下', '十', '少', '长', '二', '六', '远',
             '左', '地', '短', '九', '五', '上', '坏', '很', '右', '低',
             '高', '矮', '八', '近', '大', '好', '的', '和']

    # zip() stops at the shorter sequence, so a font with more glyphs than the
    # table leaves the surplus glyphs unmapped instead of raising IndexError.
    for gly, char in zip(gly_list, hanzi):
        f_dict.setdefault(gly.lower().replace('uni', '&#x'), char)
# Shared HTTP session; create_font() also uses it to download the font file.
session = HTMLSession()
req = session.get("https://club.autohome.com.cn/bbs/thread/bb8c36ced93ce182/74203500-1.html")
source = req.text

# Raw strings keep the regex escapes (\( and \)) from being parsed as invalid
# string escape sequences; the compiled patterns are unchanged.
plat = re.compile(r"'\),url\('(.*?)'\)")
# "http:" is prepended to the first captured font URL (presumably the page
# uses a scheme-relative URL -- confirm against the page source).
font_url = "http:" + plat.findall(source)[0]
create_font(font_url)

plt = re.compile(r'<div class="tz-paragraph">(.*?)</div>')
# Take the second matching paragraph and drop the <span> wrappers around the
# obfuscated characters, leaving only the '&#x....;' entities in the text.
f_string = (
    plt.findall(source)[1]
    .replace("<span style='font-family: myfont;'>", '')
    .replace('</span>', '')
)

# Replace every known entity (prefix + ';') with its decoded character.
for k, v in f_dict.items():
    f_string = f_string.replace(k + ';', v)
print(f_string.strip().replace('&nbsp;', ''))

猜你喜欢

转载自www.cnblogs.com/c-x-a/p/9288841.html