Python Crawler Practice - 3. Using a crawler to extract return values and simulate the Youdao Dictionary web interface

The Youdao Dictionary web interface can be driven by a crawler: the crawler simulates typing a keyword, builds the FormData that the Youdao interface expects, and scrapes the return value, which is the translation the page generates dynamically via Ajax. Seen from the outside, this implements a simulated translation interface: the crawler plays the role of a browser calling the Youdao Dictionary web interface. Strictly speaking, one could simply call the official Youdao web API directly and pass it JSON parameters, which would be far less trouble; this exercise instead uses a crawler to simulate the web page flow of entering a keyword and getting its translation.

Enter a word in the browser, then analyze the URL format of the Youdao Dictionary translation web interface.

# Crawler that simulates a call to the Youdao Dictionary web interface
import json
import re
from urllib import parse
from urllib import request


class YoudaoTranslator:
    """Translate text by posting it to the Youdao Dictionary web endpoint.

    The instance holds the text to translate; ``Translator()`` performs the
    HTTP request, prints the first translated segment and returns all of them.
    """

    def __init__(self, key):
        """Store the text to translate.

        Args:
            key: Text sent to the service as the ``i`` form field.
        """
        self.key = key

    def __getData(self):
        """Return the URL-encoded form body for the request, as UTF-8 bytes."""
        # Form data required by the Youdao Dictionary web interface.
        # NOTE(review): salt/sign/ts/bv are fixed values, presumably captured
        # from one browser session -- confirm the server does not validate them.
        formdata = {
            "i": self.key,
            "from": "AUTO",
            "to": "AUTO",
            "smartresult": "dict",
            "client": "fanyideskweb",
            "salt": "15763837022114",
            "sign": "2b12fd214e066f53bc3455a126d7a509",
            "ts": "1576383702211",
            "bv": "5575008ba9785f184b106838a72d6536",
            "doctype": "json",
            "version": "2.1",
            "keyfrom": "fanyi.web",
            # Spelling kept exactly as in the original request (lowercase "l").
            "action": "FY_BY_REALTlME",
        }
        return parse.urlencode(formdata).encode(encoding="utf-8")

    def __getPage(self):
        """POST the form data with a browser-like User-Agent.

        Returns:
            The response body decoded as UTF-8 text.

        Raises:
            urllib.error.URLError: If the request fails.
        """
        header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"}
        url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"

        req = request.Request(url, data=self.__getData(), headers=header)
        # The context manager closes the connection even if read/decode fails.
        with request.urlopen(req) as res:
            return res.read().decode("utf-8")

    @staticmethod
    def _extract_translations(payload):
        """Pull every translated segment out of the JSON reply text.

        Parsing the reply as JSON (instead of regex-matching the raw text)
        decodes escape sequences and keeps multiple segments separate.

        Args:
            payload: Response body text.

        Returns:
            A list of ``tgt`` strings in response order; empty when the body
            is not JSON or carries no translation.
        """
        try:
            reply = json.loads(payload)
        except ValueError:
            # Not JSON (e.g. an HTML error page): nothing to extract.
            return []
        if not isinstance(reply, dict):
            return []

        # NOTE(review): assumes translations live under "translateResult" as a
        # list of lists of {"src": ..., "tgt": ...} objects -- confirm against
        # a live response.
        paragraphs = reply.get("translateResult")
        if not isinstance(paragraphs, list):
            return []

        translations = []
        for paragraph in paragraphs:
            if not isinstance(paragraph, list):
                continue
            for segment in paragraph:
                if isinstance(segment, dict) and isinstance(segment.get("tgt"), str):
                    translations.append(segment["tgt"])
        return translations

    def __Pat(self):
        """Fetch the reply, print the first translation (if any), return all.

        Returns:
            The list of translated segments; empty when none were found, in
            which case nothing is printed.
        """
        result = self._extract_translations(self.__getPage())
        if result:
            print(result[0])
        return result

    def Translator(self):
        """Translate ``self.key``; print the first segment and return them all.

        Returns:
            The list of translated segments (possibly empty).
        """
        return self.__Pat()


if __name__ == '__main__':
    # Demo run: send a sample Chinese phrase through the translator.
    YoudaoTranslator("人格心理学").Translator()

Next is the result of running the script.

Guess you like

Origin www.cnblogs.com/liuchaodada/p/12044217.html