Technical analysis of crawling a translation website

Approach:

  The site is a translation service. Analysis of its JavaScript shows that the request is protected mainly by a salted MD5 signature (`sign`) computed over the input text and sent along with the other POST parameters. There are therefore two technical solutions:

Scheme I: reimplement the salted MD5 signing yourself in Python and POST the signed data. Scheme II: load the site's JS file with the js2py package and run the relevant JS code to produce the parameters. The code below implements Scheme I.

class YYY():
    """Minimal client for the translation site's ``translate_o`` web endpoint.

    The endpoint expects every POST to carry a ``salt`` and an MD5 ``sign``
    derived from the text being translated; this class reproduces that signing
    in Python and sends the request through a shared ``requests`` session.

    The surrounding module must import ``hashlib``, ``random``, ``time`` and
    ``requests``; all four are used here.
    """

    # NOTE(review): "×××" is a redaction carried over from the original text.
    # Presumably it must be replaced with the real host (the Host header below
    # names fanyi.youdao.com) before the requests can succeed — confirm.
    _BASE_URL = "http://fanyi.×××.com"
    _TRANSLATE_URL = _BASE_URL + "/translate_o?smartresult=dict&smartresult=rule"

    # Constant prefix and suffix of the string that is hashed into ``sign``:
    # md5(client + text + salt + secret).
    # NOTE(review): an earlier comment in this code quoted a different secret
    # ("@6f#X3=cCuncYssPsuRUE"), so the site presumably rotates it — verify
    # against the site's current JS before relying on this value.
    _CLIENT = "fanyideskweb"
    _SECRET = "n%A-rKaT5fb[Gy?;N5@Tj"

    def __init__(self):
        """Create the HTTP session and install the browser-like headers."""
        self.headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Referer": self._BASE_URL + "/",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
            "Origin": self._BASE_URL,
            "Host": "fanyi.youdao.com",
        }
        self.session = requests.session()
        self.session.headers = self.headers

    def enter_yuuu(self):
        """Load the landing page so the session picks up the site's cookies.

        Per the original author's note, the landing page has to be visited
        before the POST so that its cookies are carried along; otherwise the
        POST returns an error.
        """
        self.session.get(self._BASE_URL + "/")

    @classmethod
    def _build_form(cls, input_word, timestamp_ms, digit):
        """Return the signed POST form for ``input_word``.

        Args:
            input_word: Text to translate.
            timestamp_ms: Millisecond timestamp as a string; sent as ``ts``.
            digit: Value appended to the timestamp to form ``salt``
                (``download`` passes a random integer in 0..9).

        Returns:
            A dict of form fields, including ``salt`` and the hex MD5 ``sign``.
        """
        salt = "%s%s" % (timestamp_ms, digit)
        # Mirrors the site's JS: n.md5("fanyideskweb" + e + i + secret),
        # with e = input text and i = salt.
        sign = hashlib.md5(
            (cls._CLIENT + input_word + salt + cls._SECRET).encode()
        ).hexdigest()
        return {
            "i": input_word,
            "from": "AUTO",
            "to": "AUTO",
            "smartresult": "dict",
            "client": cls._CLIENT,
            "salt": salt,  # timestamp plus a random digit
            "sign": sign,
            "ts": timestamp_ms,  # millisecond timestamp, e.g. 1568621840962
            # 32-character lowercase md5 of the browser version string
            # (per the original note: the User-Agent text that follows
            # "Mozilla/") — TODO confirm it matches the User-Agent in use.
            "bv": "3ca2e6bf257529213f041a4416ab18ca",
            "doctype": "json",
            "version": "2.1",
            "keyfrom": "fanyi.web",
            "action": "FY_BY_REALTlME",
        }

    def download(self, input_word=None):
        """Translate ``input_word`` and print the raw response body.

        Args:
            input_word: Text to translate. When omitted (``None``) the user is
                prompted on stdin, which is the original behaviour.
        """
        if input_word is None:
            input_word = input("Please enter the content to be translated: ")
        timestamp_ms = str(int(time.time() * 1000))
        form = self._build_form(input_word, timestamp_ms, random.randint(0, 9))
        resp = self.session.post(self._TRANSLATE_URL, data=form)
        print(resp.content.decode())


if __name__ == "__main__":
    # Script entry point: build the client, visit the landing page first
    # (so the session holds the site's cookies), then run one translation.
    yyy = YYY()
    yyy.enter_yuuu()
    yyy.download()

 

Guess you like

Origin www.cnblogs.com/xuehaiwuya0000/p/11528325.html