practice之Python爬取有道翻译页面

1.获取需求

import time
import random
import json
from day1.tuozhan_all import post

2.定义MD5加密

def md5_my(need_str):
    """Return the hexadecimal MD5 digest of ``need_str``.

    Args:
        need_str: Text to hash. It is encoded as UTF-8 before hashing.

    Returns:
        The 32-character lowercase hexadecimal digest as a ``str``.
    """
    import hashlib

    # hashlib works on bytes, not str:
    #   str   -> bytes: str.encode('utf-8')
    #   bytes -> str:   bytes.decode('utf-8')
    sign_bytes = need_str.encode('utf-8')
    # Debug trace of the encoded type, kept from the original implementation.
    print(type(sign_bytes))

    # A real MD5 hash object is required here; the hashlib module itself has
    # no update()/hexdigest() methods.
    md5_o = hashlib.md5()
    md5_o.update(sign_bytes)
    return md5_o.hexdigest()

3.定义函数:根据需求找到目标网站,设置请求头信息,并调用加密方法

def translate(kw):
    """Translate ``kw`` through the Youdao web translation endpoint.

    Builds the signed form that the endpoint expects
    (``sign = md5(client + text + salt + secret)``), POSTs it with
    browser-like headers, and extracts the translated text from the JSON
    response.

    Args:
        kw: The text to translate. Source and target languages are both
            sent as ``AUTO``.

    Returns:
        The ``tgt`` field of the first entry in the response's
        ``translateResult`` list.

    Raises:
        KeyError, IndexError: If the response does not have the expected
            ``translateResult[0][0]['tgt']`` structure.
    """
    url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        # 'Accept-Encoding' is deliberately not sent: the response body is
        # decoded as plain UTF-8 below (presumably a compressed body would
        # break that - confirm against the `post` helper).
        #'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # 'Content-Length' is not hard-coded because the form size depends
        # on the text being translated.
        #'Content-Length': '204',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # NOTE(review): the first cookie segment looks mangled (possibly by
        # e-mail obfuscation when the page was copied); replace it with a
        # cookie captured from a real browser session if requests fail.
        'Cookie': '[email protected];              OUTFOX_SEARCH_USER_ID_NCOO=1353878353.3763409; JSESSIONID=aaagYp6Ma33V-5wnZG_uw;         fanyi-ad-id=48707; fanyi-ad-closed=1; ___rl__test__cookies=1534352134401',
        'Host': 'fanyi.youdao.com',
        # The stray leading space in the original value was removed.
        'Origin': 'http://fanyi.youdao.com',
        'Referer': 'http://fanyi.youdao.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)AppleWebKit/537.36(KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }

    # The original line here was the expression `key - kw`, which raises
    # NameError; an assignment was intended.
    key = kw
    # Salt: current time in milliseconds plus a small random offset.
    salt = int(time.time()*1000 + random.randint(0,10))
    print(salt)
    salt_str = str(salt)

    # Signature input: client id + text + salt + fixed secret string.
    S = "fanyideskweb"
    D = "ebSeFb%=XZ%T[KZ)c(sy!"
    sign_str = S + key + salt_str + D
    # MD5-hash the signature input.
    sign_md5_str = md5_my(sign_str)

    form = {
        'i': key,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': salt_str,
        'sign': sign_md5_str,
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_REALTIME',
        'typoResult': 'false',
    }

    # 4. Fetch and locate the data
    html_bytes = post(url,form,headers=headers)
    # Convert the JSON text of the response into a dict.
    res_dict = json.loads(html_bytes.decode('utf-8'))

    # The translation payload is stored under 'translateResult'
    # (the original looked up 'translateRequest').
    translate_res = res_dict['translateResult'][0][0]['tgt']

    return translate_res

5.测试

if __name__ == '__main__':
    # Demo run: translate one fixed sample phrase and print the result.
    sample_text = '青青河边草'
    print('青青河边草的翻译是:' + translate(sample_text))

6.以上代码的运行结果

1534352974499

<class 'bytes'>
青青河边草的翻译是:Green river grass

猜你喜欢

转载自blog.csdn.net/zhan9le/article/details/81714001
今日推荐