Google 翻译爬虫(Java 实现)

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/m0_37681914/article/details/79089851
package com.fly.design.translation.google;


import com.fly.design.translation.Language;
import com.fly.design.translation.Translator;
import org.junit.Test;

import java.io.IOException;

import static org.junit.Assert.*;

/**
 * Exercises {@link GoogleTranslator} through the {@link Translator} interface.
 * Each assertion performs a real translation call, so the expected strings are
 * whatever the remote service returned when the test was written.
 *
 * @author weijun.zou
 * Create on 2018/1/18
 */
public class GoogleTranslatorTest {

    /**
     * Translates a handful of words in both directions and compares each
     * result with the expected text.
     */
    @Test
    public void translate()
            throws IOException, InterruptedException {
        final Translator googleTranslator = new GoogleTranslator();

        // Chinese -> English
        String dollarsInEnglish =
                googleTranslator.translate("美元", Language.CHINESE, Language.ENGLISH);
        assertEquals("Dollars", dollarsInEnglish);

        String peacefulInEnglish =
                googleTranslator.translate("平安", Language.CHINESE, Language.ENGLISH);
        assertEquals("Peaceful", peacefulInEnglish);

        // English -> Chinese
        String dollarsInChinese =
                googleTranslator.translate("Dollars", Language.ENGLISH, Language.CHINESE);
        assertEquals("美元", dollarsInChinese);

        String articleInChinese =
                googleTranslator.translate("a", Language.ENGLISH, Language.CHINESE);
        assertEquals("一个", articleInChinese);
    }
}
package com.fly.design.translation;

/**
 * Languages that can be named as the source or target of a translation.
 *
 * @author weijun.zou
 * Create on 2018/1/17
 */
public enum Language {

    /** Chinese. */
    CHINESE,

    /** English. */
    ENGLISH;
}
package com.fly.design.translation;

import java.io.IOException;

/**
 * Contract for a component that converts text from one {@link Language}
 * into another.
 *
 * @author weijun.zou
 * Create on 2018/1/17
 */
public interface Translator {

    /**
     * Translates {@code value} from the {@code input} language to the
     * {@code output} language.
     *
     * @param value  the text to translate
     * @param input  the language {@code value} is written in
     * @param output the language to translate into
     * @return the translated text
     * @throws IOException          if the implementation fails while performing I/O
     * @throws InterruptedException if the implementation is interrupted while working
     */
    String translate(
            String value,
            Language input,
            Language output
    ) throws IOException, InterruptedException;
}
package com.fly.design.translation.google;

import com.fly.design.translation.Language;
import com.fly.design.translation.Translator;

import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;

import static com.fly.design.translation.Language.*;

/**
 * {@link Translator} that sends a GET request to the {@code translate_a/single}
 * endpoint configured in {@link #PATH} and returns the first quoted string of
 * the response body.
 *
 * <p>Every request carries a {@code tk} parameter computed locally from the
 * text to translate (see {@link #getTk(String)}). The constants used by that
 * computation are hard-coded here.
 *
 * <p>Known limitation: only the text between the first pair of double quotes in
 * the response is returned, so a translation that itself contains an escaped
 * quote is cut short at that quote.
 * NOTE(review): multi-sentence input presumably comes back as several segments,
 * of which only the first would be returned — confirm against the live service.
 *
 * @author weijun.zou
 * Create on 2018/1/17
 */
public class GoogleTranslator implements Translator {

    private static final Logger log = LoggerFactory.getLogger(GoogleTranslator.class);
    private static final String PATH = "https://translate.google.cn/translate_a/single";

    /** Fallback charset for the response body; matches the {@code oe} request parameter. */
    private static final String RESPONSE_CHARSET = "UTF-8";

    /** Seed of the token computation; also XOR-ed into the part after the dot. */
    private static final long TK_SEED = 406644L;
    /** Value XOR-ed into the token after the final mixing round. */
    private static final long TK_XOR = 3293161072L;
    /** The token is reduced modulo this value before being formatted. */
    private static final long TK_MODULUS = 1_000_000L;
    /** Mixing instructions applied after each input byte is added. */
    private static final String TK_ROUND_KEY = "+-a^+6";
    /** Mixing instructions applied once after all bytes were consumed. */
    private static final String TK_FINAL_KEY = "+-3^+b+-f";
    /** Keeps additions inside 32 bits while the value is held in a {@code long}. */
    private static final long UINT32_MASK = 0xFFFFFFFFL;

    /** Query parameters that are identical for every request. */
    private static final NameValuePair[] ARG_LIST = {
            new BasicNameValuePair("client", "t"),
            new BasicNameValuePair("hl", toArgs(Language.CHINESE)),
            new BasicNameValuePair("dt", "at"),
            new BasicNameValuePair("dt", "bd"),
            new BasicNameValuePair("dt", "ex"),
            new BasicNameValuePair("dt", "ld"),
            new BasicNameValuePair("dt", "md"),
            new BasicNameValuePair("dt", "qca"),
            new BasicNameValuePair("dt", "rw"),
            new BasicNameValuePair("dt", "rm"),
            new BasicNameValuePair("dt", "ss"),
            new BasicNameValuePair("dt", "t"),
            new BasicNameValuePair("ie", "UTF-8"),
            new BasicNameValuePair("oe", "UTF-8")
    };

    private final HttpClient client = HttpClients.createDefault();

    /**
     * Translates {@code value} by calling the remote endpoint.
     *
     * @param value  text to translate; leading and trailing whitespace is trimmed
     *               before the request is built
     * @param input  language of {@code value}
     * @param output language to translate into
     * @return the first quoted string found in the response body
     * @throws IOException if the request fails, the server answers with a
     *                     non-2xx status, or the body contains no quoted string
     * @throws InterruptedException declared by {@link Translator}; not thrown by
     *                              this implementation's own code
     */
    @Override
    public String translate(String value, Language input, Language output)
            throws IOException, InterruptedException {
        HttpGet httpGet = new HttpGet(createURI(value.trim(), input, output));
        return client.execute(httpGet, response -> {
            int status = response.getStatusLine().getStatusCode();
            // The request asks for UTF-8 output ("oe"), so use it as the fallback
            // when the response headers do not declare a charset.
            String data = EntityUtils.toString(response.getEntity(), RESPONSE_CHARSET);
            log.info(data);
            if (status < 200 || status >= 300) {
                throw new IOException("Translation request failed with HTTP status " + status);
            }
            return firstQuotedText(data);
        });
    }

    /**
     * Returns the text between the first two double quotes of {@code data}.
     *
     * @throws IOException if {@code data} does not contain two double quotes
     */
    private static String firstQuotedText(String data) throws IOException {
        int open = data.indexOf('"');
        int close = open < 0 ? -1 : data.indexOf('"', open + 1);
        if (close < 0) {
            throw new IOException("Unexpected translation response: no quoted text found");
        }
        return data.substring(open + 1, close);
    }

    /**
     * Builds the request URI: the fixed parameters of {@link #ARG_LIST} followed
     * by source language, target language, token and the text itself.
     *
     * @throws RuntimeException wrapping the {@link URISyntaxException} if the URI
     *                          cannot be built
     */
    private static URI createURI(String value, Language input, Language output) {
        try {
            // PATH is a complete URL, so it is handed to the URIBuilder
            // constructor, which parses scheme, host and path from it.
            return new URIBuilder(PATH)
                    .setParameters(ARG_LIST)
                    .addParameters(List.of(
                            new BasicNameValuePair("sl", toArgs(input)),
                            new BasicNameValuePair("tl", toArgs(output)),
                            new BasicNameValuePair("tk", getTk(value)),
                            new BasicNameValuePair("q", value)
                    )).build();
        } catch (URISyntaxException e) {
            log.error("构建google翻译url出错", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Computes the {@code tk} request parameter for {@code values}.
     *
     * <p>Starting from {@link #TK_SEED}, every UTF-8 byte of the text is added
     * and mixed with {@link #TK_ROUND_KEY}; the result is mixed once more with
     * {@link #TK_FINAL_KEY}, XOR-ed with {@link #TK_XOR} and reduced modulo
     * {@link #TK_MODULUS}.
     *
     * @return {@code "<token>.<token XOR seed>"}
     */
    private static String getTk(String values) {
        long token = TK_SEED;
        for (long octet : toNums(values.toCharArray())) {
            token = encode(token + octet, TK_ROUND_KEY);
        }
        // encode() ends with a masked addition for TK_FINAL_KEY and TK_XOR fits in
        // 32 bits, so the value is already a non-negative unsigned 32-bit number.
        token = encode(token, TK_FINAL_KEY) ^ TK_XOR;
        token %= TK_MODULUS;
        return token + "." + (token ^ TK_SEED);
    }

    /**
     * Encodes UTF-16 code units as UTF-8 byte values (each in {@code 0..255}).
     *
     * <p>A high surrogate directly followed by a low surrogate is combined into
     * one code point and encoded with four bytes. An unpaired surrogate is
     * encoded with three bytes like any other code unit at or above U+0800.
     */
    private static List<Long> toNums(char[] values) {
        List<Long> valueList = new ArrayList<>(values.length * 3);
        for (int i = 0; i < values.length; i++) {
            char unit = values[i];
            long value = unit;
            if (value < 0x80) {
                // One byte: 0xxxxxxx
                valueList.add(value);
            } else if (value < 0x800) {
                // Two bytes: 110xxxxx 10xxxxxx
                valueList.add(value >> 6 | 0xC0);
                valueList.add(value & 0x3F | 0x80);
            } else if (Character.isHighSurrogate(unit)
                    && i + 1 < values.length
                    && Character.isLowSurrogate(values[i + 1])) {
                // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                i++;
                value = Character.toCodePoint(unit, values[i]);
                valueList.add(value >> 18 | 0xF0);
                valueList.add(value >> 12 & 0x3F | 0x80);
                valueList.add(value >> 6 & 0x3F | 0x80);
                valueList.add(value & 0x3F | 0x80);
            } else {
                // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                valueList.add(value >> 12 | 0xE0);
                valueList.add(value >> 6 & 0x3F | 0x80);
                valueList.add(value & 0x3F | 0x80);
            }
        }
        return valueList;
    }

    /**
     * Applies the mixing instructions in {@code key} to {@code value}.
     *
     * <p>{@code key} is read in groups of three characters:
     * <ol>
     *   <li>{@code '+'}: add the shifted value and keep the low 32 bits;
     *       anything else: XOR with the shifted value;</li>
     *   <li>{@code '+'}: unsigned right shift; anything else: left shift;</li>
     *   <li>shift distance — a decimal digit, or a letter from {@code 'a'}
     *       upwards meaning 10, 11, ...</li>
     * </ol>
     */
    private static long encode(long value, String key) {
        for (int i = 0; i < key.length() - 2; i += 3) {
            char operation = key.charAt(i);
            char direction = key.charAt(i + 1);
            char amount = key.charAt(i + 2);
            int distance = amount >= 'a' ? amount - 87 : amount - '0';
            long shifted = direction == '+' ? value >>> distance : value << distance;
            value = operation == '+' ? value + shifted & UINT32_MASK : value ^ shifted;
        }
        return value;
    }

    /**
     * Maps a {@link Language} to the language code used in the request.
     *
     * @return {@code "zh-CN"} for Chinese, {@code "en"} for English, and the
     *         empty string for anything else (including {@code null})
     */
    private static String toArgs(Language language) {
        if (language == CHINESE) {
            return "zh-CN";
        }
        if (language == ENGLISH) {
            return "en";
        }
        return "";
    }
}

猜你喜欢

转载自blog.csdn.net/m0_37681914/article/details/79089851