首先准备工作,下载字典包:详情参照https://www.elastic.co/guide/cn/elasticsearch/guide/current/hunspell.html 该链接提供各个国家字典包下载
项目为maven项目
pom引用hunspell
<dependency>
<groupId>dk.dren</groupId>
<artifactId>hunspell</artifactId>
<version>1.3.2</version>
</dependency>
源码如下,不同国家语言可能需要特殊处理,这只是最通用的
package com.translator.utils.Spell;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import dk.dren.hunspell.Hunspell;
/**
 * Command-line demo of spell checking with the HunspellJNA binding ({@code dk.dren.hunspell}).
 *
 * <p>{@link #main(String[])} loads the {@code en_US} dictionary, normalises a sample sentence with
 * {@link #decodeWord(String)}, splits it into tokens with {@link #WORD_PATTERN} and prints one line
 * per token: the space-separated Hunspell suggestions when the token is misspelled, or an empty
 * line when it is accepted (or when no suggestion exists).
 *
 * <p>The dictionary directory defaults to {@link #DEFAULT_DICTIONARY_DIR} and can be overridden with
 * the {@code hunspell.dict.dir} system property. All state is static and unsynchronised; this class
 * is a single-threaded demo.
 */
public class TestHunspell {

    /** System property that overrides the directory containing the .dic/.aff dictionary files. */
    private static final String DICTIONARY_DIR_PROPERTY = "hunspell.dict.dir";

    /** Dictionary directory used when {@link #DICTIONARY_DIR_PROPERTY} is unset or empty. */
    private static final String DEFAULT_DICTIONARY_DIR =
            "C:/Users/yang/Desktop/checkSpell/tool/checkSpell/reources/dic/";

    /**
     * Body of a regex character class listing every code point treated as part of a word. The
     * escapes are kept as text here (doubled backslashes) and are interpreted by the regex engine,
     * not by the Java compiler. The list starts with apostrophe, full stop, ASCII digits, ASCII
     * letters and underscore, followed by further ranges in the Latin, Greek, Cyrillic and Arabic
     * blocks.
     */
    private static final String WORD_CHARACTERS = "\\u0027\\u002e\\u0030-\\u0039\\u0041-\\u005a\\u005f\\u0061-\\u007a\\u00c0-\\u00cf\\u00d1-\\u00d6\\u00d8-\\u00dd\\u00df-\\u00ef\\u00f1-\\u00f6\\u00f8-\\u00fd\\u00ff\\u0102-\\u0107\\u010c-\\u0111\\u0118-\\u011b\\u011e-\\u011f\\u0128-\\u0129\\u0130-\\u0131\\u0139-\\u013a\\u013d-\\u013e\\u0141-\\u0144\\u0147-\\u0148\\u0150-\\u0155\\u0158-\\u015b\\u015e-\\u0165\\u0168-\\u0169\\u016e-\\u0171\\u0178-\\u017e\\u01a0-\\u01a1\\u01af-\\u01b0\\u0218-\\u021b\\u0300-\\u0301\\u0303\\u0309\\u0323\\u0386\\u0388-\\u038a\\u038c\\u038e-\\u038f\\u0391-\\u03a1\\u03a3-\\u03a9\\u03ac-\\u03af\\u03b1-\\u03ce\\u0401\\u0410-\\u044f\\u0451\\u0621-\\u063a\\u0640-\\u064b\\u064e-\\u0651\\u0664-\\u0667\\u067e\\u0686\\u0698\\u06a9\\u06af\\u06cc\\u06f0-\\u06f3\\u06f8-\\u06f9\\u1ea0-\\u1ef3\\u200c\\ufb57\\ufb59\\ufb7a-\\ufb7d\\ufb8b\\ufb8f-\\ufb91\\ufb93-\\ufb95\\ufbfd\\ufe8d-\\ufe8e\\ufe90-\\ufe92\\ufe95-\\ufed8\\ufedd-\\ufeec\\ufeee-\\ufeef\\ufef3-\\ufef4\\ufeff";

    /** Matches one maximal run of word characters; compiled once instead of on every run. */
    private static final Pattern WORD_PATTERN = Pattern.compile("[" + WORD_CHARACTERS + "]+");

    /** Hunspell binding instance, assigned by {@link #initSpellData(String)}. */
    private static Hunspell h;

    /** Dictionary for the active language, assigned by {@link #initSpellData(String)}. */
    private static Hunspell.Dictionary d;

    /**
     * Spell-checks a fixed sample sentence against the {@code en_US} dictionary and prints the
     * suggestions for each token to standard output.
     *
     * @param args ignored
     * @throws Exception if the Hunspell binding or the dictionary cannot be loaded
     */
    public static void main(String[] args) throws Exception {
        // Load the dictionary (English).
        initSpellData("en_US");
        System.out.println("开始拼写检查");

        // Strip markup, escapes, URLs and format placeholders before tokenising.
        String checkString = decodeWord("Hells hax how ar you?");

        Matcher matcher = WORD_PATTERN.matcher(checkString);
        while (matcher.find()) {
            System.out.println(joinSuggestions(matcher.group()));
        }
    }

    /**
     * Builds the output line for a single token.
     *
     * @param word token to check
     * @return every suggestion followed by a single space, or the empty string when the word is
     *         spelled correctly or Hunspell offers no suggestion
     */
    private static String joinSuggestions(String word) {
        if (!d.misspelled(word)) {
            return "";
        }
        List<String> suggestions = d.suggest(word);
        StringBuilder joined = new StringBuilder();
        for (String candidate : suggestions) {
            joined.append(candidate).append(' ');
        }
        return joined.toString();
    }

    /**
     * Loads the Hunspell binding and the dictionary for the given language into {@link #h} and
     * {@link #d}.
     *
     * <p>The dictionary base path is the dictionary directory followed by {@code language}; the
     * original author's notes state that Hunspell appends the {@code .dic} and {@code .aff}
     * suffixes itself, so the suffix must not be supplied.
     *
     * @param language dictionary base name, e.g. {@code en_US} or {@code fr}
     * @throws IllegalArgumentException if {@code language} is null or empty
     * @throws Exception if the binding or the dictionary cannot be loaded; the original failure is
     *         attached as the cause instead of being printed and ignored, which used to surface
     *         later as an unrelated {@code NullPointerException}
     */
    private static void initSpellData(String language) throws Exception {
        if (language == null || language.isEmpty()) {
            throw new IllegalArgumentException("language must not be null or empty");
        }

        String directory = System.getProperty(DICTIONARY_DIR_PROPERTY);
        if (directory == null || directory.isEmpty()) {
            directory = DEFAULT_DICTIONARY_DIR;
        } else if (!directory.endsWith("/") && !directory.endsWith("\\")) {
            directory = directory + "/";
        }
        String basePath = directory + language;

        try {
            // Per the original author's note the argument value is irrelevant: when the native
            // library is not found at that location the binding falls back to the copy bundled in
            // its jar. NOTE(review): confirm against the HunspellJNA version in use.
            h = Hunspell.getInstance("");
            d = h.getDictionary(basePath);
        } catch (Exception e) {
            throw new IllegalStateException(
                    "Failed to load Hunspell dictionary '" + basePath + "' (.dic/.aff)", e);
        }
    }

    /**
     * Normalises raw text before tokenising. In order, it:
     * <ol>
     *   <li>replaces every {@code <...>} tag with a space;</li>
     *   <li>rewrites escape sequences that appear as literal text (a backslash followed by the
     *       escape name): typographic apostrophe to {@code '}, non-breaking hyphen to {@code -},
     *       right-to-left mark to a space, zero-width space and byte-order mark to nothing,
     *       escaped apostrophe to {@code '}, and escaped newline, tab and carriage return to a
     *       space;</li>
     *   <li>replaces URLs starting with {@code www.}, {@code http://} or {@code https://} with a
     *       space;</li>
     *   <li>replaces the placeholders {@code %d}, {@code $d}, {@code %s} and {@code $s} with a
     *       space.</li>
     * </ol>
     *
     * <p>NOTE(review): the previous version also replaced {@code <} with {@code <} and {@code >}
     * with {@code >} (plain and regex forms). Those calls changed nothing and are omitted here;
     * they may have been HTML-entity decodes whose text was lost, so confirm whether entity
     * decoding is actually wanted.
     *
     * @param desc text to normalise; must not be null
     * @return the normalised text
     */
    private static String decodeWord(String desc) {
        String text = desc.replaceAll("<.+?>", " ");

        // Escape sequences present as literal characters in the input.
        text = text.replace("\\u2019", "'")
                .replace("\\u2011", "-")
                .replace("\\u200F", " ")
                .replace("\\u200b", "")
                .replace("\\u200B", "")
                .replace("\\ufeff", "")
                .replace("\\uFEFF", "")
                .replace("\\'", "'")
                .replace("\\n", " ")
                .replace("\\t", " ")
                .replace("\\r", " ");

        // URLs would otherwise be split into tokens and reported as misspellings.
        text = text.replaceAll("(www\\.|http(s)?://)([\\w-]+\\.)*[\\w-]+(/[\\w-./?%&=]*)?", " ");

        // Format placeholders.
        return text.replace("%d", " ")
                .replace("$d", " ")
                .replace("%s", " ")
                .replace("$s", " ");
    }
}
检查结果如下
开始拼写检查
ha ax hoax hex has sax hat tax lax had hag ham max hap hah
AR Ar at a r are ear air arr art tar oar arc car gar
字典包如下
每次检查完记得把hunspell清空,直接h=null就可以了;另外,字典路径不能包含中文,否则会读不到字典包。