java关键词过滤算法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133

* java关键字过滤
* HashMap实现DFA算法
* 关键词替换为*
* @author Methol
*/

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

({ "rawtypes", "unchecked" })
public class {
/** 直接禁止的 */
private HashMap keysMap = new HashMap();
static final String tihuantxt = "**********************************************************";
private int matchType = 1; // 1:最小长度匹配 2:最大长度匹配

public void addKeywords(List<String> keywords) {
for (int i = 0; i < keywords.size(); i++) {
String key = keywords.get(i).trim();
HashMap nowhash = null;
nowhash = keysMap;
for (int j = 0; j < key.length(); j++) {
char word = key.charAt(j);
Object wordMap = nowhash.get(word);
if (wordMap != null) {
nowhash = (HashMap) wordMap;
} else {
HashMap<String, String> newWordHash = new HashMap<String, String>();
newWordHash.put("isEnd", "0");
nowhash.put(word, newWordHash);
nowhash = newWordHash;
}
if (j == key.length() - 1) {
nowhash.put("isEnd", "1");
}
}
}
}


* 重置关键词
*/
public void clearKeywords() {
keysMap.clear();
}


* 检查一个字符串从begin位置起开始是否有keyword符合, 如果有符合的keyword值,返回值为匹配keyword的长度,否则返回零
* flag 1:最小长度匹配 2:最大长度匹配
*/
private int checkKeyWords(String txt, int begin, int flag) {
HashMap nowhash = null;
nowhash = keysMap;
int maxMatchRes = 0;
int res = 0;
int l = txt.length();
char word = 0;
for (int i = begin; i < l; i++) {
word = txt.charAt(i);
Object wordMap = nowhash.get(word);
if (wordMap != null) {
res++;
nowhash = (HashMap) wordMap;
if (((String) nowhash.get("isEnd")).equals("1")) {
if (flag == 1) {
wordMap = null;
nowhash = null;
txt = null;
return res;
} else {
maxMatchRes = res;
}
}
} else {
txt = null;
nowhash = null;
return maxMatchRes;
}
}
txt = null;
nowhash = null;
return maxMatchRes;
}


* 返回txt中关键字的列表
*/
public String getTxtKeyWords(String txt) {
Set set = new HashSet();
int l = txt.length();
String newtxt = new String();
for (int i = 0; i < l;) {
int len = checkKeyWords(txt, i, matchType);
if (len > 0) {
newtxt += txt.substring(0, i);
newtxt += tihuantxt.substring(0, len);
newtxt += txt.substring(i + len, txt.length());
txt = newtxt;
newtxt = "";
i += len;
} else {
i++;
}
}
return txt;
}


* 仅判断txt中是否有关键字
*/
public boolean isContentKeyWords(String txt) {
for (int i = 0; i < txt.length(); i++) {
int len = checkKeyWords(txt, i, 1);
if (len > 0) {
return true;
}
}
txt = null;
return false;
}

public int getMatchType() {
return matchType;
}

public void setMatchType(int matchType) {
this.matchType = matchType;
}
}

原文:大专栏  java关键词过滤算法


猜你喜欢

转载自www.cnblogs.com/chinatrump/p/11596627.html