Java sensitive word filtering

This article records a requirement that comes up very often in real development: filtering sensitive words out of certain fields. The functionality is packaged into the two utility classes below.

TrieNode class

import java.util.HashMap;
import java.util.Map;

public class TrieNode {
    private Map<Character, TrieNode> nodeMap;
    private boolean endFlag = false;

    public TrieNode(Character letter, Map<Character, TrieNode> nodeMap) {
        this.nodeMap = nodeMap;
    }

    public TrieNode addLetter(Character letter) {
        if (letter == null) {
            return this;
        } else {
            if (this.nodeMap == null) {
                this.nodeMap = new HashMap();
            }

            TrieNode node = (TrieNode)this.nodeMap.get(letter);
            if (node == null) {
                node = new TrieNode(letter, (Map)null);
                this.nodeMap.put(letter, node);
            }

            return node;
        }
    }

    public Map<Character, TrieNode> getNodeMap() {
        return this.nodeMap;
    }

    public boolean getEndFlag() {
        return this.endFlag;
    }

    public void setEndFlag(boolean endFlag) {
        this.endFlag = endFlag;
    }
}

TrieTree class

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Map;

/**
 * Static sensitive-word filter backed by a character trie.
 *
 * <p>When the class is initialized it reads the classpath resource
 * {@code SensitiveWord.txt} line by line and adds every non-blank line to the
 * trie. Further words can be added at any time with {@link #addString(String)}.
 */
public class TrieTree {
    /** Classpath resource that is read line by line, one word per line. */
    private static final String WORD_RESOURCE = "SensitiveWord.txt";

    /** Root of the trie; it represents the empty prefix. */
    private static final TrieNode rootNode = new TrieNode(null, null);

    // Load the sensitive-word file. Declared after rootNode so the root exists
    // before the first word is added.
    static {
        loadWords(WORD_RESOURCE);
    }

    private TrieTree() {}

    /**
     * Adds one sensitive word to the trie.
     *
     * @param keyWord the word to add; {@code null} and the empty string are
     *                ignored (an empty word would otherwise mark the root
     *                itself as a word end)
     */
    public static void addString(String keyWord) {
        if (keyWord == null || keyWord.isEmpty()) {
            return;
        }

        TrieNode node = rootNode;
        for (int i = 0; i < keyWord.length(); ++i) {
            // Descend by one character, creating the child node when missing.
            node = node.addLetter(keyWord.charAt(i));
        }
        // The node reached by the last character marks a complete word.
        node.setEndFlag(true);
    }

    /**
     * Replaces every character of every sensitive word found in {@code text}
     * with {@code mark}.
     *
     * <p>The text is scanned left to right. At each position the longest word
     * starting there is masked, so a listed word that extends a shorter listed
     * word is masked completely instead of leaving its tail visible. Scanning
     * resumes after the masked span.
     *
     * @param text the text to filter
     * @param mark the replacement character
     * @return the filtered text, or {@code null} when {@code text} is {@code null}
     */
    public static String filter(String text, char mark) {
        if (text == null) {
            return null;
        }

        int length = text.length();
        StringBuilder builder = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            int end = findMatchEnd(text, i, true);
            if (end < 0) {
                // No word starts here: keep the character as it is.
                builder.append(text.charAt(i));
                ++i;
            } else {
                // Mask positions i..end inclusive, then continue after the match.
                for (int k = i; k <= end; ++k) {
                    builder.append(mark);
                }
                i = end + 1;
            }
        }
        return builder.toString();
    }

    /**
     * Tells whether {@code text} contains at least one sensitive word.
     *
     * @param text the text to inspect
     * @return {@code true} if any sensitive word occurs in {@code text};
     *         {@code false} otherwise, including when {@code text} is {@code null}
     */
    public static boolean isContaintSensitiveWord(String text) {
        if (text == null) {
            return false;
        }

        for (int i = 0; i < text.length(); ++i) {
            // Any match is enough here, so stop at the shortest one.
            if (findMatchEnd(text, i, false) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks the trie along {@code text} starting at index {@code start}.
     *
     * @param text    the text being scanned
     * @param start   index of the first character to match
     * @param longest {@code true} to keep walking past a word end looking for a
     *                longer word; {@code false} to stop at the first word end
     * @return index of the last character of the matched word, or {@code -1}
     *         when no word starts at {@code start}
     */
    private static int findMatchEnd(String text, int start, boolean longest) {
        TrieNode current = rootNode;
        int matchEnd = -1;

        for (int j = start; j < text.length(); ++j) {
            Map<Character, TrieNode> children = current.getNodeMap();
            if (children == null) {
                // A node without a child map cannot be extended any further.
                break;
            }

            TrieNode next = children.get(text.charAt(j));
            if (next == null) {
                break;
            }

            if (next.getEndFlag()) {
                matchEnd = j;
                if (!longest) {
                    break;
                }
            }
            current = next;
        }
        return matchEnd;
    }

    /**
     * Reads the given classpath resource and adds every non-blank line, trimmed,
     * as a sensitive word.
     *
     * <p>Loading is best effort: a missing or unreadable resource is reported on
     * {@code System.err} and leaves the trie with whatever was loaded so far; it
     * never prevents the class from initializing.
     *
     * <p>NOTE(review): the resource is decoded as UTF-8 instead of the platform
     * default charset so that results do not depend on the host; confirm the
     * word list is stored as UTF-8.
     */
    private static void loadWords(String resourceName) {
        InputStream in = TrieTree.class.getClassLoader().getResourceAsStream(resourceName);
        if (in == null) {
            System.err.println("TrieTree: resource '" + resourceName
                    + "' not found on the classpath; no sensitive words loaded");
            return;
        }

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                String word = line.trim();
                if (!word.isEmpty()) {
                    addString(word);
                }
            }
        } catch (IOException e) {
            System.err.println("TrieTree: failed to read '" + resourceName + "': " + e);
        } finally {
            try {
                // Closing the reader also closes the underlying stream; fall back
                // to the raw stream if the reader was never created.
                if (reader != null) {
                    reader.close();
                } else {
                    in.close();
                }
            } catch (IOException ignored) {
                // Nothing useful can be done if closing fails.
            }
        }
    }
}

Test class

/**
 * Manual smoke check for {@code TrieTree}: prints the masked form of a sample
 * sentence and whether a sample string is reported as containing a sensitive word.
 */
public class TestMain {
    public TestMain() {
    }

    /**
     * Runs the two sample calls and prints each result on its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Mask any sensitive words in the sample sentence with '*'.
        String masked = TrieTree.filter("我们去吃饭了三级", '*');
        System.out.println(masked);

        // Ask whether the sample string contains a sensitive word.
        boolean flagged = TrieTree.isContaintSensitiveWord("sm");
        System.out.println(flagged);
    }
}

Here Insert Picture Description
Once the code is finished, package it to generate a jar file. Other people can then use that jar directly: running an `mvn install` command installs it into the local Maven repository, after which it can be used by adding the dependency to a project's pom.xml file.
The Maven environment variables must be configured in advance. Then, from the directory where the jar file is located, execute the following command to install the jar into the local Maven repository:

mvn install:install-file -DgroupId=com.xxx -DartifactId=sensitivewordfiltercommon -Dversion=1.0 -Dfile=sensitivewordfiltercommon-1.0.jar -Dpackaging=jar -DgeneratePom=true

Finally, to deploy the code to the test environment, put this jar into the lib directory under the WEB-INF directory under the service's ROOT directory, then restart the server for it to take effect.
Here Insert Picture Description

Published 207 original articles · 87 won praise · views 50000 +

Guess you like

Origin blog.csdn.net/zhouym_/article/details/102482711