Algoritmo de detecção de palavras sensíveis

Ideia: algoritmo DFA

Autômato finito determinístico (DFA), usado para correspondência de expressões regulares e para encontrar a correspondência mais longa à esquerda de um subpadrão

   /**
     * Scans a text and collects the distinct sensitive words found in it.
     *
     * <p>Every position of the text is tried as a possible start of a sensitive
     * word. When a word is found, it is added to the result and scanning resumes
     * immediately after it, so matches never overlap.
     *
     * @param scriptText the text to scan
     * @param matchType  matching mode, forwarded unchanged to
     *                   {@code testSensitiveWord} (1 = minimal match, 2 = full match)
     * @return the set of matched substrings; empty when nothing matched
     */
    public static Set<String> checkSensitiveWord(String scriptText, int matchType) {
        Set<String> hits = new HashSet<>();
        int cursor = 0;
        while (cursor < scriptText.length()) {
            int matched = testSensitiveWord(scriptText, cursor, matchType, sensitiveWordMap);
            if (matched > 0) {
                int end = cursor + matched;
                hits.add(scriptText.substring(cursor, end));
                // Continue right after the word that was just recorded.
                cursor = end;
            } else {
                cursor++;
            }
        }
        return hits;
    }

Crie o mapa de palavras sensíveis

 /**
  * Rebuilds the shared {@code sensitiveWordMap} lookup tree from the given entries.
  *
  * <p>The words are read through {@code getSenstiveWord()}, de-duplicated, and
  * inserted character by character into nested maps. Each node maps a
  * {@code Character} to its child node and also carries the string entry
  * {@code "isEnd"} ({@code "1"} when a word ends at that node, otherwise
  * {@code "0"}). A node that ends a word additionally stores {@code "deepCount"},
  * the length of that word as a string. {@code null} words are skipped.
  *
  * <p>{@code sensitiveWordMap} is replaced by a new empty map before the words
  * are inserted.
  *
  * @param wordSenstives entries supplying the sensitive words
  */
 public static void initSensitiveWordMap(List<WordSenstive> wordSenstives) {
        log.info("开始初始化敏感词map");
        List<String> rawWords = wordSenstives.stream()
                .map(entry -> entry.getSenstiveWord())
                .collect(Collectors.toList());
        Set<String> uniqueWords = new HashSet<String>(rawWords);
        sensitiveWordMap = new HashMap(uniqueWords.size());
        for (String word : uniqueWords) {
            if (word == null) {
                continue;
            }
            // Walk down from the root, creating nodes that do not exist yet.
            Map node = sensitiveWordMap;
            int wordLength = word.length();
            for (int pos = 0; pos < wordLength; pos++) {
                char ch = word.charAt(pos);
                Object child = node.get(ch);
                if (child == null) {
                    Map<String, String> created = new HashMap<String, String>();
                    created.put("isEnd", "0");
                    node.put(ch, created);
                    node = created;
                } else {
                    node = (Map) child;
                }
            }
            // Mark the node of the last character; an empty word never marks the root.
            if (wordLength > 0) {
                node.put("deepCount", String.valueOf(wordLength));
                node.put("isEnd", "1");
            }
        }
        log.info("敏感词map构建完成");
    }

Corresponder palavras sensíveis

 /**
  * Tries to match a sensitive word that starts exactly at {@code index}.
  *
  * <p>The text is followed through the nested lookup maps one character at a
  * time. Whenever a node flagged {@code "isEnd" = "1"} is reached, its
  * {@code "deepCount"} is handed to {@code isWord} to check the character in
  * front of the word. The walk stops at that node when {@code matchType} is 1 or
  * the check passed; it also stops when the text ends or no child node exists.
  *
  * <p>NOTE(review): for trees built by {@code initSensitiveWordMap},
  * {@code deepCount} equals the number of characters matched so far, so the
  * boundary check looks at the same character ({@code index - 1}) for every end
  * node. In that case {@code matchType} appears to have no effect on the result
  * - confirm whether a true longest match was intended for mode 2.
  *
  * @param scriptText       the text being scanned
  * @param index            position in {@code scriptText} where the match must start
  * @param matchType        1 = minimal match, 2 = full match
  * @param sensitiveWordMap root of the nested lookup maps
  * @return the number of characters consumed, or 0 when fewer than two
  *         characters were consumed or the last boundary check did not pass
  */
 private static int testSensitiveWord(String scriptText, int index, int matchType, Map sensitiveWordMap) {
        int consumed = 0;
        boolean boundaryOk = false;
        Map node = sensitiveWordMap;
        int pos = index;
        while (pos < scriptText.length()) {
            node = (Map) node.get(scriptText.charAt(pos));
            if (node == null) {
                // No word continues with this character.
                break;
            }
            consumed++;
            if ("1".equals(node.get("isEnd"))) {
                int depth = Integer.valueOf((String) node.get("deepCount"));
                boundaryOk = isWord(scriptText, pos, depth);
                if (matchType == 1 || boundaryOk) {
                    break;
                }
            }
            pos++;
        }
        // Single-character hits and hits with a failed boundary check are rejected.
        return (consumed >= 2 && boundaryOk) ? consumed : 0;
    }

Verificar se a correspondência é uma palavra

   /**
    * Checks the character located {@code deepCount} positions before index
    * {@code i} to decide whether a match starts on a word boundary.
    *
    * @param scriptText the text being scanned
    * @param i          index of the last character of the match
    * @param deepCount  length of the matched word
    * @return {@code false} when that preceding character exists and is a
    *         lowercase ASCII letter ({@code 'a'}..{@code 'z'}); {@code true}
    *         otherwise, including when the match starts at the beginning of the text
    */
   private static boolean isWord(String scriptText, int i, int deepCount) {
        int precedingIndex = i - deepCount;
        if (precedingIndex < 0) {
            // Nothing precedes the match.
            return true;
        }
        char preceding = scriptText.charAt(precedingIndex);
        return preceding < 'a' || preceding > 'z';
    }

おすすめ

転載: blog.csdn.net/babing18258840900/article/details/124987598