最长重复子串的算法

求最长重复子串的算法

public class StrTest {
    
    

    @Test
    public void longDoubSubStr(){
    
    
        String str = "eabcdabcfeabeabeabeab";
        String longDoubSubStr1 = findLongestSubString(str);
        System.out.println("longDoubSubStr1:"+longDoubSubStr1);

        String longDoubSubStr2 = findSubStr(str);
        System.out.println("longDoubSubStr2:"+longDoubSubStr2);

        String longDoubSubStr3 = findSubStrByNxt(str);
        System.out.println("longDoubSubStr3:"+longDoubSubStr3);
    }

    private String findSubStrByNxt(String str) {
    
    

        int maxPos = 0;
        int maxReStrLen = 0;
        /**
         * 使用KMP算法:依次去掉str最前端的i个字符,计算余下子串的next[]数组,数组的最大指,即为最大子串长度,
         * 并根据得到最大子串时的位置的i值,获取最大子串
         */
        for (int pos = 0; pos < str.length(); pos++){
    
    
            String subStrTem = str.substring(pos);
            /**
             *
             */
            int reStrLen = calcNxtArry(subStrTem);
            if (reStrLen > maxReStrLen){
    
    
                maxReStrLen = reStrLen;
                maxPos = pos;
            }

        }
        /**
         * 从maxSubStrLenSubInfo中解析出重复的最长子串信息
         */

        return str.substring(maxPos, maxPos + maxReStrLen);
    }

    private int calcNxtArry(String subStrTem) {
    
    

        int maxReStrLen = 0;
        int reStrLen = 0;
        int preIndexReStrLen = 0;
        for (int index = 0; index < subStrTem.length(); index++){
    
    
            reStrLen = getNxt(index, subStrTem, preIndexReStrLen);
            if (maxReStrLen < reStrLen){
    
    
                maxReStrLen = reStrLen;
            }
            preIndexReStrLen = reStrLen;
        }

        return maxReStrLen;
    }

    private int getNxt(int index, String subStrTem, int preIndexReStrLen) {
    
    
        int subStrLen = 0;
        if (index == 0){
    
    
            return 0;
        }


        /**
         * 以下的逻辑可以优化:
         * 1)记录前一index对应的preindex_subStrLen,subStrLen = preindex_subStrLen +1 开始循环
         */
		//        for(subStrLen = index; subStrLen > 0; subStrLen--){
    
    
//            if (subStrTem.substring(0, subStrLen).equals(subStrTem.substring(index - subStrLen + 1, index + 1))){
    
    
//                break;
//            }
//        }

        for(subStrLen = preIndexReStrLen +1; subStrLen > 0; subStrLen--){
    
    
            if (subStrTem.substring(0, subStrLen).equals(subStrTem.substring(index - subStrLen + 1, index + 1))){
    
    
                break;
            }
        }
        return subStrLen;
    }

    /**
     * 简化的三层循环
     * @param str
     * @return
     */
    private String findLongestSubString(String str) {
    
    


        int maxSubStrLen = 0;
        int maxSubStrPos = 0;
        /**
         * 对str有效性做检验
         */
        if(!checkValid(str)){
    
    
            return null;
        }
        /**
         * 双重遍历,寻找存在的重复子串,并记录重复子串长度:
         *      外重循环:两个子串起始坐标的间隔,从(1,len(str)-1)
         *      内重循环:从源串的起始位置,依次往后遍历
         * 维护找到的最长重复子串的长度以及起始位置。
         */
        int strLength = str.length();
        for (int interval = 1; interval < strLength; interval++){
    
    

            for (int pos = 0; pos + interval < strLength; pos++){
    
    

                int subStrLen = 0;

                try {
    
    
                    while (str.charAt(pos + subStrLen) == str.charAt(pos + interval + subStrLen)){
    
    
                        subStrLen++;
                        if (pos + interval + subStrLen >= strLength){
    
    
                            break;
                        }
                    }
                } catch (Exception e) {
    
    
                    e.printStackTrace();
                    System.out.println("pos: "+pos+"; interval: "+interval);
                }

                if (subStrLen > maxSubStrLen){
    
    
                    maxSubStrLen = subStrLen;
                    maxSubStrPos = pos;
                }

            }
        }
        /**
         * 构造子串,返回
         */
        return str.substring(maxSubStrPos, maxSubStrPos+maxSubStrLen);
    }

    private boolean checkValid(String str) {
    
    
        return StringUtils.isNotBlank(str);
    }

    /***
     * 原始的三层循环
     */
    private String findSubStr(String str){
    
    

        /**
         * 定义重复出现的最大子串
         */
        String maxSubStr = "";
        /**
         * 定义内层循环找到重复子串的标记,默认false,为找到,内层循环一旦找到,就跳出外层循环
         */
        boolean gotFlag = false;
        /**
         * 遍历所有可能的最大长度,理论最大值len(str)-1
         */
        for (int subStrLen = str.length()-1; subStrLen > 0; subStrLen--){
    
    
            /**
             * 遍历str中所有长度为subStrLen的数组,验证是否重复
             */
            for (int pos = 0; pos + subStrLen < str.length(); pos++){
    
    

                /**
                 * 得到子串
                 */
                String subStr = str.substring(pos, pos+subStrLen);
                /**
                 * 得到str首次出现子串的起始坐标
                 */
                int pos_fix = str.indexOf(subStr);
                /**
                 * 得到str末次出现子串的起始坐标
                 */
                int pos_post = str.lastIndexOf(subStr);

                /**
                 * 两者不相等则说明重复出现
                 */
                if (pos_fix != pos_post){
    
    
                    maxSubStr = subStr;
                    gotFlag = true;
                    break;
                }

            }

            if (gotFlag == true){
    
    
                break;
            }
        }
        return maxSubStr;
    }

}

Guess you like

Origin blog.csdn.net/cc890824/article/details/112250448