常用字符串匹配算法(暴力解法,KMP,Sunday)

1. 暴力解法

// Brute-force substring search.
//
// Returns the index in S of the first occurrence of T, or -1 when T does
// not occur in S. An empty T matches at index 0 of any S (including an
// empty S).
int Idx(string S, string T){
    const int S_size = static_cast<int>(S.length());
    const int T_size = static_cast<int>(T.length());

    // The empty pattern is a prefix of every string.
    if(T_size == 0)
        return 0;

    // Try every alignment at which T still fits inside S. Restarting the
    // comparison at each `head` is the "backtracking" of the text index
    // that KMP avoids.
    for(int head = 0; head + T_size <= S_size; ++head){
        int j = 0;
        while(j < T_size && S[head + j] == T[j])
            ++j;
        if(j == T_size)
            return head;
    }
    return -1;
}

2. KMP (包括返回第一个匹配字符串的位置和返回所有匹配字符串的位置)

// Builds the KMP "next" (failure) table for s.
//
// After the call, next[0] is -1 and, for 1 <= i <= s.length(), next[i] is
// the length of the longest proper prefix of s[0..i) that is also a suffix
// of it.
//
// NOTE: indices 0 through s.length() inclusive are written, so `next` must
// have room for s.length() + 1 ints.
void PartialMatchTable(string s, int next[]){
    const int n = s.length();
    int border = -1;   // candidate border length; -1 means "nothing left to fall back to"
    next[0] = border;

    for(int pos = 0; pos < n; ){
        if(border != -1 && s[pos] != s[border]){
            // Mismatch: retry with the next shorter border.
            border = next[border];
            continue;
        }
        // Either no border is left or it extends by one character.
        ++pos;
        ++border;
        next[pos] = border;
    }
}

// KMP search for the first occurrence of pattern p in text s.
//
// Returns the index in s where the first match starts, or -1 when p does
// not occur in s. An empty p matches at index 0.
int kmp(string s, string p){
    const int s_size = static_cast<int>(s.length());
    const int p_size = static_cast<int>(p.length());

    // PartialMatchTable writes next[0..p_size] (p_size + 1 entries), so the
    // table needs one slot more than the pattern length. A vector replaces
    // the non-standard variable-length array and stays valid for p_size == 0.
    vector<int> next(p_size + 1);
    PartialMatchTable(p, next.data());

    int i = 0;   // position in the text; never moves backwards
    int j = 0;   // position in the pattern; -1 means "restart at the next text character"
    while(i < s_size && j < p_size){
        if(j == -1 || s[i] == p[j]){
            ++i;
            ++j;
        }
        else{
            // Mismatch: slide the pattern using the failure table.
            j = next[j];
        }
    }

    return (j == p_size) ? i - j : -1;
}

// KMP search for every occurrence of pattern p in text s.
//
// Returns the start indices of all matches in increasing order, including
// matches that overlap each other. When there is no match, or when p is
// empty, the result is a vector holding the single value -1.
vector<int> kmp_vec(string s, string p){
    const int s_size = static_cast<int>(s.length());
    const int p_size = static_cast<int>(p.length());
    vector<int> pos;

    if(p_size > 0 && p_size <= s_size){
        // PartialMatchTable writes next[0..p_size] (p_size + 1 entries); the
        // extra last entry is the fallback used after a complete match.
        vector<int> next(p_size + 1);
        PartialMatchTable(p, next.data());

        int i = 0;   // position in the text; never moves backwards
        int j = 0;   // position in the pattern; -1 means "restart at the next text character"
        while(i < s_size){
            if(j == -1 || s[i] == p[j]){
                ++i;
                ++j;
                if(j == p_size){
                    pos.push_back(i - j);
                    // Continue from the longest border of the whole pattern
                    // so that overlapping occurrences are not skipped.
                    j = next[j];
                }
            }
            else{
                j = next[j];
            }
        }
    }

    if(pos.empty())
        pos.push_back(-1);
    return pos;
}

3. Sunday

// Sunday substring search.
//
// Returns the index in t of the first occurrence of p, or -1 when p does
// not occur in t. Returns -1 when either string is empty.
int SundaySearch(string t, string p){
    const int t_size = static_cast<int>(t.size());
    const int p_size = static_cast<int>(p.size());

    if(p_size <= 0 || t_size <= 0)
        return -1;

    int start = 0;   // index in t where the current window begins
    while(start + p_size <= t_size){
        // Compare the window against the pattern from left to right.
        int j = 0;
        while(j < p_size && t[start + j] == p[j])
            ++j;
        if(j == p_size)
            return start;

        // The shift is decided by the text character just past the window.
        // If there is none, this was the last possible alignment.
        const int after = start + p_size;
        if(after >= t_size)
            break;

        // Align the rightmost occurrence of that character in p with it;
        // if p does not contain it, jump the whole window past it (k == -1).
        const string::size_type hit = p.rfind(t[after]);
        const int k = (hit == string::npos) ? -1 : static_cast<int>(hit);
        start = after - k;
    }
    return -1;
}

4. 资料

D.M. Sunday. A Very Fast Substring Search Algorithm. Communications of the ACM, 33(8): 132–142, August 1990.

阮一峰. 字符串匹配的KMP算法

July. 从头到尾彻底理解KMP(2014年8月22日版)

猜你喜欢

转载自www.cnblogs.com/iwangzhengchao/p/10283326.html