常见的算法技巧——字符串匹配算法

简单介绍
算法分类
文章小结

简单介绍

字符串匹配算法是用于在一个字符串（文本）中查找另一个字符串（模式）的出现位置的算法。

算法分类

暴力匹配算法（Brute-Force）

暴力匹配算法是一种简单直接的字符串匹配算法，也称为朴素算法。它从文本的起始位置开始，将模式与文本逐个字符进行比较；一旦遇到不匹配的字符，就将模式向右滑动一位，并从模式的第一个字符重新开始比较。设文本长度为 n、模式长度为 m，最坏情况下的时间复杂度为 O(n·m)。

#include <iostream>
#include <string>

// Naive (brute-force) substring search.
//
// Tries every alignment of `pattern` against `text`, left to right, and
// compares the two character by character at each alignment.
//
// text:    the string to search in.
// pattern: the string to search for.
// Returns the index of the first occurrence of `pattern` in `text`, or -1
// when there is none. An empty pattern is reported at index 0.
int bruteForceSearch(const std::string& text, const std::string& pattern) {
    const int textLen = static_cast<int>(text.length());
    const int patternLen = static_cast<int>(pattern.length());
    // Last alignment at which the pattern still fits inside the text;
    // negative when the pattern is longer than the text.
    const int lastStart = textLen - patternLen;

    int start = 0;
    while (start <= lastStart) {
        // Count how many leading pattern characters agree at this alignment.
        int matched = 0;
        while (matched < patternLen && text[start + matched] == pattern[matched]) {
            ++matched;
        }

        if (matched == patternLen) {
            return start;  // whole pattern matched: report its position
        }
        ++start;
    }

    return -1;  // no alignment matched
}

// Demo driver: searches a fixed text for a fixed pattern with the
// brute-force matcher and prints the outcome to standard output.
int main() {
    const std::string text = "Hello, World!";
    const std::string pattern = "World";

    const int position = bruteForceSearch(text, pattern);

    // -1 is the matcher's "not found" sentinel.
    if (position == -1) {
        std::cout << "Pattern not found" << std::endl;
        return 0;
    }

    std::cout << "Pattern found at position " << position << std::endl;
    return 0;
}

KMP算法（Knuth-Morris-Pratt）

KMP算法是一种高效的字符串匹配算法。它先对模式字符串进行预处理，构建部分匹配表（Partial Match Table），表中第 i 项记录模式前 i+1 个字符中最长相等真前缀与真后缀的长度；匹配失败时利用该表让模式直接跳到下一个可能匹配的位置，文本指针无需回退，从而减少比较次数。

#include <iostream>
#include <string>
#include <vector>

// Builds the KMP partial match (failure) table for `pattern`.
//
// After the call, table[i] is the length of the longest proper prefix of
// pattern[0..i] that is also a suffix of pattern[0..i].
//
// pattern: the pattern to preprocess.
// table:   output vector. It is grown to pattern.size() if it is smaller;
//          entries at indices >= pattern.size() are left untouched.
//          An empty pattern writes nothing.
void buildPartialMatchTable(const std::string& pattern, std::vector<int>& table) {
    const int m = static_cast<int>(pattern.length());

    // Never write past the end of a table the caller sized too small.
    if (table.size() < pattern.size()) {
        table.resize(pattern.size(), 0);
    }
    if (m == 0) {
        return;  // nothing to preprocess; avoids touching table[0]
    }

    table[0] = 0;  // a single character has no proper prefix
    int len = 0;   // length of the border currently being extended
    int i = 1;
    while (i < m) {
        if (pattern[i] == pattern[len]) {
            // The current border extends by one character.
            ++len;
            table[i] = len;
            ++i;
        } else if (len != 0) {
            // Fall back to the next shorter border and retry the same i.
            len = table[len - 1];
        } else {
            // No border ends at i.
            table[i] = 0;
            ++i;
        }
    }
}

// Knuth-Morris-Pratt substring search.
//
// Preprocesses `pattern` into a partial match table, then scans `text` once
// without ever moving the text index backwards.
//
// text:    the string to search in.
// pattern: the string to search for.
// Returns the index of the first occurrence of `pattern` in `text`, or -1
// when there is none. An empty pattern is reported at index 0.
int kmpSearch(const std::string& text, const std::string& pattern) {
    const int n = static_cast<int>(text.length());
    const int m = static_cast<int>(pattern.length());

    // An empty pattern matches at the start; handling it here also keeps the
    // table builder from being handed a zero-length table.
    if (m == 0) {
        return 0;
    }
    if (m > n) {
        return -1;  // the pattern cannot fit in the text
    }

    std::vector<int> table(m, 0);
    buildPartialMatchTable(pattern, table);

    int i = 0;  // index into text
    int j = 0;  // index into pattern (number of characters matched so far)
    while (i < n) {
        if (pattern[j] == text[i]) {
            ++i;
            ++j;
            if (j == m) {
                return i - j;  // full match: report where it started
            }
        } else if (j != 0) {
            // Reuse the longest border of the matched prefix; i stays put.
            j = table[j - 1];
        } else {
            ++i;  // nothing matched at this text position
        }
    }

    return -1;  // text exhausted without a full match
}

// Demo driver: searches a fixed text for a fixed pattern with the KMP
// matcher and prints the outcome to standard output.
int main() {
    const std::string text = "Hello, World!";
    const std::string pattern = "World";

    const int position = kmpSearch(text, pattern);

    // -1 is the matcher's "not found" sentinel.
    if (position == -1) {
        std::cout << "Pattern not found" << std::endl;
        return 0;
    }

    std::cout << "Pattern found at position " << position << std::endl;
    return 0;
}

Boyer-Moore算法

Boyer-Moore算法是一种高效的字符串匹配算法。它先对模式字符串进行预处理，匹配时从模式的末尾向前比较，并利用坏字符规则（Bad Character Rule）和好后缀规则（Good Suffix Rule）决定失配后模式可以向右跳过的距离，从而减少比较次数。

#include <iostream>
#include <string>
#include <vector>
#include <algorithm>

// Number of slots in the bad-character table; boyerMooreSearch sizes the
// table with this value. 256 gives one slot per possible 8-bit character
// value (0-255).
const int ASCII_SIZE = 256;

// Builds the Boyer-Moore bad-character table for `pattern`.
//
// After the call, table[c] is the index of the rightmost occurrence of the
// byte value c in `pattern`, or -1 if c does not occur.
//
// pattern: the pattern to preprocess.
// table:   output vector indexed by byte value; every existing entry is
//          overwritten. Its size is not changed, and byte values that do
//          not fit in the table are skipped.
void buildBadCharacterTable(const std::string& pattern, std::vector<int>& table) {
    std::fill(table.begin(), table.end(), -1);

    const int m = static_cast<int>(pattern.length());
    for (int i = 0; i < m; ++i) {
        // Plain char may be signed; go through unsigned char so bytes
        // >= 0x80 map to 128..255 instead of a negative index.
        const unsigned char c = static_cast<unsigned char>(pattern[i]);
        if (c < table.size()) {
            table[c] = i;  // later occurrences overwrite earlier ones
        }
    }
}

// Builds the Boyer-Moore good-suffix shift table for `pattern`.
//
// After the call, table[j] is how far the pattern may be shifted to the
// right when a mismatch happens at pattern index j, i.e. after
// pattern[j+1 .. m-1] has already matched the text. Every entry is in the
// range [1, m].
//
// pattern: the pattern to preprocess.
// table:   output vector; it is resized to pattern.size() and fully
//          overwritten. An empty pattern yields an empty table.
//
// The suffix lengths are computed by direct comparison, which is quadratic
// in the pattern length in the worst case (e.g. a run of one character).
void buildGoodSuffixTable(const std::string& pattern, std::vector<int>& table) {
    const int m = static_cast<int>(pattern.length());

    // Default shift: move the pattern completely past the current window.
    table.assign(pattern.size(), m);
    if (m == 0) {
        return;
    }

    // suffixLen[i] = length of the longest substring ending at index i that
    // is also a suffix of the whole pattern.
    std::vector<int> suffixLen(pattern.size(), 0);
    for (int i = 0; i < m; ++i) {
        int k = 0;
        while (k <= i && pattern[i - k] == pattern[m - 1 - k]) {
            ++k;
        }
        suffixLen[i] = k;
    }

    // Case A: only part of the matched suffix reappears, as a prefix of the
    // pattern. Longer prefixes are visited first and give smaller shifts,
    // so an entry is filled only while it still holds the default.
    int j = 0;
    for (int i = m - 1; i >= 0; --i) {
        if (suffixLen[i] == i + 1) {  // pattern[0..i] is also a suffix
            for (; j < m - 1 - i; ++j) {
                if (table[j] == m) {
                    table[j] = m - 1 - i;
                }
            }
        }
    }

    // Case B: the matched suffix reappears in full ending at index i.
    // Increasing i means later writes carry the smaller (rightmost) shift.
    // suffixLen[i] <= i + 1 <= m - 1 here, so the index stays in range.
    for (int i = 0; i <= m - 2; ++i) {
        table[m - 1 - suffixLen[i]] = m - 1 - i;
    }
}

// Boyer-Moore substring search.
//
// Compares the pattern against the text from the pattern's last character
// backwards. On a mismatch it shifts by the larger of the bad-character
// and good-suffix suggestions.
//
// text:    the string to search in.
// pattern: the string to search for.
// Returns the index of the first occurrence of `pattern` in `text`, or -1
// when there is none. An empty pattern is reported at index 0.
int boyerMooreSearch(const std::string& text, const std::string& pattern) {
    const int n = static_cast<int>(text.length());
    const int m = static_cast<int>(pattern.length());

    std::vector<int> badCharacter(ASCII_SIZE, -1);
    std::vector<int> goodSuffix(m, 0);
    buildBadCharacterTable(pattern, badCharacter);
    buildGoodSuffixTable(pattern, goodSuffix);

    int i = 0;  // current alignment of the pattern within the text
    while (i <= n - m) {
        // Scan right to left until a mismatch or the pattern is exhausted.
        int j = m - 1;
        while (j >= 0 && pattern[j] == text[i + j]) {
            --j;
        }

        if (j < 0) {
            return i;  // every pattern character matched
        }

        // Plain char may be signed; index the table through unsigned char
        // so bytes >= 0x80 do not become negative indices.
        const unsigned char mismatched = static_cast<unsigned char>(text[i + j]);
        const int badCharShift = j - badCharacter[mismatched];
        const int goodSuffixShift = goodSuffix[j];

        // The bad-character shift is negative when the mismatched byte's
        // rightmost occurrence lies to the right of j. Always advance by at
        // least one position so the loop is guaranteed to terminate.
        i += std::max(1, std::max(badCharShift, goodSuffixShift));
    }

    return -1;  // no alignment matched
}

// Demo driver: searches a fixed text for a fixed pattern with the
// Boyer-Moore matcher and prints the outcome to standard output.
int main() {
    const std::string text = "Hello, World!";
    const std::string pattern = "World";

    const int position = boyerMooreSearch(text, pattern);

    // -1 is the matcher's "not found" sentinel.
    if (position == -1) {
        std::cout << "Pattern not found" << std::endl;
        return 0;
    }

    std::cout << "Pattern found at position " << position << std::endl;
    return 0;
}

文章小结

根据实际应用场景和数据规模，选择适合的字符串匹配算法可以提高匹配效率。希望以上示例对您有所帮助！如果您有任何其他问题，请随时提问。

常见的算法技巧——字符串匹配算法

常见的算法技巧——字符串匹配算法

简单介绍

算法分类

暴力匹配算法（Brute-Force）

KMP算法（Knuth-Morris-Pratt）

Boyer-Moore算法

文章小结

猜你喜欢