Algorithm_字节匹配_005_Sunday


// Searches the byte buffer pText for the byte sequence pPattern using the Sunday algorithm.
// Returns the 0-based offset of the first match, or -1 when there is no match or the parameter check fails.
static INT MatchBytesSunday(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter = TRUE); // In-memory byte matching; Sunday algorithm.

/**
 * @brief Finds the first occurrence of a byte pattern inside a byte buffer (Sunday algorithm).
 *
 * @param pText           Buffer to search.
 * @param iTextBytes      Number of bytes in pText.
 * @param pPattern        Byte sequence to look for.
 * @param iPatternBytes   Number of bytes in pPattern.
 * @param bCheckParameter When TRUE, the arguments are validated first (non-null pointers,
 *                        positive lengths, pattern not longer than text, readable memory).
 *                        When FALSE, the caller is responsible for passing valid arguments.
 * @return 0-based offset of the first match in pText, or -1 when the pattern does not occur
 *         or the parameter check fails.
 */
INT DiySystem::MatchBytesSunday(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter /*= TRUE*/)
{
    // Sunday algorithm
    // Similar in principle to Boyer-Moore (roughly a trimmed-down version of it) but easier
    // to understand: average time complexity O(n), worst case O(n * m).
    //
    // - Compare the pattern against the current window from front to back.
    // - If every pattern byte matched, return the window offset.
    // - Otherwise look at the text byte immediately AFTER the current window and find its
    //   last occurrence in the pattern.
    // - Shift the pattern so that this last occurrence lines up with that byte; if the byte
    //   does not occur in the pattern at all, shift the pattern completely past it.
    //
    // LESSONS TEARNED IN SOFTWARE TE
    // SOFTWARE
    // byte after the window is 'T', last index in pattern = 3, shift = 8 - 3 = 5
    // -->
    // LESSONS TEARNED IN SOFTWARE TE
    //      SOFTWARE
    // byte after the window is 'E', last index in pattern = 7, shift = 8 - 7 = 1
    // -->
    // LESSONS TEARNED IN SOFTWARE TE
    //       SOFTWARE
    // byte after the window is 'D', not in the pattern (index -1), shift = 8 - (-1) = 9,
    // i.e. the pattern start is aligned with the byte following 'D'
    // -->
    // LESSONS TEARNED IN SOFTWARE TE
    //                SOFTWARE
    // byte after the window is 'W', last index in pattern = 4, shift = 8 - 4 = 4
    // -->
    // LESSONS TEARNED IN SOFTWARE TE
    //                    SOFTWARE

    if (bCheckParameter)
    {
        // NOTE(review): Microsoft documents IsBadReadPtr as obsolete; kept here to preserve
        // the existing validation behavior.
        if (!pText || iTextBytes <= 0 || !pPattern || iPatternBytes <= 0 || iPatternBytes > iTextBytes || IsBadReadPtr(pText, iTextBytes) || IsBadReadPtr(pPattern, iPatternBytes))
        {
            return -1;
        }
    }

    // Last-occurrence table: [byte value] = last index of that byte in the pattern,
    // or -1 when the byte does not occur in the pattern.
    std::vector<INT> vecLastIndex(BYTE_MAX + 1, -1);
    for (INT i = 0; i < iPatternBytes; i++)
    {
        vecLastIndex[pPattern[i]] = i; // later occurrences overwrite earlier ones
    }

    // Largest window offset at which the whole pattern still fits inside the text.
    const INT iLastStart = iTextBytes - iPatternBytes;

    for (INT i = 0; i <= iLastStart;)
    {
        // Count how many leading pattern bytes match the current window.
        INT iMatch = 0;
        while (iMatch < iPatternBytes && pPattern[iMatch] == pText[i + iMatch])
        {
            iMatch++;
        }
        if (iMatch == iPatternBytes)
        {
            return i;
        }

        // The last possible window has no byte after it: reading pText[i + iPatternBytes]
        // would access pText[iTextBytes], one past the end of the buffer. No further
        // window can fit anyway, so the search is over.
        if (i == iLastStart)
        {
            break;
        }

        // Byte right after the current window decides the shift. With a last index of -1
        // (byte absent from the pattern) this yields iPatternBytes + 1, which moves the
        // pattern completely past that byte.
        i += iPatternBytes - vecLastIndex[pText[i + iPatternBytes]];
    }
    return -1;
}

猜你喜欢

转载自www.cnblogs.com/dailycode/p/9644407.html