Algorithm_字节匹配_002_Rabin-Karp


    // Searches pText (iTextBytes bytes) for the first occurrence of pPattern (iPatternBytes bytes).
    // The definition returns the 0-based offset of the first match, or -1 when there is no match
    // or (with bCheckParameter set) the arguments are rejected.
    static INT MatchBytesNotRabinKarp(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter = TRUE); // In-memory byte matching; imitation of the Rabin-Karp algorithm.

// Finds the first occurrence of a byte pattern inside a byte buffer using a
// simplified Rabin-Karp scheme (additive rolling checksum).
//
// Parameters:
//   pText            - buffer to search.
//   iTextBytes       - number of readable bytes at pText.
//   pPattern         - bytes to look for.
//   iPatternBytes    - number of readable bytes at pPattern.
//   bCheckParameter  - when TRUE, null pointers, non-positive lengths, a pattern
//                      longer than the text, and unreadable ranges are rejected.
//                      When FALSE only the cheap size relation is checked; the
//                      caller is responsible for pointer validity.
//
// Returns the 0-based offset of the first match in pText, or -1 if there is
// no match or the arguments are rejected.
INT DiySystem::MatchBytesNotRabinKarp(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter /*= TRUE*/)
{
    // Idea behind Rabin-Karp:
    //  1. Hash the pattern, then compare that hash with the hash of each
    //     pattern-sized window of the text.
    //  2. For a pattern of length M and a text of length N, the pattern is
    //     hashed once and the text has N-M+1 windows to hash.
    //  3. Different hashes mean the window cannot match; equal hashes still
    //     require a byte-by-byte comparison because hashes can collide.
    //  The hash of each window is derived from the previous window's hash
    //  instead of being recomputed from scratch.
    //  Pros: extends to multi-pattern matching (e.g. plagiarism detection);
    //        a better hash function gives better efficiency.
    //  Cons: can degrade to brute-force speed; needs some extra state.
    //
    // This imitation uses the plain sum of the byte values as the hash: slide
    // the window by one byte, subtract the byte that leaves, add the byte that
    // enters, and compare bytes only when the sums are equal.

    if (bCheckParameter)
    {
        // NOTE(review): IsBadReadPtr is documented by Microsoft as obsolete and
        // unsafe; it is kept here only to preserve the existing contract.
        if (!pText || iTextBytes <= 0 || !pPattern || iPatternBytes <= 0 || iPatternBytes > iTextBytes || IsBadReadPtr(pText, iTextBytes) || IsBadReadPtr(pPattern, iPatternBytes))
        {
            return -1;
        }
    }
    else if (iPatternBytes < 0 || iPatternBytes > iTextBytes)
    {
        // Even without full validation, a pattern longer than the text can
        // never match, and priming the checksum below would read past pText.
        return -1;
    }

    // Checksum of the pattern and of the first text window.
    LONGLONG llHashPattern = 0, llHashText = 0;
    for (INT i = 0; i < iPatternBytes; i++)
    {
        llHashPattern += pPattern[i];
        llHashText += pText[i];
    }

    // Start offset of the last window that still fits inside the text.
    const INT iLastStart = iTextBytes - iPatternBytes;
    for (INT j = 0; ; j++)
    {
        if (llHashPattern == llHashText)
        {
            // Equal checksums are only a hint; confirm byte by byte.
            INT k = 0;
            for (; k < iPatternBytes; k++)
            {
                if (pText[k + j] != pPattern[k])
                {
                    break;
                }
            }
            if (k == iPatternBytes)
            {
                return j;
            }
        }

        if (j >= iLastStart)
        {
            // No further window exists. Stop BEFORE rolling the checksum, so
            // that pText[iTextBytes] (one past the end) is never read.
            break;
        }

        // Roll the window one byte forward: drop pText[j], take in the byte
        // right after the current window (index <= iTextBytes - 1 here).
        llHashText -= pText[j];
        llHashText += pText[j + iPatternBytes];
    }
    return -1;
}

猜你喜欢

转载自www.cnblogs.com/dailycode/p/9644402.html