// Searches the iTextBytes bytes at pText for the first occurrence of the
// iPatternBytes bytes at pPattern, using a rolling byte-sum as a cheap
// pre-filter before a byte-by-byte comparison.
// Returns the zero-based offset of the first match, or -1 when there is no
// match (or when bCheckParameter is TRUE and the arguments fail validation).
static INT MatchBytesNotRabinKarp(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter = TRUE); // In-memory byte matching; modeled on the Rabin-Karp algorithm.
// Finds the first occurrence of a byte pattern inside a byte buffer.
//
// Background - the idea behind Rabin-Karp:
//   1. Hash the pattern, then compare that hash against the hash of each
//      pattern-sized window of the text.
//   2. With pattern length M and text length N, the pattern is hashed once and
//      the text yields N - M + 1 window hashes.
//   3. Different hashes mean the window cannot match; equal hashes still need a
//      byte-by-byte comparison, because a hash function cannot guarantee that
//      different inputs produce different hashes.
//   The window hash is not recomputed from scratch each time; it is derived
//   from the previous window's hash.
//   Pros: extends to multi-pattern matching (e.g. plagiarism detection); a
//         better hash function gives better efficiency.
//   Cons: can degrade to brute-force speed and needs some extra state.
//
// This implementation is only "Rabin-Karp-like": the "hash" is the plain sum
// of the byte values in the window. The window advances one byte at a time by
// subtracting the byte that leaves and adding the byte that enters; only when
// the sums are equal are the bytes compared one by one.
//
// Parameters:
//   pText           - buffer to search in.
//   iTextBytes      - number of bytes readable at pText.
//   pPattern        - bytes to search for.
//   iPatternBytes   - number of bytes readable at pPattern.
//   bCheckParameter - when TRUE, the arguments are validated first and -1 is
//                     returned if any check fails. When FALSE no validation is
//                     performed and the caller must guarantee
//                     0 < iPatternBytes <= iTextBytes and valid pointers.
//
// Returns the zero-based offset of the first match in pText, or -1 if the
// pattern does not occur (or validation failed).
INT DiySystem::MatchBytesNotRabinKarp(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter /*= TRUE*/)
{
    if (bCheckParameter)
    {
        // NOTE(review): Microsoft documents IsBadReadPtr as obsolete; it is kept
        // here only to preserve the existing validation behaviour.
        if (!pText || iTextBytes <= 0 ||
            !pPattern || iPatternBytes <= 0 ||
            iPatternBytes > iTextBytes ||
            IsBadReadPtr(pText, iTextBytes) ||
            IsBadReadPtr(pPattern, iPatternBytes))
        {
            return -1;
        }
    }

    // Byte sums of the pattern and of the first window of the text.
    LONGLONG llHashPattern = 0, llHashText = 0;
    for (INT i = 0; i < iPatternBytes; i++)
    {
        llHashPattern += pPattern[i];
        llHashText += pText[i];
    }

    // Offset of the last window that still fits entirely inside the text.
    const INT iLastStart = iTextBytes - iPatternBytes;

    for (INT j = 0; j <= iLastStart; j++)
    {
        if (llHashPattern == llHashText)
        {
            // Equal sums are only a hint: confirm with a byte-by-byte comparison.
            INT k = 0;
            for (; k < iPatternBytes; k++)
            {
                if (pText[k + j] != pPattern[k])
                {
                    break;
                }
            }
            if (k == iPatternBytes)
            {
                return j;
            }
        }

        // Slide the window by one byte: drop the leading byte, add the byte that
        // enters. Skipped after the final window, because the entering byte
        // would be pText[iTextBytes], one past the end of the buffer.
        if (j < iLastStart)
        {
            llHashText -= pText[j];
            llHashText += pText[j + iPatternBytes];
        }
    }

    return -1;
}