static INT MatchBytesSunday(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter = TRUE); // Byte-pattern search in a memory buffer; Sunday algorithm.
/// @brief Finds the first occurrence of a byte pattern inside a byte buffer
///        using the Sunday string-search algorithm.
///
/// The pattern is compared against the current text window from front to back.
/// On a mismatch, the byte immediately AFTER the window decides how far the
/// window moves: the pattern is shifted so that its last occurrence of that
/// byte lines up with it, or, if the pattern does not contain the byte at all,
/// so that the pattern starts just past it.
///
/// Example with pattern "SOFTWARE" (8 bytes):
///   - byte after the window is 'T' (last index 3 in the pattern) -> shift 5
///   - byte after the window is 'E' (last index 7 in the pattern) -> shift 1
///   - byte after the window is 'D' (not in the pattern)          -> shift 9
///
/// Worst-case cost is O(iTextBytes * iPatternBytes) because every window may be
/// compared byte by byte.
///
/// @param pText            Buffer to search.
/// @param iTextBytes       Number of readable bytes at pText.
/// @param pPattern         Byte sequence to look for.
/// @param iPatternBytes    Number of bytes at pPattern.
/// @param bCheckParameter  When TRUE, pointers and sizes are validated first and
///                         -1 is returned if they are unusable. When FALSE the
///                         caller is responsible for passing valid, positive
///                         sizes and readable pointers.
/// @return Zero-based offset of the first match in pText, or -1 if there is no
///         match (or validation failed).
INT DiySystem::MatchBytesSunday(IN const UCHAR* pText, IN INT iTextBytes, IN const UCHAR* pPattern, IN INT iPatternBytes, IN BOOL bCheckParameter /*= TRUE*/)
{
    if (bCheckParameter)
    {
        // NOTE(review): IsBadReadPtr is documented by Microsoft as obsolete and
        // unreliable; it is kept here only to preserve existing behavior.
        if (!pText || iTextBytes <= 0 || !pPattern || iPatternBytes <= 0 || iPatternBytes > iTextBytes
            || IsBadReadPtr(pText, iTextBytes) || IsBadReadPtr(pPattern, iPatternBytes))
        {
            return -1;
        }
    }

    // Last index at which each byte value occurs in the pattern; -1 = absent.
    std::vector<INT> vecPatternIndex(BYTE_MAX + 1, -1);
    for (INT i = 0; i < iPatternBytes; i++)
    {
        vecPatternIndex[pPattern[i]] = i; // later occurrences overwrite earlier ones
    }

    // Largest offset at which the pattern still fits entirely inside the text.
    const INT iLastStart = iTextBytes - iPatternBytes;

    for (INT i = 0; i <= iLastStart;)
    {
        // Count how many leading bytes of the pattern match the current window.
        INT iMatch = 0;
        while (iMatch < iPatternBytes && pPattern[iMatch] == pText[i + iMatch])
        {
            ++iMatch;
        }
        if (iMatch == iPatternBytes)
        {
            return i;
        }

        // The window already ends at the last text byte: there is no "next byte"
        // to inspect, and reading pText[iTextBytes] would be out of bounds.
        if (i == iLastStart)
        {
            break;
        }

        // Shift so the pattern's last occurrence of the byte following the window
        // aligns with it. For an absent byte the table holds -1, which yields a
        // shift of iPatternBytes + 1 (pattern starts just past that byte).
        i += iPatternBytes - vecPatternIndex[pText[i + iPatternBytes]];
    }

    return -1;
}