https://msdn.microsoft.com/en-us/library/2c8d19sb(VS.71).aspx
strtok
char *strtok( char *strToken, const char *strDelimit );
// crt_strtok.c
/* In this program, a loop uses strtok to print all the tokens
 * (separated by commas, blanks, tabs or newlines) found in the
 * string named "teststring".
 */
#include <string.h>
#include <stdio.h>

char teststring[] = "A string\tof ,,tokens\nand some more tokens";
char seps[] = " ,\t\n";
char *token;

/* Program entry point: prints every token of teststring, one per line.
 * Returns 0 on completion. Note that strtok writes NUL bytes into
 * teststring, so the buffer is modified by the loop.
 */
int main(void)
{
    printf("Tokens:\n");

    /* Establish string and get the first token: */
    token = strtok(teststring, seps);
    while (token != NULL) {
        /* While there are tokens in "teststring" */
        printf(" %s\n", token);

        /* Get next token: passing NULL continues from the saved position */
        token = strtok(NULL, seps);
    }
    return 0;
}
Tokens: A string of tokens and some more tokens
/* Returns nonzero when ch equals one of the non-NUL characters of set. */
static int strtok_A_in_set(int ch, const char *set)
{
    for (; *set != '\0'; set++) {
        if (ch == *set)
            return 1;
    }
    return 0;
}

/*
 * Split a string into tokens, strtok-style.
 *
 * s      string to tokenize on the first call; NULL to continue with the
 *        string remembered from the previous call.
 * delim  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token (the delimiter that ends it is
 * overwritten with NUL), or NULL when no token is left.  The resume
 * position is kept in a function-local static variable.
 */
char *strtok_A(char *s, const char *delim)
{
    static char *last;
    char *cursor;
    char *token;

    cursor = (s != NULL) ? s : last;
    if (cursor == NULL)
        return NULL;

    /* Step over any delimiters in front of the token. */
    while (strtok_A_in_set(*cursor, delim))
        cursor++;

    /* Nothing but delimiters remained: tokenizing is finished. */
    if (*cursor == '\0') {
        last = NULL;
        return NULL;
    }

    /* The first character is known to belong to the token. */
    token = cursor;
    cursor++;

    /* Walk to the end of the token: either a delimiter or the final NUL. */
    for (;; cursor++) {
        if (*cursor == '\0') {
            /* Token runs to the end of the string; nothing to resume. */
            last = NULL;
            return token;
        }
        if (strtok_A_in_set(*cursor, delim)) {
            /* Cut the token here and resume just past the delimiter. */
            *cursor = '\0';
            last = cursor + 1;
            return token;
        }
    }
}
/* Returns nonzero when ch equals one of the non-NUL characters of delims. */
static int strtok_func_is_delim(char ch, const char *delims)
{
    for (; *delims != '\0'; delims++) {
        if (ch == *delims)
            return 1;
    }
    return 0;
}

/*
 * Split a string into tokens, strtok-style.
 *
 * strToken    string to tokenize on the first call; NULL to continue with
 *             the string remembered from the previous call.
 * strDelimit  NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token (the delimiter that ends it is
 * overwritten with NUL), or NULL when no token is left.  NULL is also
 * returned when strToken is NULL and no string has been supplied yet.
 * The resume position is kept in a function-local static variable.
 */
char* strtok_func(char *strToken, const char *strDelimit)
{
    static char *pSaved = NULL;
    char *pCursor;
    char *pStart;

    /* Check for "nothing to resume" BEFORE touching the pointer. */
    pCursor = (strToken != NULL) ? strToken : pSaved;
    if (pCursor == NULL) {
        return NULL;
    }

    /* Skip every leading delimiter. */
    while (*pCursor != '\0' && strtok_func_is_delim(*pCursor, strDelimit)) {
        pCursor++;
    }

    /* Only delimiters (or nothing) remained: report "no token", never "". */
    if (*pCursor == '\0') {
        pSaved = pCursor;
        return NULL;
    }

    /* Scan to the end of the token. */
    pStart = pCursor;
    while (*pCursor != '\0' && !strtok_func_is_delim(*pCursor, strDelimit)) {
        pCursor++;
    }

    /* Terminate the token and resume just past the delimiter, if any. */
    if (*pCursor != '\0') {
        *pCursor = '\0';
        pCursor++;
    }
    pSaved = pCursor;

    return pStart;
}
下文转自
http://blog.csdn.net/morewindows/article/details/8740315
strtok源码剖析
strtok函数可以用于分隔字符串,最近看了下这个函数的源代码,其中有
/* Delimiter lookup table used as a bit set: 32 bytes * 8 bits = 256 bits,
 * i.e. one bit for each value in the range 0..255.
 * (count and ctrl are declared outside this excerpt.) */
unsigned char map[32];

/* Clear control map: start with every bit at 0 (no delimiters recorded). */
for (count = 0; count < 32; count++)
    map[count] = 0;

/* Set bits in delimiter table.
 * *ctrl >> 3 selects the byte within map and *ctrl & 7 selects the bit
 * within that byte.
 * NOTE(review): this assumes *ctrl is in 0..255 (ctrl pointing at unsigned
 * char); confirm against the declaration, which is not part of this excerpt.
 * Because the loop body runs before the condition is tested, the bit for
 * the terminating NUL (bit 0 of map[0]) is set as well. */
do {
    map[*ctrl >> 3] |= (1 << (*ctrl & 7));
} while (*ctrl++);
这段代码非常有意思,第一眼可能不明白,为什么用个unsigned char map[32];数组来保存分隔字符。下面的map[*ctrl >> 3] |= (1 << (*ctrl & 7));更加有点古怪。在网上查了下,并没有文章来解释,因此写篇博客来解释说明下。
这个长为32的数组与后面的左移、右移操作看起来让人迷惑,其实如果看过《位操作基础篇之位操作全面总结》(http://blog.csdn.net/morewindows/article/details/7354571)的“位操作与空间压缩”便不难想到,这里其实是个位操作的空间压缩技巧。由于(unsigned) char类型的数据只会从0到255,共256种取值,因此可以建立一个哈希表来记录哪些字符是分隔符:是则标记为1,否则标记为0;每个标记只占1位,256位正好压缩进256/8=32个字节,即map[c >> 3]的第(c & 7)位对应字符c。这样在分隔字符串时,就能直接判断字符串该位置的字符是否为分隔符。详细请见MyStrtok的实现。
// strtok source walkthrough: bit operations and space compression
// http://blog.csdn.net/morewindows/article/details/8740315
// By MoreWindows( http://blog.csdn.net/MoreWindows )
#include <stdio.h>

// __cdecl is a compiler-specific calling-convention keyword. Expand it to
// nothing on toolchains that do not provide it so this file builds there too.
#if !defined(_MSC_VER) && !defined(__cdecl)
#define __cdecl
#endif

// Size of the delimiter bit set: an unsigned char has 256 possible values,
// and one bit per value packs into 256 / 8 = 32 bytes.
enum { MYSTRTOK_MAP_BYTES = 32 };

// Returns nonzero when the bit for ch is set in the delimiter bit set.
// ch / 8 selects the byte and ch % 8 the bit inside it; the CRT source
// writes the same thing as (ch >> 3) and (ch & 7).
static int my_strtok_is_delim(const unsigned char *map, unsigned char ch)
{
    return map[ch / 8] & (1 << (ch % 8));
}

// Annotated re-implementation of strtok.
//
// string   string to tokenize on the first call; NULL to continue breaking
//          tokens out of the string from the previous call.
// control  NUL-terminated set of delimiter characters (must not be NULL).
//
// Returns a pointer to the next token (the delimiter that ends it is
// overwritten with NUL), or NULL when no token is left. NULL is also
// returned when string is NULL and no string has been supplied yet.
//
// The resume position lives in a function-local static variable, so calls
// on different strings cannot be interleaved.
char* __cdecl MyStrtok(char * string, const char * control)
{
    static unsigned char *next_token = NULL;
    const unsigned char *ctrl = (const unsigned char *)control;
    unsigned char map[MYSTRTOK_MAP_BYTES] = {0};   // cleared delimiter bit set
    unsigned char *str;
    unsigned char *token;

    // Set one bit per delimiter character. The do/while form also records
    // the terminating NUL, which is why the loops below test *str as well.
    do {
        map[*ctrl / 8] |= (1 << (*ctrl % 8));
    } while (*ctrl++);

    // Start from the caller's string, or resume where the last call stopped.
    str = (string != NULL) ? (unsigned char *)string : next_token;
    if (str == NULL)
        return NULL;    // nothing to resume: no string was ever supplied

    // Find the beginning of the token (skip over leading delimiters).
    // There is no token iff this loop leaves str on the terminating NUL.
    while (my_strtok_is_delim(map, *str) && *str)
        str++;
    token = str;

    // Find the end of the token. If it is not the end of the string,
    // put a NUL there and step past it.
    for ( ; *str; str++) {
        if (my_strtok_is_delim(map, *str)) {
            *str++ = '\0';
            break;
        }
    }

    // Remember where the next call should continue.
    next_token = str;

    // str did not move past token only when no token characters were found.
    return (token == str) ? NULL : (char *)token;
}

// Demo: splits a sample string on ",.()" and prints each token on its own line.
// Another input to try: "MoreWindows (By http://blog.csdn.net/MoreWindows)"
// with " " as the delimiter set.
int main(void)
{
    char szText[] = "ab,c...d(e)f(g)hj";
    char szFind[] = ",.()";
    char *pToken;

    printf(" strtok源码剖析 位操作与空间压缩\n");
    printf(" - http://blog.csdn.net/morewindows/article/details/8740315 -\n");
    printf(" - By MoreWindows( http://blog.csdn.net/MoreWindows ) - \n\n");

    printf("原字符串为: %s\n", szText);
    printf("分隔后为: \n");

    for (pToken = MyStrtok(szText, szFind);
         pToken != NULL;
         pToken = MyStrtok(NULL, szFind)) {
        printf("%s\n", pToken);
    }
    return 0;
}
运行结果如图所示(图片不能打开,请访问http://blog.csdn.net/morewindows/article/details/8740315)