[西门子面试题]strtok的理解以及实现

MSDN Link
https://msdn.microsoft.com/en-us/library/2c8d19sb(VS.71).aspx

strtok

/*
 * Prototype of strtok as shown in the MSDN syntax section.
 *
 * strToken   - string to split; the sample below passes the string on the
 *              first call and NULL on later calls to keep splitting it.
 * strDelimit - set of separator characters.
 *
 * Returns a pointer to the next token; the sample loop below treats a NULL
 * return as "no more tokens".
 */
char *strtok(
   char *strToken,
   const char *strDelimit 
);

// crt_strtok.c
/* In this program, a loop uses strtok
 * to print all the tokens (separated by commas
 * or blanks) in the string named "teststring".
 */

#include <string.h>
#include <stdio.h>

/* Input text that main() hands to strtok as the string to tokenize. */
char teststring[] = "A string\tof ,,tokens\nand some  more tokens";
/* Separator set: space, comma, tab and newline. */
char seps[]   = " ,\t\n";
/* Most recent token returned by strtok; NULL ends the loop in main(). */
char *token;

/*
 * Print every token of the global "teststring", one per line, using the
 * separators in the global "seps".
 *
 * The first strtok call receives the string itself; each following call
 * passes NULL so that strtok continues with the same string. The loop ends
 * when strtok returns NULL.
 *
 * Returns 0. (main is declared int because "void main" is not one of the
 * standard signatures for a hosted C program.)
 */
int main( void )
{
   printf( "Tokens:\n" );
   /* Establish string and get the first token: */
   token = strtok( teststring, seps );
   while( token != NULL )
   {
      /* While there are tokens in "teststring" */
      printf( " %s\n", token );
      /* Get next token: */
      token = strtok( NULL, seps );
   }
   return 0;
}

Tokens:
 A
 string
 of
 tokens
 and
 some
 more
 tokens

/*
 * strtok_A - split a string into tokens, one token per call.
 *
 * s     - string to tokenize, or NULL to continue with the string saved by
 *         the previous call.
 * delim - NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when no token is left. The
 * delimiter that ends a token is overwritten with NUL in the caller's
 * buffer. The resume position is kept in a static variable.
 */
char* strtok_A(char *s, const char *delim)
{
    static char *last;
    const char *d;
    char *begin;
    int ch;
    int is_delim;

    /* No new string given: resume from the saved position, if there is one. */
    if (s == NULL) {
        s = last;
        if (s == NULL)
            return NULL;
    }

    /* Step over leading delimiters; stops at the first other character,
     * which may be the terminating NUL. */
    for (;;) {
        ch = *s;
        is_delim = 0;
        for (d = delim; *d != 0; d++) {
            if (*d == ch) {
                is_delim = 1;
                break;
            }
        }
        if (!is_delim)
            break;
        s++;
    }

    /* Only delimiters were left: nothing to return, forget the string. */
    if (*s == 0) {
        last = NULL;
        return NULL;
    }

    /* The token starts here; look for its end from the next character on. */
    begin = s;
    for (s = begin + 1; ; s++) {
        ch = *s;
        if (ch == 0) {
            /* Token runs to the end of the string: nothing to resume. */
            last = NULL;
            return begin;
        }
        for (d = delim; *d != 0; d++) {
            if (*d == ch) {
                /* Cut the token here and remember where to continue. */
                *s = 0;
                last = s + 1;
                return begin;
            }
        }
    }
    /* NOTREACHED */
}

/* Return 1 when ch is one of the characters in strDelimit, 0 otherwise.
 * The terminating NUL of strDelimit never counts as a delimiter. */
static int strtok_func_is_delim(char ch, const char *strDelimit)
{
    const char *pch;

    for (pch = strDelimit; *pch != '\0'; pch++)
    {
        if (*pch == ch)
        {
            return 1;
        }
    }
    return 0;
}

/*
 * strtok_func - split a string into tokens, one token per call.
 *
 * strToken   - string to tokenize, or NULL to continue with the string
 *              saved by the previous call.
 * strDelimit - NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when there is no token left
 * (including when NULL is passed before any string was ever supplied, and
 * when only delimiters remain). The delimiter that ends a token is
 * overwritten with '\0' in the caller's buffer. The resume position is
 * kept in a static variable.
 */
char* strtok_func(char *strToken,  const char *strDelimit)
{
    static char *pToken = NULL;
    char *pchRetToken = NULL;

    if (strToken != NULL)
    {
        pToken = strToken;
    }

    /* Test the pointer before it is dereferenced anywhere below. */
    if (pToken == NULL)
    {
        return NULL;
    }

    /* Skip leading delimiters so that an empty token is never returned. */
    while ((*pToken != '\0') && strtok_func_is_delim(*pToken, strDelimit))
    {
        pToken ++;
    }

    if (*pToken == '\0')
    {
        return NULL;
    }

    /* The token starts here; advance to its end. */
    pchRetToken = pToken;
    while ((*pToken != '\0') && !strtok_func_is_delim(*pToken, strDelimit))
    {
        pToken ++;
    }

    /* Cut the token at the delimiter and resume just after it next time. */
    if (*pToken != '\0')
    {
        *pToken = '\0';
        pToken ++;
    }

    return pchRetToken;
}

下文转自
http://blog.csdn.net/morewindows/article/details/8740315

strtok源码剖析
strtok函数可以用于分隔字符串，最近看了下这个函数的源代码，其中有

/* Bitmap of delimiter characters: 32 bytes * 8 bits = 256 bits, one bit per byte value 0..255. */
unsigned char map[32];
/* Clear control map */
for (count = 0; count < 32; count++)
map[count] = 0;

/* Set bits in delimiter table */
/* *ctrl >> 3 (the value divided by 8) selects the byte of the map; */
/* *ctrl & 7 (the value modulo 8) selects the bit inside that byte. */
/* The body runs before the loop test, so the terminating 0 of the */
/* delimiter string gets its bit set as well. */
/* NOTE(review): ctrl and count are declared outside this excerpt; ctrl is */
/* presumably an unsigned char pointer to the delimiter string - confirm. */
do {
	map[*ctrl >> 3] |= (1 << (*ctrl & 7));
} while (*ctrl++);

这段代码非常有意思，第一眼可能看不明白：为什么要用一个unsigned char map[32];数组来保存分隔字符？下面的map[*ctrl >> 3] |= (1 << (*ctrl & 7));更是有点古怪。在网上查了一下，并没有找到解释它的文章，因此写篇博客来说明一下。

这个长为32的数组与后面的左移、右移操作看起来令人迷惑，其实如果看过《位操作基础篇之位操作全面总结》（http://blog.csdn.net/morewindows/article/details/7354571）的“位操作与空间压缩”便不难想到，这里其实是个位操作的空间压缩技巧。因为unsigned char类型的数据只会从0到255，共256种取值，所以可以建立一个哈希表来记录哪些字符是分隔符：是则标记为1，否则标记为0。每个字符只需要1个比特，256个比特正好是256/8=32个字节，这就是map[32]的由来。然后在分隔字符串时，就能直接判断字符串的该位置是否要分割。详细请见MyStrtok的实现。

//strtok源码剖析 位操作与空间压缩
//http://blog.csdn.net/morewindows/article/details/8740315
//By MoreWindows( http://blog.csdn.net/MoreWindows )
#include <stdio.h>
// strtok源码剖析
#if !defined(_MSC_VER) && !defined(__cdecl)
#define __cdecl /* calling-convention keyword; expands to nothing where the compiler does not provide it */
#endif

/*
 * MyStrtok - split a string into tokens, one token per call, using a bitmap
 * of delimiter characters.
 *
 * string  - string to tokenize, or NULL to continue with the string saved
 *           by the previous call.
 * control - NUL-terminated set of delimiter characters.
 *
 * Returns a pointer to the next token, or NULL when there is no token left
 * (including when NULL is passed before any string was ever supplied). The
 * delimiter that ends a token is overwritten with '\0' in the caller's
 * buffer.
 *
 * The resume position is kept in a static variable. The original author
 * notes that the real strtok uses thread-local storage for this instead.
 */
char* __cdecl MyStrtok(char * string, const char * control)
{
	/* An unsigned char has 256 possible values (0..255). A table with one
	 * flag per value would need 256 entries; packing the flags one bit
	 * each shrinks it to 256 / 8 = 32 bytes. */
	enum { MAP_BYTES = 256 / 8 };

	static unsigned char *next_token = NULL;
	const unsigned char *ctrl = (const unsigned char *)control;
	unsigned char map[MAP_BYTES] = {0};
	unsigned char *str;
	char *token;

	/* Set bits in delimiter table. "/ 8" and "% 8" are the same as the
	 * ">> 3" and "& 7" of the CRT source: the first picks the byte, the
	 * second the bit inside it. The body runs before the loop test, so
	 * the terminating NUL is marked too. */
	do {
		map[*ctrl / 8] |= (unsigned char)(1 << (*ctrl % 8));
	} while (*ctrl++);

	/* Start from the given string, or continue from the saved position. */
	str = (string != NULL) ? (unsigned char *)string : next_token;

	/* Nothing was ever saved: report "no token" instead of reading
	 * through a null pointer. */
	if (str == NULL)
		return NULL;

	/* Find beginning of token (skip over leading delimiters). NUL has its
	 * bit set in the map, so it is tested for explicitly to stop there. */
	while ( *str && (map[*str / 8] & (1 << (*str % 8))) )
		str++;

	token = (char *)str;

	/* Find the end of the token. If it is not the end of the string,
	 * put a NUL there and step past it. */
	for ( ; *str ; str++ )
	{
		if ( map[*str / 8] & (1 << (*str % 8)) )
		{
			*str++ = '\0';
			break;
		}
	}

	/* Remember where the next call has to continue. */
	next_token = str;

	/* str did not move past the token start: there was no token. */
	return (token == (char *)str) ? NULL : token;
}
/*
 * Demo driver: prints a banner, then the sample string, then each token
 * that MyStrtok extracts from it, one per line. Returns 0.
 */
int main()
{
	/* Sample input and its delimiter set. */
	char szText[] = "ab,c...d(e)f(g)hj";
	char szFind[] = ",.()";
	char *pToken;

	printf("   strtok源码剖析 位操作与空间压缩\n");  
	printf(" - http://blog.csdn.net/morewindows/article/details/8740315 -\n");  
	printf(" - By MoreWindows( http://blog.csdn.net/MoreWindows ) - \n\n"); 

	printf("原字符串为: %s\n", szText);
	printf("分隔后为: \n");

	/* The first call passes the string; later calls pass NULL to continue. */
	for (pToken = MyStrtok(szText, szFind);
	     pToken != NULL;
	     pToken = MyStrtok(NULL, szFind))
	{
		printf("%s\n", pToken);
	}

	return 0;
}

运行结果如图所示（图片不能打开，请访问http://blog.csdn.net/morewindows/article/details/8740315）

[西门子面试题]strtok的理解以及实现

猜你喜欢