C language savior (simulated implementation of string functions and memory functions--11)

content

The C language itself does not have a string type, and strings are usually placed in constant strings or character arrays.

1. String functions with unlimited length

1.1 strlen

1.2 Three ways to simulate the strlen library function

1.3 strcpy string copy

1.4 Simulate strcpy

The strcpy function returns the starting address of the target space

The return type of the strcpy function is set to achieve chained access

1.5 strcat String Append

1.6 Simulate strcat

1.7 strcmp compares strings

1.8 Simulate the implementation of strcmp

2 String functions with limited length

2.1 strncpy

2.2 strncat

2.3 strncmp

2.4 strstr finds one string within another

2.5 Simulate the realization of strstr

2.6 strtok

2.7 strerror perror returns the error code and the corresponding error message

2.8 Character Classification Function

3. Memory operation functions

3.1 memcpy memory space data copy

3.2 Simulate the implementation of memcpy

 In the same memory copy, the target and source data spaces are intersected, we should use memmove

3.3 memmove can achieve overlapping memory copy

3.4 Simulation to achieve memmove

3.5 memcmp memory byte correspondence comparison

3.6 memset memory setting in bytes


The C language itself does not have a string type, and strings are usually placed in constant strings or character arrays.

1. String functions with unlimited length

1.1 strlen

size_t strlen ( const char * str );// function prototype; note that the return type is size_t

The string has '\0' as the end marker, and the strlen function returns the number of characters that appear before '\0' in the string (excluding '\0').

char arr[] = { 'a', 'b', 'c', 'd', 'e', 'f' };// six elements and no '\0' terminator, so strlen cannot measure it

char arr[10] = { 'a', 'b', 'c', 'd', 'e', 'f' };// size fixed at 10: the unlisted elements are zero, so strlen works

 What is the result below?

 // Both lengths are cast to int, so this is the signed subtraction 3 - 6 = -3.
 if ((int)strlen("abc") - (int)strlen("qwerty") > 0)
	{
		printf(">\n");
	}
	else
	{
		// This branch runs: -3 is not greater than 0.
		printf("<=\n");
	}

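Answer: with the (int) casts shown above, the difference is 3 - 6 = -3, so "<=" is printed. Without the casts, strlen returns the unsigned type size_t, the subtraction of two unsigned numbers wraps around to a huge positive number, and ">" is printed instead.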


1.2 Three ways to simulate the strlen library function

usually written

 #include <assert.h>
 #include <stdio.h>

/*
 * my_strlen - count the characters that precede the terminating '\0'.
 * str: NUL-terminated string; must not be NULL (checked with assert).
 * Returns the length of the string, not counting the terminator.
 */
size_t my_strlen(const char* str)
{
	// Count in size_t, the return type, so no int-to-unsigned conversion happens on return.
	size_t count = 0;
	assert(str);
	while (*str != '\0')
	{
		count++;
		str++;
	}
	return count;
}
 
 
/* Demo: print the length of "abcd" as measured by my_strlen. */
int main()
{
	char arr[] = "abcd";
	// my_strlen returns size_t, so keep the result as size_t and print it with %zu.
	size_t len = my_strlen(arr);
	printf("%zu\n", len);
	return 0;
}

recursion

 #include <assert.h>
 #include <stdio.h>

//my_strlen("abcdef")
//1+my_strlen("bcdef")
//1+1+my_strlen("cdef")
//1+1+1+ my_strlen("def")
//1+1+1+1+ my_strlen("ef")
//1 + 1 + 1 + 1 +1+my_strlen("f")
//1 + 1 + 1 + 1 + 1 + 1+ my_strlen("")
//1 + 1 + 1 + 1 + 1 + 1 + 0 = 6
 
/*
 * my_strlen - recursive string length.
 * str: NUL-terminated string; must not be NULL (checked with assert).
 * Returns the number of characters before the terminating '\0'.
 */
size_t my_strlen(const char* str)
{
	assert(str);
	// Base case: an empty string has length 0.
	if (*str == '\0')
	{
		return 0;
	}
	// Otherwise: this character plus the length of the remainder.
	return 1 + my_strlen(str + 1);
}
 
/* Demo: print the length of "abcd" as measured by the recursive my_strlen. */
int main()
{
	char arr[] = "abcd";
	// my_strlen returns size_t, so keep the result as size_t and print it with %zu.
	size_t len = my_strlen(arr);
	printf("%zu\n", len);
	return 0;
}

pointer-pointer method

 #include <assert.h>
 #include <stdio.h>


/*
 * my_strlen - string length by pointer subtraction.
 * arr: start of the character array; must not be NULL (checked with assert).
 * sz:  number of elements in the array; the scan never reads past arr + sz.
 * Returns the number of characters before the first '\0'. If no '\0' occurs
 * within sz elements, sz is returned; if sz <= 0, the result is 0.
 */
size_t my_strlen(const char* arr,int sz)
{
    assert(arr);
    const char* end = arr;
    const char* limit = arr + (sz > 0 ? sz : 0);
    // Walk to the terminator instead of assuming it sits in the last element:
    // an array such as char a[10] = "abc" has length 3, not 9.
    while (end < limit && *end != '\0')
    {
        end++;
    }
    // Pointer minus pointer yields the number of elements between them.
    return (size_t)(end - arr);
}
 
/* Demo: print the length of "hello" using the pointer-difference my_strlen. */
int main()
{
    char arr[] = "hello";
    // Element count of the array, including the terminating '\0'.
    int sz = (int)(sizeof(arr) / sizeof(arr[0]));
    // my_strlen returns size_t, so keep the result as size_t and print it with %zu.
    size_t len = my_strlen(arr, sz);
    printf("%zu", len);
    return 0;
}

1.3 strcpy string copy

String copy, copy the source string into the target space string, the matters needing attention

1. The source string must end with '\0'.

char arr1[20] = {0};
char arr2[] = {'a','b','c'};// no terminating '\0': using arr2 as a strcpy source is undefined behavior (may crash)

2. The '\0' in the source string will be copied to the target space.

#include <stdio.h>
#include <assert.h>
#include <string.h>

/* Demo: strcpy copies the source's terminating '\0' as well. */
int main()
{
	// Both are modifiable arrays; dst holds 13 bytes, more than src needs.
	char dst[] = "XXXXXXXXXXXX";
	char src[] = "abcdef";

	// The copied '\0' cuts the string short, so only "abcdef" is printed.
	strcpy(dst, src);
	printf("%s\n", dst);
	return 0;
}

3. The destination space must be large enough to hold the source string.

strcpy does not check whether the destination has enough space: it copies the whole source string regardless, so an undersized destination is overflowed and the program may crash.

char arr1[4] = "x";
char arr2[] = "abcdef";
strcpy(arr1, arr2);// arr2 needs 7 bytes but arr1 has only 4: buffer overflow, undefined behavior (may crash)

4. The target space must be variable.

char* arr1 = "qwertyuiop";// arr1 points to a string literal, which must not be modified
char arr2[] = "abcdef";
strcpy(arr1, arr2);// writes into the literal: undefined behavior (may crash)

1.4 Simulate strcpy

The strcpy function returns the starting address of the target space

The return type of the strcpy function is set to achieve chained access

/*
 * my_strcpy - copy the string src, terminator included, into dest.
 * dest: writable buffer; the caller must make it large enough.
 * src:  NUL-terminated source string.
 * Both pointers must be non-NULL (checked with assert).
 * Returns dest, which allows chained calls such as printf("%s", my_strcpy(a, b)).
 */
char* my_strcpy(char*dest, const char* src)
{
	assert(src && dest);
	char* out = dest;
	for (;;)
	{
		char c = *src++;
		*out++ = c;
		// Stop only after the '\0' itself has been stored.
		if (c == '\0')
		{
			break;
		}
	}
	return dest;
}

/* Demo: copy a literal into arr1 with my_strcpy and print the returned pointer. */
int main()
{
	char arr1[20] = { 0 };
	// A string literal must not be modified, so refer to it through a const pointer.
	const char* arr2 = "hello bit";

	printf("%s\n", my_strcpy(arr1, arr2));
	return 0;
}


1.5 strcat String Append

/* Demo: append "bit" to "hello " with strcat. */
int main()
{
	// "hello " (6) + "bit" (3) + '\0' (1) fills the 10 bytes of arr1 exactly.
	char arr1[10] =  "hello " ;
	// A string literal must not be modified, so refer to it through a const pointer.
	const char* arr2 = "bit";

	printf("%s\n", strcat(arr1, arr2));
	return 0;
}

Precautions:

1. The source string must end with '\0', and the destination must contain a '\0' as well: appending starts at the destination's first '\0'.

/* Demo: strcat starts appending at the destination's first '\0'. */
int main()
{
	// The first '\0' follows "hello"; the X characters behind it are not part of the string.
	char dst[20] = "hello\0XXXXX";
	char src[] = "bit";

	// "bit" and its terminator overwrite the bytes from that '\0' onward.
	char* joined = strcat(dst, src);
	printf("%s\n", joined);
	return 0;
}

2. The target space must be large enough to accommodate the content of the source string.

3. The target space must be modifiable.

1.6 Simulate strcat

/*
 * my_strcat - append the string src to the end of the string in dest.
 * dest: writable, NUL-terminated buffer with room for the combined string.
 * src:  NUL-terminated string to append; it is only read, hence const.
 * Both pointers must be non-NULL (checked with assert).
 * dest and src must not be the same string: the copy would overwrite the
 * terminator it is looking for and never stop.
 * Returns dest.
 */
char* my_strcat(char* dest, const char* src)
{
	assert(dest && src);
	char* ret = dest;
	// Find the '\0' that ends the destination string.
	while (*dest != '\0')
	{
		dest++;
	}
	// Copy src, including its '\0', starting at that position.
	while ((*dest++ = *src++) != '\0')
	{
		;
	}

	return ret;
}
/* Demo: append " bit" to "hello" with my_strcat and print the result. */
int main()
{
	char dst[20] = "hello";
	char src[] = " bit";

	// my_strcat returns the destination, so its result can be printed directly.
	char* joined = my_strcat(dst, src);
	printf("%s\n", joined);

	return 0;
}

How about appending the string to itself?

The simulation function we wrote cannot complete appending itself, and it is not recommended to use it like this


1.7 strcmp compares strings

The strcmp function compares not the length of the string 

but the values (ASCII codes) of the characters at corresponding positions in the two strings. If a pair is equal, the next pair is compared, and so on until a pair differs or both strings reach '\0' (whose ASCII code is 0).

 standard regulation:

If the first string is greater than the second string, return a number greater than 0

If the first string is equal to the second string, return 0

If the first string is less than the second string, return a number less than 0


1.8 Simulate the implementation of strcmp

/*
 * my_strcmp - compare two NUL-terminated strings character by character.
 * s1, s2: the strings to compare; both must be non-NULL (checked with assert).
 * Returns 0 if the strings are equal, a negative value if s1 sorts before s2,
 * and a positive value if s1 sorts after s2.
 */
int my_strcmp(const char* s1, const char* s2)
{
	assert(s1 && s2);
	while (*s1 == *s2)
	{
		if (*s1 == '\0')
		{
			return 0; // both strings ended together: equal
		}
		s1++;
		s2++;
	}
	// First differing pair. Compare as unsigned char, as the standard strcmp does,
	// so bytes above 127 do not turn negative where plain char is signed.
	return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
}

/* Demo: compare "abcd" with "abdc" and print the ordering and the raw result. */
int main()
{
	char lhs[] = "abcd";
	char rhs[] = "abdc";
	int ret = my_strcmp(lhs, rhs);

	// Only the sign of the result carries meaning.
	if (ret < 0)
	{
		printf("<\n");
	}
	else if (ret > 0)
	{
		printf(">\n");
	}
	else
	{
		printf("== \n");
	}
	printf("%d\n", ret);
	return 0;
}

2 String functions with limited length

2.1 strncpy

 Copies num characters from the source string to the destination space.

If the length of the source string is less than num, after copying the source string, append 0 to the end of the target until num.

char *strncpy( char *strDest, const char *strSource, size_t count );
/* Demo: strncpy with a count shorter than the source string. */
int main()
{
	char dst[] = "abcdef";
	char src[] = "qwewwwwww";

	// Exactly 5 characters are copied and no '\0' is added; dst keeps its own
	// sixth character and terminator, so the output is "qwewwf".
	strncpy(dst, src, 5);

	printf("%s\n", dst);
	return 0;
}

/* Demo: strncpy with a count longer than the source string. */
int main()
{
	char dst[] = "abcdef";
	char src[] = "qwe";

	// src has only 3 characters, so strncpy pads the remaining 2 positions with '\0'.
	strncpy(dst, src, 5);

	printf("%s\n", dst);
	return 0;
}


2.2 strncat

 Append num characters from the source string to the destination space.

/* Demo: strncat appends at most n characters and then writes a '\0'. */
int main()
{
	// The string in dst ends at the first '\0'; the X characters show what gets overwritten.
	char dst[20] = "abcdef\0XXXXX";
	char src[] = "qwe";

	// Three characters are appended and a terminating '\0' is stored after them.
	strncat(dst, src, 3);

	printf("%s\n", dst);
	return 0;
}


2.3 strncmp

/* Demo: strncmp compares only the first n characters. */
int main()
{
	char lhs[] = "abcdef";
	char rhs[] = "abcdq";

	// The first 4 characters ("abcd") match, so the result is 0.
	int ret = strncmp(lhs, rhs, 4);

	printf("%d\n", ret);

	return 0;
}


2.4 strstr finds one string within another

char * strstr ( const char *str1, const char * str2);

 Determine whether str2 is a substring of str1, if str2 appears in str1, return the address of the first appearance in str1

If not present, return a null pointer

/* Demo: locate "de" inside "abcdef" with strstr. */
int main()
{
	char haystack[] = "abcdef";
	char needle[] = "de";
	char* hit = strstr(haystack, needle);

	// strstr returns NULL when there is no match, so test before printing.
	if (hit != NULL)
	{
		printf("%s ", hit);
	}
	else
	{
		printf("找不到");
	}
	return 0;
}

2.5 Simulate the realization of strstr

 Idea: If the substring to be found is complicated, we need three pointers to assist

The s1 pointer points to str1, and the s2 pointer points to str2. The cur pointer points to str1, which is used to record the address where the match starts

If the corresponding positions of the two strings are not equal, str1 goes backward

If equal, start matching, we should remember the str1 position to start matching, because it may be equal, it may not be equal

If it ends at \0, str2 is a substring of str1

If they are not equal, re-find the address of the record location, go back +1. Go back and start the match again, where the str2 pointer re-points to the start address of the array

/*
 * my_strstr - find the first occurrence of str2 inside str1.
 * str1: NUL-terminated string to search in.
 * str2: NUL-terminated string to search for.
 * Both pointers must be non-NULL (checked with assert).
 * Returns a pointer to the start of the first match in str1, str1 itself when
 * str2 is empty, or NULL when str2 does not occur in str1.
 */
char* my_strstr(const char* str1, const char* str2)
{
	assert(str1 && str2);

	// An empty needle matches at the very start, even when str1 is empty too;
	// the search loop below would never run for an empty str1.
	if (*str2 == '\0')
	{
		return (char*)str1;
	}

	// cur marks the candidate start position in str1.
	for (const char* cur = str1; *cur != '\0'; cur++)
	{
		const char* s1 = cur;  // restart the comparison at the candidate position
		const char* s2 = str2; // and at the beginning of the needle

		// Advance while both strings have characters left and they agree.
		while (*s1 && *s2 && (*s1 == *s2))
		{
			s1++;
			s2++;
		}
		// The whole needle was consumed: the match starts at cur.
		if (*s2 == '\0')
		{
			return (char*)cur;
		}
	}

	return NULL; // no match
}

/* Demo: search "abbbcdef" for "bbc" with my_strstr. */
int main()
{
	char haystack[] = "abbbcdef";
	char needle[] = "bbc";
	char* ret = my_strstr(haystack, needle);

	// A NULL result means the substring was not found.
	if (ret != NULL)
	{
		printf("%s\n", ret);
	}
	else
	{
		printf("找不到子串\n");
	}
	return 0;
}

2.6 strtok

Role: specify the delimiter, let the array be segmented

char * strtok ( char * str, const char * sep );

 The sep parameter is a string that defines the set of characters to be used as separators

The first parameter specifies a string containing zero or more tokens separated by one or more delimiters in the sep string.

The strtok function finds the next token in str, ends it with \0, and returns a pointer to this token. (Note: The strtok function will change the string being manipulated, so the string segmented by the strtok function is generally the content of a temporary copy and can be modified.)

 The first parameter of the strtok function is not NULL, the function will find the first token in str, and the strtok function will save its position in the string.

The first parameter of the strtok function is NULL, and the function will start at the saved position in the same string and look for the next token.

If there are no more tokens in the string, a NULL pointer is returned.

/* Demo: split a string into tokens with strtok. */
int main()
{
	// NOTE(review): the scraped page showed "[email protected]" here; the value below is
	// reconstructed from the tokenized form given in the next comment - confirm.
	char arr[] = "lanyangyang@landawang.cunba";
	// strtok replaces each separator it finds with '\0', so the buffer it works on
	// ends up as "lanyangyang\0landawang\0cunba".
	char buf[50] = { 0 };
	const char* sep = "@. ";
	// Tokenize a copy so that arr itself stays intact.
	strcpy(buf, arr);

	// One call per token:
	// printf("%s\n", strtok(buf, sep));  // finds the first token only
	// printf("%s\n", strtok(NULL, sep)); // continues from the saved position
	// printf("%s\n", strtok(NULL, sep)); // continues from the saved position

	// Improved: loop until strtok reports that no tokens are left.
	char* str = NULL;
	for (str=strtok(buf, sep); str!=NULL; str=strtok(NULL, sep))
	{
		printf("%s\n", str);
	}

	return 0;
}


2.7 strerror perror returns the error code and the corresponding error message

char * strerror ( int errnum );

 When a library function fails, it stores an error code in errno (a global variable), similar to a website's 404 error code.

strerror translates an error code into the corresponding error message.

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Demo: turn error codes into messages with strerror and perror. */
int main()
{
	printf("%s\n", strerror(0));
	printf("%s\n", strerror(1));
	printf("%s\n", strerror(2));
	printf("%s\n", strerror(3));
	int* p = malloc(INT_MAX); // request a very large block from the heap
	if (p == NULL)
	{
		// Save errno right away: the printf below is allowed to change it.
		int saved_errno = errno;
		printf("%s\n", strerror(saved_errno));
		// perror prints the given text followed by the message for the current errno,
		// so restore the saved value first. strerror only converts a code to a message.
		errno = saved_errno;
		perror("Malloc");
		return 1;
	}
	// The allocation succeeded: release it so the demo does not leak.
	free(p);
	return 0;
}

Returns the address of the first character of the error message string corresponding to the error code

 


2.8 Character Classification Function

function Returns true if its argument meets the following conditions
iscntrl any control character
isspace Whitespace characters: space ' ', form feed '\f', line feed '\n', carriage return '\r', tab '\t' or vertical tab '\v'
isdigit Decimal digits 0~9
isxdigit Hexadecimal digits, including all decimal digits, lowercase letters a~f, uppercase letters A~F
islower lowercase letters a~z
isupper Capital letters A~Z
isalpha Letters a~z or A~Z
isalnum Letters or numbers, a~z, A~Z, 0~9
ispunct Punctuation marks, any graphic characters that are not numbers or letters (printable)
isgraph any graphic character
isprint Any printable character, including graphic characters and the space character

 

 Example: isdigit receives the character's code (as an int) and returns an int: non-zero if the character is a decimal digit, 0 if it is not.

#include <ctype.h>

/* Demo: isdigit returns non-zero for a decimal digit and 0 for anything else. */
int main()
{
	// Declare ret once and reuse it; declaring it twice does not compile.
	int ret = isdigit('5'); // non-zero: '5' is a decimal digit
	printf("%d\n", ret);

	ret = isdigit('Q');     // 0: 'Q' is not a decimal digit
	printf("%d\n", ret);

	return 0;
}
// Hand-written range test: is ch one of the lowercase letters 'a'..'z'?
char ch = 'A';

	if (ch >= 'a' && ch <= 'z')
	{

	}

Writing the check by hand like this is tedious; a single library function does the job:

int ret = islower(ch);// non-zero if ch is a lowercase letter, 0 otherwise - a quick check

Character conversion:

int tolower ( int c );
int toupper ( int c );
	int main()
{
	// Demo: convert one letter to upper and to lower case and print both.
	char letter = 'A';
	int upper = toupper(letter); // already upper case, so unchanged
	int lower = tolower(letter); // the lower-case counterpart

	putchar(upper);
	putchar(lower);

	return 0;
}


3. Memory operation functions

3.1 memcpy memory space data copy

void * memcpy ( void * destination, const void * source, size_t count );

Precautions:

1. The function memcpy copies count bytes of data, starting at source and moving forward, to the memory location of destination.

2. This function does not stop when it encounters '\0'.

3. If there is any overlap between source and destination, the result of the copy is undefined.

4. Return the starting address of the destination

/* Demo: copy the first five ints of arr1 into arr2 with memcpy. */
int main()
{
	int arr1[10] = { 1,2,3,4,5,6,7,8,9,10 };
	int arr2[5] = { 0 };
	// Copy before returning. sizeof(arr2) is the size of the destination
	// (20 bytes with a 4-byte int), so the copy can never overrun arr2.
	memcpy(arr2, arr1, sizeof(arr2));
	return 0;
}

3.2 Simulate the implementation of memcpy

Ideas:  

1. When the author implements the memcpy function, the author does not know what data you want to copy, similar to qsort 

2. When copying, the type conversion should be cast according to the data type, copying byte by byte

#include <stdio.h>
#include <string.h>
#include <assert.h>

//void* my_memcpy(void* dest, const void* src, size_t count)
//{
//	assert(src && dest);
//	while (count--)
//	{
//		*(char*)dest = *(char*)src;
//		dest = (char*)dest + 1;
//		src = (char*)src + 1;
//	}
// }

/*
 * my_memcpy - copy count bytes from src to dest, one byte at a time, front to back.
 * dest:  destination memory.
 * src:   source memory; it is only read.
 * count: number of bytes to copy.
 * Both pointers must be non-NULL (checked with assert).
 * Returns dest.
 */
void* my_memcpy(void* dest, const void* src, size_t count)
{
	assert(dest && src);
	// The element type is unknown here, so treat both regions as plain bytes.
	char* out = (char*)dest;
	const char* in = (const char*)src;
	for (size_t i = 0; i < count; i++)
	{
		out[i] = in[i];
	}

	return dest;
}


/* Demo: my_memcpy between two arrays, then inside a single array (overlapping). */
int main()
{
	int arr1[10] = { 1,2,3,4,5,6,7,8,9,10 };
	int arr2[5] = { 0 };
	// Separate arrays: arr2 receives the first 20 bytes of arr1.
	my_memcpy(arr2, arr1, 20);

	// Second experiment in its own array; declaring arr1 again would not compile.
	int arr3[10] = { 1,2,3,4,5,6,7,8,9,10 };
	// Hoped-for result: 1 2 1 2 3 4 5 8 9 10.
	// Source and destination overlap here, so a front-to-back byte copy overwrites
	// source bytes before it has read them and the result differs from the one hoped for.
	my_memcpy(arr3+2, arr3, 20);

	return 0;
}

But when we want to copy the data in the same space, the data is wrong

 The reason is that the data overwrites the space we want to copy

 In the same memory copy, the target and source data spaces are intersected, we should use memmove


3.3 memmove can achieve overlapping memory copy

void *memmove( void *dest, const void *src, size_t count );

#include <string.h>

/* Demo: memmove with overlapping source and destination. */
int main()
{
	int arr1[10] = { 1,2,3,4,5,6,7,8,9,10 };
	int* dst = arr1 + 2;
	// Source (arr1) and destination (arr1 + 2) overlap; memmove supports that.
	memmove(dst, arr1, 20);
	return 0;
}

1

3.4 Simulation to achieve memmove

Ideas:

Copying 34567 to 12345 will not overwrite the data (when dest<src)

 And if we want to copy 34567 to 45678, it will overwrite the data, we can first put 7 on 8, 6 on 7, 5 on 6... Copy the data from the back to the front, so that the data will not be covered

 Summary: When the address to be copied is dest > src address, copy from back to front; when dest < src, copy from front to back

When there is no intersection between dest and src space, the relationship between the front and back does not matter. We copy from back to front by default here (convenient)

{
	if (dest > src)
	{
		;  // copy from back to front
	}
	else
	{
		;  // copy from front to back
	}
}

It can also be written in another way

// Back-to-front only when dest starts inside the source range [src, src + count).
if (dest > src && dest<((char*)src+count))
	{
		;// copy from back to front
	}
	else
	{
		;// copy from front to back
	}

Code idea: The code from front to back is memcpy simulation, from back to front, we need +20 bytes to the end of dest and src

/*
 * my_memmove - copy count bytes from src to dest; the regions may overlap.
 * dest:  destination memory.
 * src:   source memory; it is only read.
 * count: number of bytes to copy.
 * Both pointers must be non-NULL (checked with assert).
 * Returns dest.
 */
void* my_memmove(void* dest, const void* src, size_t count)
{
	assert(dest && src);
	char* out = (char*)dest;
	const char* in = (const char*)src;

	if (dest > src)
	{
		// dest is above src: copy from the last byte down to the first, so source
		// bytes are read before an overlapping write can reach them.
		for (size_t i = count; i > 0; i--)
		{
			out[i - 1] = in[i - 1];
		}
	}
	else
	{
		// dest is at or below src: copy from the first byte up to the last.
		for (size_t i = 0; i < count; i++)
		{
			out[i] = in[i];
		}
	}
	return dest;
}


	int main()
{
	// Demo: move the first 20 bytes of the array two elements up, inside the same array.
	int data[10] = { 1,2,3,4,5,6,7,8,9,10 };
	int* dst = data + 2;
	my_memmove(dst, data, 20);
	// Opposite direction, for comparison: my_memmove(data, data + 2, 20);
	return 0;
}

3.5 memcmp memory byte correspondence comparison

int memcmp ( const void * ptr1, 
 const void * ptr2, 
 size_t num );// num: the number of bytes to compare

 

 int main()
{
	// Demo: memcmp compares raw bytes, not whole ints.
	int lhs[] = { 1,2,3,4,5 };
	int rhs[] = { 1,2,3,4,0x11223305 };

	// Only the first 18 bytes are compared, so the comparison stops partway
	// through the last element (assuming a 4-byte int - confirm for the target).
	int ret = memcmp(lhs, rhs, 18);
	printf("%d\n", ret);

	return 0;
}


3.6 memset memory setting in bytes

void *memset( void *dest, int c, size_t count );// destination memory, byte value to set, number of bytes

 int main()
{
	// Demo: memset fills memory one byte at a time.
	int values[] = { 0x11111111,0x22222222,3,4,5 };
	// Each of the first 20 bytes is set to 6; this does not store the int 6 in each element.
	memset(values, 6, 20);
	return 0;
}

Guess you like

Origin blog.csdn.net/weixin_63543274/article/details/123986777