Learning Data Structures in C: The String Structure

One, the introduction of the string

A string structure is a data structure made up of several elements of the same type, followed by an end marker. A string is a string structure whose elements are characters; '\0' is its end marker. It can be used to store words, articles, Chinese characters, and other text.

As computers and programming languages have developed, strings have been used more and more in programs. A character string is often simply called a "string". Operations on a string act on all of its characters, up to the end marker '\0'. If a string has no '\0' at the end, problems such as garbled output, segmentation faults, and dirty data may occur.

The functions and algorithms that the string structure should have are:

​ Create: define string

​ Destroy: release the string

​ Empty: delete all characters

​ Copy: it is the strcpy function

​ Connection: it is the strcat function

​ Comparison: it is the strcmp function

​ Length: the strlen function

​ Query string: the strstr function

String Representation and Implementation

There are generally two ways to implement strings. Both use a sequential list (an array of characters), but they differ in the kind of memory that holds it:

1. Store the characters in stack memory using a fixed-length buffer. Once the number of characters exceeds the capacity of the buffer, the string has to be truncated to prevent out-of-bounds memory access. It is enough to be aware of this method.

2. Store the characters in heap memory, and automatically grow the heap allocation as needed when the string is manipulated.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// A heap-backed string object: a character buffer plus its recorded size.
typedef struct String String;

struct String
{
	char* ch;	// NUL-terminated character buffer, or NULL when nothing is stored
	size_t size;	// number of bytes recorded for the buffer at ch
};

// Create an empty string object.
//
// Returns a heap-allocated String with no character buffer (ch == NULL,
// size == 0), or NULL if the allocation fails.
String* create_string(void)
{
	String* str = malloc(sizeof *str);
	if(NULL == str)
	{
		// Out of memory: report it instead of writing through NULL.
		return NULL;
	}

	str->ch = NULL;
	str->size = 0;
	return str;
}

// Return the number of characters stored in str, not counting the
// terminating '\0'. A string that has no buffer (ch == NULL) has length 0.
size_t len_string(String* str)
{
	if(NULL == str->ch)
	{
		// strlen(NULL) is undefined behaviour; a buffer-less string is empty.
		return 0;
	}

	return strlen(str->ch);
}

// Copy the C string ch into str, growing str's buffer when it is too small.
//
// str: destination string; its buffer is reallocated only when it is missing
//      or when strlen(ch)+1 exceeds the recorded size.
// ch:  NUL-terminated source text. Copying a string onto itself
//      (ch == str->ch) is allowed.
//
// If the buffer cannot be grown, str is left unchanged.
void copy_string(String* str,const char* ch)
{
	size_t size = strlen(ch)+1;
	if(NULL == str->ch || size > str->size)
	{
		// Keep the old pointer until realloc succeeds, so that a failure
		// neither leaks the old buffer nor leaves ch set to NULL.
		char* grown = realloc(str->ch,size);
		if(NULL == grown)
		{
			return;
		}
		str->ch = grown;
		str->size = size;
	}

	// memmove rather than strcpy: strcpy is undefined for overlapping
	// source and destination. size includes the terminating '\0'.
	memmove(str->ch,ch,size);
}

// 构建字符串
String* assign_string(const char* ch)
{
    
    
	String* str = create_string();
	copy_string(str,ch);
	return str;
}

// Report whether str holds no characters: either it has no buffer
// (ch == NULL) or its buffer holds the empty string "".
bool empty_string(String* str)
{
	return NULL == str->ch || '\0' == str->ch[0];
}

// Assignment (deep copy): make str1 hold its own copy of str2's text.
//
// str1: destination; its buffer is grown when it is missing or too small.
// str2: source; it is not modified.
//
// Assigning a string to itself is a no-op. If str2 has no buffer, str1
// becomes the empty string. If memory cannot be obtained, str1 is left
// unchanged.
void sav_string(String* str1,String* str2)
{
	if(str1 == str2)
	{
		return;
	}

	if(NULL == str2->ch)
	{
		// The source holds nothing: empty the destination in place rather
		// than passing NULL to a string function.
		if(NULL != str1->ch)
		{
			str1->ch[0] = '\0';
		}
		return;
	}

	size_t size = strlen(str2->ch)+1;
	if(NULL == str1->ch || str1->size < size)
	{
		// Keep the old pointer until realloc succeeds so a failure does
		// not leak the buffer or leave ch set to NULL.
		char* grown = realloc(str1->ch,size);
		if(NULL == grown)
		{
			return;
		}
		str1->ch = grown;
		str1->size = size;
	}

	// memmove tolerates the two objects sharing one buffer (for example
	// after a shallow struct copy); size includes the terminating '\0'.
	memmove(str1->ch,str2->ch,size);
}

// Compare the text of two strings.
//
// Returns a value less than, equal to, or greater than zero, exactly as
// strcmp() does for the two character buffers. A string with no buffer
// (ch == NULL) compares as the empty string "".
int cmp_string(String* str1,String* str2)
{
	const char* lhs = NULL != str1->ch ? str1->ch : "";
	const char* rhs = NULL != str2->ch ? str2->ch : "";
	return strcmp(lhs,rhs);
}

// Concatenation: append the text of str2 to the end of str1.
//
// str1: destination; its buffer is grown when it is missing or too small
//       for both texts plus the terminating '\0'.
// str2: source; may be the same object as str1 (the text is then doubled).
//
// A string with no buffer (ch == NULL) is treated as empty. If memory
// cannot be obtained, str1 is left unchanged.
void cat_string(String* str1,String* str2)
{
	size_t len1 = NULL != str1->ch ? strlen(str1->ch) : 0;
	size_t len2 = NULL != str2->ch ? strlen(str2->ch) : 0;
	size_t size = len1+len2+1;

	if(NULL == str1->ch || size > str1->size)
	{
		// Keep the old pointer until realloc succeeds so a failure does
		// not leak the buffer or leave ch set to NULL.
		char* grown = realloc(str1->ch,size);
		if(NULL == grown)
		{
			return;
		}
		str1->ch = grown;
		str1->size = size;
	}

	if(len2 > 0)
	{
		// str2->ch is read only after the realloc, so when str1 == str2
		// the source is the new buffer and not the one realloc released.
		// The first len2 source bytes never overlap the destination tail.
		memcpy(str1->ch+len1,str2->ch,len2);
	}
	str1->ch[len1+len2] = '\0';
}

// Empty the string: release its character buffer and reset it to the
// no-buffer state (ch == NULL, size == 0). The String object itself stays
// valid.
void clear_string(String* str)
{
	free(str->ch);
	// Reset the pointer so that later calls do not free or realloc the
	// released buffer a second time.
	str->ch = NULL;
	str->size = 0;
}

// Destroy the string: release its character buffer and the String object.
// Passing NULL is a no-op, mirroring free(). str must not be used afterwards.
void destroy_string(String* str)
{
	if(NULL == str)
	{
		return;
	}

	free(str->ch);
	free(str);
}

// Demo: build two strings, append the second to the first, and print the
// result. Returns EXIT_FAILURE if either string cannot be built.
int main(int argc,const char* argv[])
{
	/*
	String* str1 = assign_string("hehe");
	String* str2 = create_string();
	// Shallow copy: the ch members of both objects point at the same
	// characters, so destroying one object affects the other.
	*str2 = *str1; // str2->ch = str1->ch; str2->size = str1->size;
	destroy_string(str1);
	puts(str2->ch);
	// Deep copy: when a struct has a pointer member that points into heap
	// memory, such variables must not be assigned directly (shallow copy);
	// to avoid problems a deep copy has to be implemented.
	sav_string(str2,str1);
	destroy_string(str1);
	puts(str2->ch);
	*/
	String* str1 = assign_string("hehe");
	String* str2 = assign_string("xixi12rfaspoikjrqw;elifkj;lasejkrf;oawlikeujf;olaeirjtfasldkjf;qlwiejfa;sldkjf;qwlsekjfa;sldkfj;l");
	if(NULL == str1 || NULL == str2)
	{
		// Release whichever string was built before giving up.
		if(NULL != str1)
		{
			destroy_string(str1);
		}
		if(NULL != str2)
		{
			destroy_string(str2);
		}
		return EXIT_FAILURE;
	}

	cat_string(str1,str2);
	puts(str1->ch);

	// Every string that was built must also be destroyed.
	destroy_string(str2);
	destroy_string(str1);

	return 0;
}

The meaning of the encapsulated string

1. Once the string is encapsulated in a data structure, the user no longer needs to manage the string's storage. In C, however, this encapsulation brings little benefit, because the language's syntax makes the wrapped string operations more cumbersome to use than plain C strings.

2. We rewrite the data structure in C++ language, because the syntax of C++ will make the data structure more convenient to use.

Algorithm for substring query

​ Assuming there are two strings str1 and str2, the substring query is to query whether str2 exists in the string str1, and return the first occurrence position of str2 if it exists. This operation is called substring query.

// Find the first occurrence of str2 inside str1 (brute-force search).
//
// Each start position i in str1 is tried in turn, comparing str2 against
// str1 character by character from that position.
//
// Returns a pointer to the first match inside str1, or NULL when str2 does
// not occur. An empty str2 matches at the start of str1, even when str1 is
// itself empty. Both arguments must be non-NULL.
char *str_str(const char *str1, const char *str2)
{
	assert(NULL != str1 && NULL != str2);
	for (size_t i = 0;; i++)
	{
		size_t j = 0;
		// A mismatch also ends this loop at str1's terminator, so
		// str1[i + j] never reads past the end of str1.
		while ('\0' != str2[j] && str2[j] == str1[i + j])
		{
			j++;
		}
		if ('\0' == str2[j])
		{
			return (char *)str1 + i;
		}
		// Test for the end of str1 only after trying the position, so
		// that an empty str2 is still found in an empty str1.
		if ('\0' == str1[i])
		{
			return NULL;
		}
	}
}

// Find the first occurrence of str2 inside str1 using a single loop with
// backtracking.
//
// i walks str1 and j walks str2. On a mismatch, i moves back to one
// position after the start of the current attempt and j restarts at 0.
//
// Returns a pointer to the first match inside str1, or NULL when str2 does
// not occur. An empty str2 matches at the start of str1. Both arguments
// must be non-NULL.
char *str_str(const char *str1, const char *str2)
{
	assert(NULL != str1 && NULL != str2);
	// size_t indices: an int index would overflow on strings longer than
	// INT_MAX characters.
	size_t i = 0, j = 0;
	while ('\0' != str1[i] && '\0' != str2[j])
	{
		if (str1[i] == str2[j])
		{
			i++;
			j++;
		}
		else
		{
			// The current attempt started at i - j; retry one further on.
			i = i - j + 1;
			j = 0;
		}
	}
	// All of str2 was consumed, so the match began j characters before i.
	return '\0' == str2[j] ? (char *)str1 + i - j : NULL;
}

// Find the first occurrence of str2 inside str1 using a rolling character
// sum.
//
// The sum of the characters of str2 is compared with the sum of a window
// of str1 of the same length. The characters are compared only when the
// sums agree, and the window then slides one character to the right.
//
// Returns a pointer to the first match inside str1, or NULL when str2 does
// not occur. An empty str2 matches at the start of str1. Both arguments
// must be non-NULL.
char* str_str(const char* str1,const char* str2)
{
	assert(NULL != str1 && NULL != str2);
	// Unsigned sums: wrap-around is well defined, and both sums wrap the
	// same way, so equal windows still produce equal sums.
	unsigned sum1 = 0 , sum2 = 0;
	size_t len = 0;
	while('\0'!=str2[len]&&'\0'!=str1[len])
	{
		sum1 += (unsigned char)str1[len];
		sum2 += (unsigned char)str2[len];
		len++;
	}

	if('\0' != str2[len])
	{
		// str1 ended first: it is shorter than str2 and cannot contain it.
		return NULL;
	}

	// The window is str1[i-len .. i-1]. It is tested before looking at
	// str1[i], so the last window, which ends at the terminator, is
	// checked as well.
	for(size_t i=len; ; i++)
	{
		if(sum1 == sum2 && 0 == strncmp(str1+i-len,str2,len))
		{
			return (char*)str1+i-len;
		}
		if('\0' == str1[i])
		{
			break;
		}

		// Slide the window: drop its first character, take in str1[i].
		sum1 -= (unsigned char)str1[i-len];
		sum1 += (unsigned char)str1[i];
	}

	return NULL;
}

Guess you like

Origin blog.csdn.net/m0_62480610/article/details/126233885