【Data structure】Definition and algorithm of string

string

1. Definition
The fixed-length sequential storage of the string structure is similar to the sequential storage structure of the linear table, using a group of storage units with continuous addresses to store the character sequence of the string value.
2. Structure
In the fixed-length sequential storage structure of strings, each defined string variable is allocated a fixed-length storage area according to the predefined size.

	It can then be described with a fixed-length array as follows.
	
     /* Fixed-length string type: an array of MAXLEN + 1 unsigned chars.
      * Element 0 stores the string length; the characters start at element 1. */
     typedef unsigned char SString[MAXLEN+1];

Note:
Use SString[0] to store the string length information;
add a terminator '\0', which is not counted in the string length, after the string value;
the actual length of the string may vary freely within the predefined maximum length, and any part of the string value that exceeds the predefined length is discarded, which is called "truncation".

Terminology:
String length: the number of characters in the string; a string of zero characters is called an empty string, denoted ∅.
Space string: a string composed of one or more spaces, whose length is the number of space characters in the string.
Substring: a subsequence composed of any consecutive characters in a string s is called a substring of s, and the string s containing the substring is called the main string accordingly.
Substring position: The serial number of a character in the sequence is called the position of the character in the string. The position of the substring in the main string is represented by the position of the first character in the main string.
String equality: Two strings are equal if and only if the values of the two strings are equal. That is, the two strings are equal only when their lengths are equal and the characters in each corresponding position are equal.

string type definition

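		ADT string {
					Data objects:
					Data relations:
					Operations:
						StrAssign(&T,chars);   // assignment: build a string T whose value equals chars
						StrCompare(S,T);   // comparison: returns a value > 0 if S > T, ……
						StrLength(S);   // returns the length of the string
						Concat(&T,S1,S2);   // concatenation: T returns the new string S1+S2
						SubString(&Sub,S,p,len);  // returns the substring of S of length len starting at position p
						……
						Index(S,T,p);  // returns the position of the first occurrence of substring T in main string S after character p
						Replace(&S,T,V);  // replaces every occurrence of substring T in S with V
		}string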

Basic operations on strings

/ — Initialization — /

/*--- Initialization ---*/
/*
 * Resets s to the empty string.
 *
 * s[0] holds the length and the characters start at s[1], followed by a
 * '\0' terminator. Both the length byte and the terminator are written so
 * that routines reading the value from s + 1 see an empty string instead
 * of whatever was previously in the buffer.
 */
void InitSString(SString s)
{
    s[0] = 0;       /* length byte: no characters stored */
    s[1] = '\0';    /* terminate the (empty) value starting at s[1] */
}

/ — string print — /

/*--- Print string ---*/
/*
 * Writes a newline followed by the characters of s to standard output.
 * The text starts at s + 1 because s[0] is the length byte; printing stops
 * at the '\0' terminator that follows the last character.
 */
void PrintString(SString s)
{
    printf("\n%s", (const char *)(s + 1));
}

/ — String assignment — /

/*--- String assignment ---*/
/*
 * Copies the C string str into s.
 *
 * s[0] receives the length, the characters go to s[1..len], and a '\0'
 * terminator is written at s[len + 1]. Because the buffer has MAXLEN + 1
 * slots and two of them are taken by the length byte and the terminator,
 * at most MAXLEN - 1 characters fit; anything beyond that is discarded
 * (truncation) instead of being written past the end of the buffer.
 *
 * Assumes MAXLEN - 1 fits in an unsigned char, since the length is stored
 * in s[0].
 */
void StrAssign(SString s,char *str)
{
    size_t raw_len = strlen(str);
    int len;

    if (raw_len > (size_t)(MAXLEN - 1))
    {
        len = MAXLEN - 1;       /* truncate: keep only what fits */
    }
    else
    {
        len = (int)raw_len;
    }

    for (int i = 0; i < len; i++)
    {
        s[i + 1] = str[i];
    }
    s[len + 1] = '\0';
    s[0] = (unsigned char)len;
}

/ — find the length — /

/*--- Get length ---*/
/*
 * Returns the number of characters stored in s.
 *
 * The length is kept in s[0], so it is read directly. Scanning from s for
 * a '\0' byte would count the length byte itself and report one character
 * too many for every non-empty string.
 */
int Strlenth(SString s)
{
    return (int)s[0];
}

/ — create string — /

/*--- Create string ---*/
/*
 * Initializes s and fills it with the contents of the C string str.
 *
 * Returns true on success. Returns false (leaving s initialized by
 * InitSString) when str is a null pointer. The function is declared bool,
 * so every path returns a value.
 */
bool CreatSString(SString s,char *str)
{
    InitSString(s);         /* start from a freshly initialized string */
    if (!str)
    {
        return false;       /* nothing to copy from */
    }
    StrAssign(s,str);       /* copy the characters and set the length */
    return true;
}

/ — string concatenation — /

/*--- String concatenation ---*/
/*
 * Stores the concatenation of s1 and s2 in t.
 *
 * The characters of s1 are copied first, then as many characters of s2 as
 * still fit. The buffer has MAXLEN + 1 slots, of which slot 0 is the length
 * byte and one slot is needed for the '\0' terminator, so the result holds
 * at most MAXLEN - 1 characters; the excess is discarded (truncation).
 * t[0] is set to the number of characters actually stored and the
 * terminator is written immediately after the last one.
 *
 * s1 is copied into t before s2 is read, so t must not be the same buffer
 * as s2.
 */
void Concat(SString t,SString s1,SString s2)
{
    const int capacity = MAXLEN - 1;    /* max characters besides length byte and '\0' */
    int len1 = (int)s1[0];
    int len2 = (int)s2[0];

    /* Clamp both parts so the total never exceeds the capacity. */
    if (len1 > capacity)
    {
        len1 = capacity;
    }
    if (len2 > capacity - len1)
    {
        len2 = capacity - len1;
    }

    for (int i = 1; i <= len1; i++)
    {
        t[i] = s1[i];
    }
    for (int j = 1; j <= len2; j++)
    {
        t[len1 + j] = s2[j];
    }

    t[len1 + len2 + 1] = '\0';
    t[0] = (unsigned char)(len1 + len2);
}

/ — find substring — /

/*--- Get substring ---*/
/*
 * Copies len characters of s, starting at 1-based position p, into sub.
 *
 * Returns false without touching sub when p is outside 1..s[0], when len
 * is negative, or when len exceeds the number of characters from p to the
 * end of s. Otherwise writes the characters to sub[1..len], terminates
 * them with '\0', stores len in sub[0] and returns true.
 */
bool SubString(SString sub,SString s,int p,int len)
{
    if (p < 1 || p > s[0])
    {
        return false;               /* start position out of range */
    }
    if (len < 0 || len > s[0] - p + 1)
    {
        return false;               /* requested length does not fit */
    }

    int k = 0;
    while (k < len)
    {
        sub[k + 1] = s[p + k];      /* copy one character */
        k++;
    }
    sub[len + 1] = '\0';
    sub[0] = len;                   /* record the substring length */
    return true;
}

/ — string comparison — /

/*--- String comparison ---*/
/*
 * Compares the values of s and t character by character, starting at
 * index 1 (index 0 is the length byte) and relying on the '\0' terminator
 * that follows each value.
 *
 * Returns 0 when both values end at the same position with no difference;
 * otherwise returns the difference between the first pair of characters
 * that differ (s's character minus t's character).
 */
int StrCompare(SString s,SString t)
{
    for (++s, ++t; *s != '\0' || *t != '\0'; ++s, ++t)
    {
        int diff = *s - *t;
        if (diff != 0)
        {
            return diff;    /* first mismatch decides the result */
        }
    }
    return 0;               /* both values ended without a mismatch */
}

/ — delete substring — /

/*--- Delete substring ---*/
/*
 * Deletes len characters from s, starting at 1-based position pos.
 *
 * The current length is read from s[0]. The request is ignored (s is left
 * unchanged) when pos is less than 1, len is negative, or the range
 * pos..pos+len-1 extends past the end of the string. Otherwise the
 * characters after the deleted range are shifted left by len, the length
 * byte is updated and the '\0' terminator is rewritten after the new last
 * character.
 */
void StrDelete(SString s,int pos,int len)
{
    const int s_len = (int)s[0];    /* current number of characters */

    if (pos < 1 || len < 0 || len > s_len - pos + 1)
    {
        return;                     /* range does not lie inside the string */
    }

    for (int i = pos + len; i <= s_len; i++)
    {
        /* move every character after the deleted range len places forward */
        s[i - len] = s[i];
    }

    s[0] = (unsigned char)(s_len - len);
    s[s_len - len + 1] = '\0';
}

/ — string clear — /

/*--- Clear string ---*/
/*
 * Makes s the empty string.
 *
 * Besides zeroing the length byte s[0], the terminator is written at s[1];
 * otherwise the old characters would still be visible to routines that
 * read the value from s + 1 up to the '\0'.
 */
void StrClear(SString s)
{
    s[0] = 0;       /* length byte: no characters stored */
    s[1] = '\0';    /* cut off the previous contents */
}

/ — pattern matching (kmp algorithm) — /

/*--- Pattern matching (KMP algorithm) ---*/

/* GetNext is defined further down in this file; it is declared here so the
 * call below has a prototype in scope. */
void GetNext(char *t,int next[]);

/*
 * Searches the string s for the first occurrence of the C string str.
 *
 * Returns the 1-based position in s where the match starts, or -1 when
 * str does not occur in s. An empty pattern matches at position 1.
 *
 * The text length is taken from the length byte s[0]. A pattern longer than
 * the text cannot match, and GetNext fills next[0..strlen(str)], so such
 * patterns (and any pattern that would not fit in next[]) are rejected
 * before the table is built.
 */
int StrIndex_kmp(SString s, char *str)
{
    int next[MAXLEN];
    int sLen = (int)s[0];
    int pLen = (int)strlen(str);
    int i = 1;      /* 1-based index into s */
    int j = 0;      /* 0-based index into str */

    if (pLen > sLen || pLen >= MAXLEN)
    {
        return -1;
    }

    GetNext(str,next);
    while (i <= sLen && j < pLen)
    {
        /* j == -1 means no prefix of the pattern can be reused: advance both.
         * The pattern character is converted to unsigned char so it compares
         * correctly with the unsigned bytes stored in s. */
        if (j == -1 || s[i] == (unsigned char)str[j])
        {
            i++;
            j++;
        }
        else
        {
            /* mismatch: keep i and fall back in the pattern */
            j = next[j];
        }
    }

    if (j == pLen)
    {
        return i - pLen;
    }
    return -1;
}
/*--- Build the next array ---*/
/*
 * Fills next[0..strlen(t)] for the pattern t.
 *
 * next[0] is always -1. For each later position the entry is the pattern
 * index to fall back to; when the character at the new position equals the
 * character at the fallback index, the fallback's own entry is reused
 * instead, so a comparison known to fail is skipped.
 *
 * next must have room for strlen(t) + 1 entries.
 */
void GetNext(char *t,int next[])
{
    int pos = 0;
    int border = -1;

    next[0] = -1;
    const int pattern_len = strlen(t);

    while (pos < pattern_len)
    {
        if (border != -1 && t[pos] != t[border])
        {
            border = next[border];      /* mismatch: fall back and retry */
            continue;
        }

        ++pos;
        ++border;
        /* same character at both positions: inherit the fallback's entry */
        next[pos] = (t[pos] == t[border]) ? next[border] : border;
    }
}

Other storage structures for strings

1. Sequential dynamic storage

/* String stored in a character array together with an explicit length.
 * NOTE(review): ch is sized at compile time by maxsize; a heap-allocated
 * variant would hold a char pointer instead. Confirm which is intended. */
typedef struct {
    char ch[maxsize];   /* character storage, maxsize slots */
    int  length;        /* presumably the number of characters in use; verify */
} String, *string;

2. Chain storage

	/* One node of a chained (linked) string: a block of maxlen characters
	 * plus a link to the following node. */
	typedef struct chunk {
		char ch[maxlen];        /* characters held by this node */
		struct chunk *next;     /* following node in the chain */
	} chunk;

	/* Chained string built from chunk nodes. */
	typedef struct {
		chunk *head, *tail;     /* presumably the first and last node; verify */
		int len;                /* presumably the total string length; verify */
	} lstring;

When maxlen=1, each node stores a single character, which is convenient for insertion and deletion, but the storage space utilization is too low; when maxlen>1, each node stores multiple characters, and when the last node is not full, a specific character (such as "#") is used to fill the remaining slots. In this case the storage density is improved, but insertion and deletion become more complicated, because the splitting and merging of nodes must be considered.

Guess you like

Origin blog.csdn.net/qq_25218219/article/details/121063731