Trie tree parsing

theoretical explanation

A trie (often called a "trie tree") is, of course, a tree structure. Every node of the tree except the root stores one character, and a node at which a stored string ends (a leaf node in the simplest case) carries an additional flag marking it as the end of a string. In the implementation below, this flag is a count attribute added to the node structure: if a stored string ends at the node, its count attribute is greater than 0.


QQ20180505-0.png

The picture is ugly, sorry for that. It shows a trie that stores the four strings {"cat", "cd", "dog", "ff"}. Now suppose we are given the string "asddsadacat" and want the position at which any of those four strings first occurs in it, or -1 if none of them occurs. Answering this only requires walking the trie. If you don't understand it yet, read the implementation below and compare it with this picture; it should then be easy to understand.


usefulness

Each node of a trie stores one character, so tries are often used to store, sort, and search strings. The code below searches for the position of one string inside another (the main string): if the string is found, its position in the main string is returned; otherwise the result is -1. What actually motivated me to study tries was the Aho-Corasick algorithm, which I will cover, together with its use cases, in a later blog post. You need to understand this article before reading about that algorithm.



Code

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of child slots per node: one for each lowercase letter 'a'..'z'. */
#define MAX_CHILD 26

/*
 * One node of the trie.
 *
 * A node does not store its own character; the character is implied by the
 * slot of the parent's child[] array that points to the node (slot c - 'a').
 */
struct tree
{
        /* Number of inserted strings that end at this node; 0 if none do. */
        int count;
        /* Child for each letter, or NULL when no stored string continues
         * with that letter. */
        struct tree *child[MAX_CHILD];
};

/* Tree_node is the node object itself; Trie_node is a pointer to one. */
typedef struct tree Tree_node;
typedef struct tree *Trie_node;

/*
 * Allocate one zero-filled trie node (count is 0 and every child slot is
 * cleared, exactly as the memset-based version produced).
 *
 * Returns the new node, or NULL if the allocation fails. The caller owns the
 * node; a whole tree is released with destroyTrie().
 */
Trie_node createNode(void)
{
        /* calloc hands back zeroed memory or NULL, so a failed allocation is
         * reported to the caller instead of being written through. */
        return calloc(1, sizeof(Tree_node));
}

/*
 * Insert the word `str` into the trie rooted at `root`.
 *
 * Only the letters 'a'..'z' can be stored, because each node has one child
 * slot per lowercase letter. Inserting the same word again increments the
 * count of its final node. An empty string marks the root itself.
 *
 * Returns 1 on success. Returns 0 if `root` or `str` is NULL, if `str`
 * contains a character outside 'a'..'z' (the trie is left untouched), or if
 * a node cannot be allocated (already-created prefix nodes stay in the trie
 * with count 0 and are released by destroyTrie()).
 */
int insertNode(Trie_node root, const char *str)
{
        if(root == NULL || str == NULL)
        {
                return 0;
        }

        /* Number of child slots, taken from the array itself. */
        const int slots = (int)(sizeof root->child / sizeof root->child[0]);

        /* Validate the whole word first: an out-of-range index would access
         * memory outside child[], and rejecting up front avoids leaving a
         * half-inserted word behind. */
        for(const char *p = str; *p != '\0'; p++)
        {
                int idx = *p - 'a';
                if(idx < 0 || idx >= slots)
                {
                        return 0;
                }
        }

        Trie_node node = root;
        for(const char *p = str; *p != '\0'; p++)
        {
                int idx = *p - 'a';
                if(node->child[idx] == NULL)
                {
                        node->child[idx] = createNode();
                        if(node->child[idx] == NULL)
                        {
                                return 0;
                        }
                }
                node = node->child[idx];
        }

        /* Mark the node reached by the last character as the end of a word. */
        node->count++;
        return 1;
}

/*
 * Find where a word stored in the trie first occurs in `handle_str`.
 *
 * Returns the smallest index in `handle_str` at which some stored word
 * begins, or -1 if no stored word occurs (also when `root` or `handle_str`
 * is NULL). Characters outside 'a'..'z' never match anything.
 *
 * The trie is walked afresh from the root at every starting offset, so the
 * cost is O(n * L) for a text of length n and a longest stored word of
 * length L.
 */
int strpos(Trie_node root, const char *handle_str)
{
        if(root == NULL || handle_str == NULL)
        {
                return -1;
        }

        /* Number of child slots, taken from the array itself. */
        const int slots = (int)(sizeof root->child / sizeof root->child[0]);

        for(int start = 0; handle_str[start] != '\0'; start++)
        {
                /* Restarting from the root for each offset means a failed
                 * partial match can never hide a real match that begins
                 * inside it, and offset 0 needs no special sentinel. */
                Trie_node node = root;
                for(int i = start; node != NULL; i++)
                {
                        if(node->count > 0)
                        {
                                return start;
                        }
                        int idx = handle_str[i] - 'a';
                        /* Also true for the terminating '\0', which ends
                         * the walk before reading past the string. */
                        if(idx < 0 || idx >= slots)
                        {
                                break;
                        }
                        node = node->child[idx];
                }
        }
        return -1;
}

/*
 * Free the node `root` and every node reachable from it.
 *
 * Passing NULL is allowed and does nothing, mirroring free(). After the call
 * `root` and all of its descendants are invalid.
 */
void destroyTrie(Trie_node root)
{
        if(root == NULL)
        {
                return;
        }
        /* Release the children first; empty slots are handled by the NULL
         * check above. */
        for(int i = 0; i < MAX_CHILD; i++)
        {
                destroyTrie(root->child[i]);
        }
        free(root);
}

/*
 * Demo: store "esd" in a trie and print where it first occurs in
 * "afeweasddesd" (index 9).
 */
int main(void)
{
        char posstr[] = "esd";
        char handle_str[] = "afeweasddesd";

        Trie_node root = createNode();
        if(root == NULL)
        {
                return EXIT_FAILURE;
        }
        if(!insertNode(root, posstr))
        {
                destroyTrie(root);
                return EXIT_FAILURE;
        }

        int tpos = strpos(root, handle_str);
        printf("find string pos is %d\n", tpos);

        destroyTrie(root);
        return 0;
}


For comparison, here is the strpos function implemented with the most primitive (brute-force) method:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Brute-force substring search.
 *
 * Returns the index of the first occurrence of `pos` inside `handle`, or -1
 * if there is none. An empty `pos` matches at index 0 of a non-empty
 * `handle`; an empty `handle` always yields -1.
 */
int strpos(char *handle, char *pos)
{
        for(int start = 0; handle[start] != '\0'; start++)
        {
                /* Count how many leading characters of pos agree with
                 * handle at this offset. The terminator of handle differs
                 * from every pattern character, so the comparison stops
                 * before running off the end of handle. */
                int matched = 0;
                while(pos[matched] != '\0' && handle[start + matched] == pos[matched])
                {
                        matched++;
                }

                /* Reaching the end of pos means every character agreed. */
                if(pos[matched] == '\0')
                {
                        return start;
                }
        }

        return -1;
}

/*
 * Demo: print the index at which "afk" first occurs in "afk asdef" (0).
 */
int main(void)
{
        /* Sizes are deduced from the initializers so the terminating '\0'
         * always fits. */
        char s[] = "afk asdef";
        char pos[] = "afk";
        int r = strpos(s, pos);
        printf("%d\n", r);
        return 0;
}


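Compared with the strpos function implemented with the trie, the primitive version is very simple, just two nested loops, but its worst-case time complexity is O(n·m) for a main string of length n and a search string of length m, which is quadratic when the two lengths are comparable. Its performance is significantly inferior to that of the trie, especially when many strings have to be searched for at once, because the primitive method needs a separate pass over the main string for each of them.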

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325298744&siteId=291194637