.NET下AhoCorasick使用示例

KeyFilter.cs

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;

/// <summary>
/// Demo page that exercises the AhoCorasick trie: on load it runs several keyword
/// searches and writes each result to the HTTP response, one per line.
/// </summary>
public partial class KeyFilter : System.Web.UI.Page
{
    /// <summary>
    /// Runs the sample searches and writes their results to the response.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // ---- Example 1: plain string keywords -------------------------------
        string text_1 =
            "hello and welcome to this beautiful world!王小波:《沉默的大多数》 别和她说话\r\n         唤醒心中的巨人\r\n         激发无限的潜力\r\n         吸引力法则\r\n         苏菲的世界\r\n         犯罪心理学\r\n         怪诞行为学\r\n         阿尔弗雷德·阿德勒:自卑与超越";

        AhoCorasick.Trie trie_1 = new AhoCorasick.Trie();
        trie_1.Add("hello");
        trie_1.Add("怪诞");
        trie_1.Add("犯罪");
        trie_1.Build();

        // Enumerate the matches directly; no intermediate array/list copies are needed.
        foreach (string match in trie_1.Find(text_1))
        {
            Response.Write(match + "<br>");
        }

        // ---- Example 2: keywords that occur several times -------------------
        string text =
            "hello and welcome to this beautiful world!王小波:《沉默的大多数》         穷人的银行\r\n        24堂财富课\r\n        一生的理财计划\r\n        世界商道\r\n        世界营销500招\r\n        创业者\r\n        变成有钱人不难\r\n        国富论\r\n        宏观经济学\r\n        经济学及税赋原理\r\n        曼昆经济学原理\r\n        牛奶可乐经济学\r\n        穷爸爸富爸爸\r\n        经济学原理\r\n        证券分析\r\n        货币战争1\r\n        货币战争2 金权天下\r\n        货币战争3 金融高边疆\r\n        货币战争4 战国时代\r\n        货币战争5 山雨欲来\r\n        货币战争升级版\r\n        邻家的百万富翁";

        AhoCorasick.Trie trie = new AhoCorasick.Trie();
        trie.Add("hello");
        trie.Add("沉默");
        trie.Add("经济学");
        trie.Add("战争");
        trie.Add("战国");
        trie.Build();

        // A single pass is enough: an empty result simply writes nothing, so the
        // text does not have to be scanned twice (once for Any(), once for output).
        foreach (string match in trie.Find(text))
        {
            Response.Write(match + "<br>");
        }

        // ---- Example 3: phrase search, each "letter" is a whole word --------
        string[] text_2 = "one two three four".Split(' ');

        AhoCorasick.Trie<string, bool> trie_2 = new AhoCorasick.Trie<string, bool>();
        trie_2.Add(new[] { "three", "four" }, true);
        trie_2.Build();

        bool containsthreefour = trie_2.Find(text_2).Any();
        Response.Write(containsthreefour + "<br>");

        // ---- Example 4: phrase search built from a split sentence -----------
        string[] text_3 = "hello world i say to you".Split(' ');

        AhoCorasick.Trie<string, bool> trie_3 = new AhoCorasick.Trie<string, bool>();
        trie_3.Add("hello world".Split(' '), true);
        trie_3.Build();

        bool containsHelloWorld = trie_3.Find(text_3).Any();
        Response.Write(containsHelloWorld + "<br>");

        // ---- Example 5 (disabled, kept for reference): returning IDs --------
        // AhoCorasick.Trie<int> idTrie = new AhoCorasick.Trie<int>();
        //
        // // add words
        // idTrie.Add("hello", 123);
        // idTrie.Add("world", 456);
        //
        // // build search tree
        // idTrie.Build();
        //
        // // retrieve IDs
        // foreach (int id in idTrie.Find(text))
        // {
        //     Console.WriteLine(id);
        // }
    }
}

AhoCorasick.cs

using System.Collections;
using System.Collections.Generic;

namespace AhoCorasick
{
    
    
    /// <summary>
    /// String trie in which every added word is also the value reported
    /// when that word is found in a searched text.
    /// </summary>
    public class Trie : Trie<string>
    {
        /// <summary>
        /// Adds a single string; the string is used both as the word and as its value.
        /// </summary>
        /// <param name="s">The string to add.</param>
        public void Add(string s)
        {
            base.Add(s, s);
        }

        /// <summary>
        /// Adds several strings, one after another.
        /// </summary>
        /// <param name="strings">The strings to add.</param>
        public void Add(IEnumerable<string> strings)
        {
            using (IEnumerator<string> cursor = strings.GetEnumerator())
            {
                while (cursor.MoveNext())
                {
                    Add(cursor.Current);
                }
            }
        }
    }

    /// <summary>
    /// Character-based trie: words are sequences of <c>char</c> (i.e. strings), and a value
    /// of type <typeparamref name="TValue"/> is associated with each added word.
    /// </summary>
    /// <typeparam name="TValue">Value type.</typeparam>
    public class Trie<TValue> : Trie<char, TValue> { }

    /// <summary>
    /// Aho-Corasick trie that finds added words (strings or phrases) in a text and returns
    /// the values of type <typeparamref name="TValue"/> registered for each word found.
    /// </summary>
    /// <remarks>
    /// <typeparamref name="T"/> is typically <c>char</c> for finding strings, or
    /// <c>string</c> for finding phrases made of whole words.
    /// </remarks>
    /// <typeparam name="T">Type of a letter in a word.</typeparam>
    /// <typeparam name="TValue">Type of the value returned when a word is found.</typeparam>
    public class Trie<T, TValue>
    {
        /// <summary>
        /// Root of the trie. It has no value and no parent.
        /// </summary>
        private readonly Node<T, TValue> root = new Node<T, TValue>();

        /// <summary>
        /// True while the fail links reflect every word added so far.
        /// Reset by <see cref="Add"/>, set by <see cref="Build"/>.
        /// </summary>
        private bool isBuilt;

        /// <summary>
        /// Adds a word to the tree.
        /// </summary>
        /// <remarks>
        /// A word consists of letters and a node is built for each letter.
        /// If the letter type is char the word is a string; if the letter type is string
        /// a node is added per word and the "word" is actually a phrase.
        /// Adding a word invalidates the fail links; they are rebuilt by
        /// <see cref="Build"/> or automatically by the next <see cref="Find"/>.
        /// </remarks>
        /// <param name="word">The word that will be searched.</param>
        /// <param name="value">The value that will be returned when the word is found.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="word"/> is null.</exception>
        public void Add(IEnumerable<T> word, TValue value)
        {
            if (word == null)
            {
                throw new System.ArgumentNullException("word");
            }

            // Invalidate before mutating: new nodes have no fail link yet, and this also
            // covers the case where enumerating the word throws half-way through.
            isBuilt = false;

            // Start at the root and extend the branch one letter at a time,
            // creating letter nodes that do not exist yet.
            var node = root;

            foreach (T c in word)
            {
                var child = node[c];

                if (child == null)
                {
                    child = node[c] = new Node<T, TValue>(c, node);
                }

                node = child;
            }

            // Mark the end of the branch with the value returned when this word is found.
            node.Values.Add(value);
        }

        /// <summary>
        /// Constructs the fail (fall-back) links for all nodes.
        /// </summary>
        /// <remarks>
        /// Safe to call repeatedly; every call recomputes all links from scratch.
        /// </remarks>
        public void Build()
        {
            // Breadth-first traversal: a node's parent (and every shallower node)
            // has its fail link assigned before the node itself is processed.
            var queue = new Queue<Node<T, TValue>>();
            queue.Enqueue(root);

            while (queue.Count > 0)
            {
                var node = queue.Dequeue();

                foreach (var child in node)
                {
                    queue.Enqueue(child);
                }

                // The fail link of the root is the root itself.
                if (node == root)
                {
                    root.Fail = root;
                    continue;
                }

                // Follow the parent's fail chain until a node with a matching child is found.
                var fail = node.Parent.Fail;

                while (fail[node.Word] == null && fail != root)
                {
                    fail = fail.Fail;
                }

                node.Fail = fail[node.Word] ?? root;

                // Direct children of the root would otherwise point at themselves.
                if (node.Fail == node)
                {
                    node.Fail = root;
                }
            }

            isBuilt = true;
        }

        /// <summary>
        /// Finds all added words in a text.
        /// </summary>
        /// <remarks>
        /// The search itself is lazy and runs while the result is enumerated. If words were
        /// added since the last <see cref="Build"/>, the fail links are rebuilt when the
        /// enumeration starts.
        /// </remarks>
        /// <param name="text">The text to search in.</param>
        /// <returns>The values that were added for the found words, in order of occurrence.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
        public IEnumerable<TValue> Find(IEnumerable<T> text)
        {
            // Validate eagerly; the iterator body below only runs on enumeration.
            if (text == null)
            {
                throw new System.ArgumentNullException("text");
            }

            return FindIterator(text);
        }

        /// <summary>
        /// Lazy search behind <see cref="Find"/>.
        /// </summary>
        private IEnumerable<TValue> FindIterator(IEnumerable<T> text)
        {
            // Without up-to-date fail links the walk below would dereference null links.
            if (!isBuilt)
            {
                Build();
            }

            var node = root;

            foreach (T c in text)
            {
                // Fall back until some node can continue with this letter.
                while (node[c] == null && node != root)
                {
                    node = node.Fail;
                }

                node = node[c] ?? root;

                // Report the words ending here and every shorter word that is a suffix.
                for (var t = node; t != root; t = t.Fail)
                {
                    foreach (TValue value in t.Values)
                    {
                        yield return value;
                    }
                }
            }
        }

        /// <summary>
        /// Node in a trie.
        /// </summary>
        /// <typeparam name="TNode">The same as the parent type.</typeparam>
        /// <typeparam name="TNodeValue">The same as the parent value type.</typeparam>
        private class Node<TNode, TNodeValue> : IEnumerable<Node<TNode, TNodeValue>>
        {
            private readonly TNode word;
            private readonly Node<TNode, TNodeValue> parent;

            private readonly Dictionary<TNode, Node<TNode, TNodeValue>> children =
                new Dictionary<TNode, Node<TNode, TNodeValue>>();

            private readonly List<TNodeValue> values = new List<TNodeValue>();

            /// <summary>
            /// Constructor for the root node.
            /// </summary>
            public Node()
            {
            }

            /// <summary>
            /// Constructor for a node with a word.
            /// </summary>
            /// <param name="word">Word (or letter) represented by this node.</param>
            /// <param name="parent">Node this node hangs off.</param>
            public Node(TNode word, Node<TNode, TNodeValue> parent)
            {
                this.word = word;
                this.parent = parent;
            }

            /// <summary>
            /// Word (or letter) for this node.
            /// </summary>
            public TNode Word
            {
                get { return word; }
            }

            /// <summary>
            /// Parent node.
            /// </summary>
            public Node<TNode, TNodeValue> Parent
            {
                get { return parent; }
            }

            /// <summary>
            /// Fail or fall node.
            /// </summary>
            public Node<TNode, TNodeValue> Fail { get; set; }

            /// <summary>
            /// Children for this node.
            /// </summary>
            /// <param name="c">Child word.</param>
            /// <returns>Child node, or null when there is no child for <paramref name="c"/>.</returns>
            public Node<TNode, TNodeValue> this[TNode c]
            {
                get
                {
                    // Single dictionary lookup instead of ContainsKey followed by the indexer.
                    Node<TNode, TNodeValue> child;
                    return children.TryGetValue(c, out child) ? child : null;
                }
                set { children[c] = value; }
            }

            /// <summary>
            /// Values for words that end at this node.
            /// </summary>
            public List<TNodeValue> Values
            {
                get { return values; }
            }

            /// <inheritdoc/>
            public IEnumerator<Node<TNode, TNodeValue>> GetEnumerator()
            {
                return children.Values.GetEnumerator();
            }

            /// <inheritdoc/>
            IEnumerator IEnumerable.GetEnumerator()
            {
                return GetEnumerator();
            }

            /// <inheritdoc/>
            public override string ToString()
            {
                // The root of a reference-typed trie has no word; avoid a null dereference.
                return Word == null ? string.Empty : Word.ToString();
            }
        }
    }
}

如图

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/WuLex/article/details/108367630