// File: KeyFilter.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
/// <summary>
/// Demo page that runs several Aho-Corasick keyword searches and writes the results
/// to the HTTP response, one result per line (each followed by "&lt;br&gt;").
/// </summary>
public partial class KeyFilter : System.Web.UI.Page
{
    /// <summary>
    /// Runs the demo searches when the page loads.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // ---- Demo 1: character-level search, every matched keyword is written out ----
        string text_1 =
            "hello and welcome to this beautiful world!王小波:《沉默的大多数》 别和她说话\r\n 唤醒心中的巨人\r\n 激发无限的潜力\r\n 吸引力法则\r\n 苏菲的世界\r\n 犯罪心理学\r\n 怪诞行为学\r\n 阿尔弗雷德·阿德勒:自卑与超越";
        AhoCorasick.Trie trie_1 = new AhoCorasick.Trie();
        trie_1.Add("hello");
        trie_1.Add("怪诞");
        trie_1.Add("犯罪");
        trie_1.Build();
        foreach (string match in trie_1.Find(text_1))
        {
            WriteLine(match);
        }

        // ---- Demo 2: same idea with more keywords ----
        string text =
            "hello and welcome to this beautiful world!王小波:《沉默的大多数》 穷人的银行\r\n 24堂财富课\r\n 一生的理财计划\r\n 世界商道\r\n 世界营销500招\r\n 创业者\r\n 变成有钱人不难\r\n 国富论\r\n 宏观经济学\r\n 经济学及税赋原理\r\n 曼昆经济学原理\r\n 牛奶可乐经济学\r\n 穷爸爸富爸爸\r\n 经济学原理\r\n 证券分析\r\n 货币战争1\r\n 货币战争2 金权天下\r\n 货币战争3 金融高边疆\r\n 货币战争4 战国时代\r\n 货币战争5 山雨欲来\r\n 货币战争升级版\r\n 邻家的百万富翁";
        AhoCorasick.Trie trie = new AhoCorasick.Trie();
        trie.Add("hello");
        trie.Add("沉默");
        trie.Add("经济学");
        trie.Add("战争");
        trie.Add("战国");
        trie.Build();
        // The text is scanned once; an empty result simply writes nothing,
        // so no separate Any() pre-check (and second scan) is needed.
        foreach (string match in trie.Find(text))
        {
            WriteLine(match);
        }

        // ---- Demo 3: word-level search, the "letters" are whole words ----
        string[] text_2 = "one two three four".Split(' ');
        AhoCorasick.Trie<string, bool> trie_2 = new AhoCorasick.Trie<string, bool>();
        trie_2.Add(new[] { "three", "four" }, true);
        trie_2.Build();
        bool containsthreefour = trie_2.Find(text_2).Any();
        WriteLine(containsthreefour.ToString());

        // ---- Demo 4: phrase search built by splitting the phrase into words ----
        string[] text_3 = "hello world i say to you".Split(' ');
        AhoCorasick.Trie<string, bool> trie_3 = new AhoCorasick.Trie<string, bool>();
        trie_3.Add("hello world".Split(' '), true);
        trie_3.Build();
        bool containsHelloWorld = trie_3.Find(text_3).Any();
        WriteLine(containsHelloWorld.ToString());

        // ---- Disabled example: returning IDs instead of the matched strings ----
        // (variable renamed from trie_3 so the snippet does not clash when re-enabled)
        //AhoCorasick.Trie<int> idTrie = new AhoCorasick.Trie<int>();
        // add words
        //idTrie.Add("hello", 123);
        //idTrie.Add("world", 456);
        // build search tree
        //idTrie.Build();
        // retrieve IDs
        //foreach (int id in idTrie.Find(text))
        //{
        //    Console.WriteLine(id);
        //}
    }

    /// <summary>
    /// Writes <paramref name="value"/> followed by an HTML line break to the response.
    /// </summary>
    /// <param name="value">Text to write; written as-is, without HTML encoding.</param>
    private void WriteLine(string value)
    {
        Response.Write(value + "<br>");
    }
}
// File: AhoCorasick.cs
using System.Collections;
using System.Collections.Generic;
namespace AhoCorasick
{
/// <summary>
/// Trie that finds the added strings in a text and returns each string that was found.
/// </summary>
public class Trie : Trie<string>
{
    /// <summary>
    /// Adds a string to search for. The string is also the value returned when it is found.
    /// </summary>
    /// <param name="s">The string to add.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="s"/> is null.</exception>
    public void Add(string s)
    {
        if (s == null)
        {
            throw new System.ArgumentNullException("s");
        }

        Add(s, s);
    }

    /// <summary>
    /// Adds several strings to search for, in enumeration order.
    /// </summary>
    /// <param name="strings">The strings to add.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="strings"/> is null, or one of its elements is null
    /// (elements before the null one have already been added at that point).
    /// </exception>
    public void Add(IEnumerable<string> strings)
    {
        if (strings == null)
        {
            throw new System.ArgumentNullException("strings");
        }

        foreach (string s in strings)
        {
            Add(s);
        }
    }
}
/// <summary>
/// Trie that will find strings in a text and return values of type <typeparamref name="TValue"/>
/// for each string found.
/// </summary>
/// <remarks>
/// Adds no members of its own; it only fixes the letter type of
/// <c>Trie&lt;T, TValue&gt;</c> to <c>char</c>.
/// </remarks>
/// <typeparam name="TValue">Value type.</typeparam>
public class Trie<TValue> : Trie<char, TValue>
{
}
/// <summary>
/// Aho-Corasick trie that finds words or phrases in a text and returns the values of type
/// <typeparamref name="TValue"/> that were registered for each word or phrase found.
/// </summary>
/// <remarks>
/// <typeparamref name="T"/> is typically <c>char</c> for finding strings, or <c>string</c>
/// for finding phrases made of whole words. Letters are compared with the default
/// equality comparer of <typeparamref name="T"/>.
/// </remarks>
/// <typeparam name="T">Type of a letter in a word.</typeparam>
/// <typeparam name="TValue">Type of the value returned when a word is found.</typeparam>
public class Trie<T, TValue>
{
    /// <summary>
    /// Root of the trie. It has no value and no parent.
    /// </summary>
    private readonly Node<T, TValue> root = new Node<T, TValue>();

    /// <summary>
    /// True while the fail links reflect every word added so far.
    /// Cleared by <see cref="Add"/>, set by <see cref="Build"/>.
    /// </summary>
    private bool isBuilt;

    /// <summary>
    /// Adds a word to the trie.
    /// </summary>
    /// <remarks>
    /// A word consists of letters and one node is built per letter. With <c>char</c> letters
    /// the word is a string; with <c>string</c> letters each node is a whole word and the
    /// "word" is actually a phrase. An empty word is stored on the root, which
    /// <see cref="Find"/> never reports, so it is never returned.
    /// </remarks>
    /// <param name="word">The word that will be searched.</param>
    /// <param name="value">The value that will be returned when the word is found.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="word"/> is null.</exception>
    public void Add(IEnumerable<T> word, TValue value)
    {
        if (word == null)
        {
            throw new System.ArgumentNullException("word");
        }

        // Invalidate first: even if enumerating the word throws half-way, the nodes
        // created so far have no fail link and a rebuild is required.
        isBuilt = false;

        // Walk down from the root, creating a node for each letter that is missing.
        var node = root;
        foreach (T letter in word)
        {
            var child = node[letter];
            if (child == null)
            {
                child = new Node<T, TValue>(letter, node);
                node[letter] = child;
            }

            node = child;
        }

        // Mark the end of the branch with the value to return for this word.
        node.Values.Add(value);
    }

    /// <summary>
    /// Constructs the fail (fall-back) links for all nodes.
    /// </summary>
    /// <remarks>
    /// <see cref="Find"/> calls this automatically when words were added since the last
    /// build. Because the build mutates the trie, call it explicitly up front if the
    /// finished trie is going to be searched from several threads.
    /// </remarks>
    public void Build()
    {
        // Breadth-first traversal: a node's parent (and the parent's fail link)
        // is always processed before the node itself.
        var queue = new Queue<Node<T, TValue>>();
        queue.Enqueue(root);
        while (queue.Count > 0)
        {
            var node = queue.Dequeue();

            foreach (var child in node)
            {
                queue.Enqueue(child);
            }

            // The fail link of the root is the root itself.
            if (node == root)
            {
                root.Fail = root;
                continue;
            }

            // Follow the parent's fail chain until a node with a matching child is found.
            var fail = node.Parent.Fail;
            while (fail[node.Word] == null && fail != root)
            {
                fail = fail.Fail;
            }

            node.Fail = fail[node.Word] ?? root;

            // For children of the root the lookup above finds the node itself;
            // such nodes fall back to the root.
            if (node.Fail == node)
            {
                node.Fail = root;
            }
        }

        isBuilt = true;
    }

    /// <summary>
    /// Finds all added words in a text.
    /// </summary>
    /// <remarks>
    /// The search itself is lazy and runs while the result is enumerated. Values are yielded
    /// in the order in which matches end in the text; for matches ending on the same letter,
    /// the longest match comes first.
    /// </remarks>
    /// <param name="text">The text to search in.</param>
    /// <returns>The values that were added for the found words.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
    public IEnumerable<TValue> Find(IEnumerable<T> text)
    {
        // Validate eagerly; inside an iterator the check would be deferred
        // until the first MoveNext.
        if (text == null)
        {
            throw new System.ArgumentNullException("text");
        }

        return FindIterator(text);
    }

    /// <summary>
    /// Iterator behind <see cref="Find"/>; rebuilds the fail links first if they are stale.
    /// </summary>
    private IEnumerable<TValue> FindIterator(IEnumerable<T> text)
    {
        // Without this, nodes added after (or without) Build() have a null fail link
        // and the loops below would dereference null.
        if (!isBuilt)
        {
            Build();
        }

        var node = root;
        foreach (T letter in text)
        {
            // Fall back until some node can continue with this letter, or we hit the root.
            while (node[letter] == null && node != root)
            {
                node = node.Fail;
            }

            node = node[letter] ?? root;

            // Report the words ending here: this node plus every shorter suffix on its fail chain.
            for (var suffix = node; suffix != root; suffix = suffix.Fail)
            {
                foreach (TValue value in suffix.Values)
                {
                    yield return value;
                }
            }
        }
    }

    /// <summary>
    /// Node in a trie.
    /// </summary>
    /// <typeparam name="TNode">The same as the parent type.</typeparam>
    /// <typeparam name="TNodeValue">The same as the parent value type.</typeparam>
    private class Node<TNode, TNodeValue> : IEnumerable<Node<TNode, TNodeValue>>
    {
        private readonly TNode word;
        private readonly Node<TNode, TNodeValue> parent;
        private readonly Dictionary<TNode, Node<TNode, TNodeValue>> children =
            new Dictionary<TNode, Node<TNode, TNodeValue>>();
        private readonly List<TNodeValue> values = new List<TNodeValue>();

        /// <summary>
        /// Constructor for the root node.
        /// </summary>
        public Node()
        {
        }

        /// <summary>
        /// Constructor for a node with a word.
        /// </summary>
        /// <param name="word">Word (or letter) represented by this node.</param>
        /// <param name="parent">Node this node hangs under.</param>
        public Node(TNode word, Node<TNode, TNodeValue> parent)
        {
            this.word = word;
            this.parent = parent;
        }

        /// <summary>
        /// Word (or letter) for this node.
        /// </summary>
        public TNode Word
        {
            get { return word; }
        }

        /// <summary>
        /// Parent node.
        /// </summary>
        public Node<TNode, TNodeValue> Parent
        {
            get { return parent; }
        }

        /// <summary>
        /// Fail or fall node.
        /// </summary>
        public Node<TNode, TNodeValue> Fail { get; set; }

        /// <summary>
        /// Children for this node.
        /// </summary>
        /// <param name="c">Child word.</param>
        /// <returns>Child node, or null when there is no child for <paramref name="c"/>.</returns>
        public Node<TNode, TNodeValue> this[TNode c]
        {
            get
            {
                // Single dictionary lookup instead of ContainsKey followed by the indexer.
                Node<TNode, TNodeValue> child;
                return children.TryGetValue(c, out child) ? child : null;
            }
            set
            {
                children[c] = value;
            }
        }

        /// <summary>
        /// Values for words that end at this node.
        /// </summary>
        public List<TNodeValue> Values
        {
            get { return values; }
        }

        /// <summary>
        /// Enumerates the child nodes.
        /// </summary>
        public IEnumerator<Node<TNode, TNodeValue>> GetEnumerator()
        {
            return children.Values.GetEnumerator();
        }

        /// <summary>
        /// Enumerates the child nodes (non-generic form).
        /// </summary>
        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        /// <summary>
        /// Returns the text form of this node's word; empty for a node without a word
        /// (the root when the letter type is a reference type).
        /// </summary>
        public override string ToString()
        {
            return word == null ? string.Empty : word.ToString();
        }
    }
}
}