数据结构备忘录:Trie树基本操作

Trie树是一种可以实现字符串多模匹配的数据结构,在字符串处理中有很重要的作用。本文的Trie树实现参考了殷人昆《数据结构与算法C++语言描述》(第二版)中的内容,不同之处在于分支节点的分支结构改用C++标准库的map容器实现:map基于红黑树,查找速度快,而且只存储实际出现的分支,节省内存空间,避免浪费。

C++实现如下:

  1 #include "pch.h"
  2 #include <map>
  3 #include <stack>
  4 #include <vector>
  5 #include <string>
  6 #include <iostream>
  7 using namespace std;
  8 
  // A single trie node. A node is either a leaf that stores one complete
  // keyword (DATANODE) or an inner node that maps a branch string to a child
  // node (BRANCHNODE). type_flag records which member of the anonymous union
  // is currently alive; that member is constructed and destroyed by hand.
  struct TrieTreeNode
  {
      enum NodeType { DATANODE, BRANCHNODE } type_flag;  // selects the live union member
      union
      {
          std::string key_in_trie;                        // DATANODE: the stored keyword
          std::map<std::string, TrieTreeNode *> sub_ptr;  // BRANCHNODE: branch string -> child node
      };

      // Builds a leaf node holding the keyword k.
      TrieTreeNode(const std::string &k) : type_flag(NodeType::DATANODE), key_in_trie(k) {}

      // Builds a branch node with no children.
      TrieTreeNode() : type_flag(NodeType::BRANCHNODE), sub_ptr() {}

      // Copies this node only, never its subtree. A leaf copy receives the same
      // keyword. A branch copy receives the same branch strings, each mapped to
      // nullptr, so that the caller can attach separately copied children.
      TrieTreeNode(const TrieTreeNode &be_copied) : type_flag(be_copied.type_flag)
      {
          // type_flag must be set before anything else: the destructor relies on
          // it to know which union member to destroy.
          switch (type_flag)
          {
          case NodeType::DATANODE:
              new (&key_in_trie) std::string(be_copied.key_in_trie);
              break;
          case NodeType::BRANCHNODE:
              new (&sub_ptr) std::map<std::string, TrieTreeNode *>();
              for (std::map<std::string, TrieTreeNode *>::const_iterator p = be_copied.sub_ptr.begin();
                   p != be_copied.sub_ptr.end(); ++p)
              {
                  sub_ptr.insert(std::make_pair(p->first, static_cast<TrieTreeNode *>(nullptr)));
              }
              break;
          }
      }

      // Assignment would have to switch the live union member; it is not
      // supported (the compiler-generated one is already deleted because of
      // the non-trivial union members).
      TrieTreeNode &operator=(const TrieTreeNode &) = delete;

      // Destroys whichever union member is alive. Child nodes are NOT deleted
      // here; for a branch node only the map object itself is destroyed.
      ~TrieTreeNode()
      {
          switch (type_flag)
          {
          case NodeType::DATANODE:
              key_in_trie.~basic_string();
              break;
          case NodeType::BRANCHNODE:
              sub_ptr.~map();
              break;
          }
      }
  };
 47 
 48 class TrieTree
 49 {
 50 public:
 51     bool insert(const string &be_inserted) const;    //Trie树中插入关键字,true成功false失败
 52     bool deleteElem(const string &be_deleted) const;  //Trie树中删除指定关键字,true成功false失败
 53     TrieTreeNode *copy();   //拷贝Trie树,返回指向副本Trie树的指针
 54     TrieTree() { root = new TrieTreeNode(); }
 55     TrieTree(TrieTree &be_copied) { root = be_copied.copy(); }
 56     ~TrieTree();
 57 private:
 58     bool static strCompare(const string &left, const string &right, const size_t &i);
 59     TrieTreeNode *root;   //Trie树根节点
 60 };
 61 
 62 bool TrieTree::strCompare(const string &left, const string &right, const size_t &i)
 63 {
 64     for (size_t j = i; ; ++j)
 65     {
 66         if (j >= left.size() && j >= right.size())
 67             return true;
 68         else if (j >= left.size() || j >= right.size())
 69             return false;
 70         else if (left[j] != right[j])
 71             return false;
 72     }
 73 }
 74 
 75 bool TrieTree::deleteElem(const string &be_deleted) const
 76 {
 77     TrieTreeNode *run = root;
 78     stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> work_stack;
 79     string::size_type i = 0;
 80     while (run->type_flag != TrieTreeNode::NodeType::DATANODE)
 81     {
 82         if (i < be_deleted.size())
 83         {
 84             string temp = be_deleted.substr(i, 1);
 85             ++i;
 86             map<string, TrieTreeNode *>::iterator it = run->sub_ptr.find(temp);
 87             if (it == run->sub_ptr.end())
 88             {
 89                 return false;
 90             }
 91             else
 92             {
 93                 work_stack.push(make_pair(run, it));
 94                 run = it->second;
 95             }
 96         }
 97         else
 98         {
 99             map<string, TrieTreeNode *>::iterator it = run->sub_ptr.find("");
100             if (it != run->sub_ptr.end())
101             {
102                 work_stack.push(make_pair(run, it));
103                 run = it->second;
104                 break;
105             }
106             else
107             {
108                 return false;
109             }
110         }
111     }
112     
113     if (work_stack.top().second->first != "" && strCompare(be_deleted, run->key_in_trie, i) == false)
114     {
115         return false;
116     }
117 
118     bool delete_or_not = true;
119     while (work_stack.top().first != root)
120     {
121         if (delete_or_not == true)
122         {
123             delete work_stack.top().second->second;
124             if (work_stack.top().second->second->type_flag == TrieTreeNode::NodeType::DATANODE)
125             {
126                 run = nullptr;
127             }
128 
129             work_stack.top().first->sub_ptr.erase(work_stack.top().second);
130 
131             if (work_stack.top().first->sub_ptr.size() >= 2)
132             {
133                 return true;
134             }
135             else if (work_stack.top().first->sub_ptr.size() == 1)
136             {
137                 if (work_stack.top().first->sub_ptr.begin()->second->type_flag != TrieTreeNode::NodeType::DATANODE)
138                 {
139                     return true;
140                 }
141                 else
142                 {
143                     run = work_stack.top().first->sub_ptr.begin()->second;
144                     delete work_stack.top().first;
145                     delete_or_not = false;
146                 }
147             }
148             work_stack.pop();
149         }
150         else
151         {
152             if (work_stack.top().first->sub_ptr.size() >= 2)
153             {
154                 work_stack.top().second->second = run;
155                 return true;
156             }
157             else
158             {
159                 delete work_stack.top().first;
160                 work_stack.pop();
161             }
162         }
163     }
164 
165     if (delete_or_not == true)
166     {
167         delete work_stack.top().second->second;
168         root->sub_ptr.erase(work_stack.top().second);
169     }
170     else
171     {
172         work_stack.top().second->second = run;
173     }
174     return true;
175 }
176 
177 bool TrieTree::insert(const string &be_inserted) const 
178 {
179     TrieTreeNode *run = root;
180     map<string, TrieTreeNode *>::iterator father;
181     string::size_type i = 0;
182     while (run->type_flag != TrieTreeNode::NodeType::DATANODE)
183     {
184         if (i < be_inserted.size())
185         {
186             string temp = be_inserted.substr(i, 1);
187             ++i;
188             map<string, TrieTreeNode *>::iterator it = run->sub_ptr.find(temp);
189             if (it == run->sub_ptr.end())
190             {
191                 run->sub_ptr.insert(make_pair(temp, new TrieTreeNode(be_inserted)));
192                 return true;
193             }
194             else
195             {
196                 father = it;
197                 run = it->second;
198             }
199         }
200         else
201         {
202             if (run->sub_ptr.find("") != run->sub_ptr.end())
203             {
204                 return false;
205             }
206             else
207             {
208                 run->sub_ptr.insert(make_pair("", new TrieTreeNode(be_inserted)));
209                 return true;
210             }
211         }
212     }
213     
214     if (strCompare(be_inserted, run->key_in_trie, i) == true)  
215     {
216         return false;
217     }
218     else
219     {
220         while (true)
221         {
222             father->second = new TrieTreeNode();
223             if (i >= be_inserted.size())
224             {
225                 father->second->sub_ptr.insert(make_pair("", new TrieTreeNode(be_inserted)));
226                 father->second->sub_ptr.insert(make_pair(run->key_in_trie.substr(i, 1), run));
227             }
228             else if (i >= run->key_in_trie.size())
229             {
230                 father->second->sub_ptr.insert(make_pair("", run));
231                 father->second->sub_ptr.insert(make_pair(be_inserted.substr(i, 1), new TrieTreeNode(be_inserted)));
232             }
233             else if (be_inserted[i] != run->key_in_trie[i])
234             {
235                 father->second->sub_ptr.insert(make_pair(run->key_in_trie.substr(i, 1), run));
236                 father->second->sub_ptr.insert(make_pair(be_inserted.substr(i, 1), new TrieTreeNode(be_inserted)));
237             }
238             else
239             {
240                 father = father->second->sub_ptr.insert(make_pair(be_inserted.substr(i, 1), new TrieTreeNode())).first;
241                 ++i;
242                 continue;
243             }
244             return true;
245         }
246     }
247 }
248 
249 TrieTree::~TrieTree()
250 {
251     TrieTreeNode *run = root;
252     stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> work_stack;
253 
254     bool trace_back_flag = true;
255     while (true)
256     {
257         if (trace_back_flag == true)
258         {
259             if (run == root)
260             {
261                 if (run->sub_ptr.begin() == run->sub_ptr.end())
262                 {
263                     delete root;
264                     return;
265                 }
266             }
267             else
268             {
269                 if (run->type_flag == TrieTreeNode::DATANODE)
270                 {
271                     delete run;
272                     run = work_stack.top().first;
273                     work_stack.top().second = run->sub_ptr.erase(work_stack.top().second);
274                     trace_back_flag = false;
275                     continue;
276                 }
277             }
278 
279             work_stack.push(make_pair(run, run->sub_ptr.begin()));
280             run = run->sub_ptr.begin()->second;
281         }
282         else
283         {
284             if (run == root)
285             {
286                 if (work_stack.top().second == root->sub_ptr.end())
287                 {
288                     delete root;
289                     return;
290                 }
291 
292                 run = work_stack.top().second->second;
293                 trace_back_flag = true;
294             }
295             else
296             {
297                 if (work_stack.top().second != run->sub_ptr.end())
298                 {
299                     run = work_stack.top().second->second;
300                     trace_back_flag = true;
301                 }
302                 else
303                 {
304                     delete run;
305                     work_stack.pop();
306                     run = work_stack.top().first;
307                     work_stack.top().second = run->sub_ptr.erase(work_stack.top().second);
308                 }
309             }
310         }
311     }
312 }
313 
314 TrieTreeNode *TrieTree::copy()
315 {
316     TrieTreeNode *be_copied = root;
317     stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> work_stack;
318     stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> copy_trace_stack;
319     TrieTreeNode *root_of_copy = nullptr;
320 
321     bool trace_back_flag = true;
322     while (true)
323     {
324         if (trace_back_flag == true)
325         {
326             if (be_copied == root)
327             {
328                 root_of_copy = new TrieTreeNode(*be_copied);
329                 if (be_copied->sub_ptr.begin() == be_copied->sub_ptr.end())
330                 {
331                     break;
332                 }
333                 copy_trace_stack.push(make_pair(root_of_copy, root_of_copy->sub_ptr.begin()));
334             }
335             else 
336             {
337                 if (work_stack.top().second == work_stack.top().first->sub_ptr.begin())
338                 {
339                     copy_trace_stack.top().second->second = new TrieTreeNode(*be_copied);
340                 }
341                 else
342                 {
343                     ++copy_trace_stack.top().second;
344                     copy_trace_stack.top().second->second = new TrieTreeNode(*be_copied);
345                 }
346                 if (be_copied->type_flag != TrieTreeNode::DATANODE)
347                     copy_trace_stack.push(make_pair(copy_trace_stack.top().second->second, copy_trace_stack.top().second->second->sub_ptr.begin()));
348                 else
349                 {
350                     be_copied = work_stack.top().first;
351                     trace_back_flag = false;
352                     continue;
353                 }
354             }
355 
356             work_stack.push(make_pair(be_copied, be_copied->sub_ptr.begin()));
357             be_copied = be_copied->sub_ptr.begin()->second;
358         }
359         else
360         {
361             map<string, TrieTreeNode *>::iterator tempit = work_stack.top().second;
362             if (tempit->second->type_flag != TrieTreeNode::DATANODE)
363             {
364                 copy_trace_stack.pop();
365             }
366 
367             if (be_copied == root)
368             {
369                 if (++(work_stack.top().second) == root->sub_ptr.end())
370                  break;
371 
372                 be_copied = work_stack.top().second->second;
373                 trace_back_flag = true;
374             }
375             else
376             {
377                 if (++(work_stack.top().second) != be_copied->sub_ptr.end())
378                 {
379                     be_copied = work_stack.top().second->second;
380                     trace_back_flag = true;
381                 }
382                 else
383                 {
384                     work_stack.pop();
385                     be_copied = work_stack.top().first;
386                 }
387             }
388         }
389     }
390     return root_of_copy;
391 }
392 
393 int main()
394 {
395     vector<string> test = {"abcd", "abydb", "ary", "AFD", "abyc", "AFDGH", "AFMGB", "AFMGRQ", "cdfg", "cdgkn", "cdgkmq"};
396     TrieTree test_obj;
397     for (vector<string>::iterator p = test.begin(); p != test.end(); ++p)
398     {
399         cout << "插入字符串" << *p << endl;
400         test_obj.insert(*p);
401     }
402 
403     cout << endl;
404     // TrieTreeNode *copy = test_ptr.copy();
405     for (vector<string>::iterator p = test.begin(); p != test.end(); ++p)
406     {
407         cout << "删除字符串" << *p << endl;
408         test_obj.deleteElem(*p);
409     }
410     cout << endl;
411 }

猜你喜欢

转载自www.cnblogs.com/WSKIT/p/10217811.html
今日推荐