map和set的使用及top K问题

1.map和set的应用和比较

　　map和set都是关联式容器，底层容器都是红黑树。

　　map以键值对的形式进行存储，方便进行查找，关键词起到索引的作用，值则表示与索引相关联的数据，以红黑树的结构实现，插入删除等操作都可以在O(log n)时间内完成。

所有元素都是键+值存在，key=value组成pair，是一组映射关系。
不允许键重复
所有元素是通过键进行自动排序的
map的键是不能修改的，但是其键对应的值是可以修改的

 1 #include<string>
 2 #include<vector>
 3 
 4 //模拟pair和 make_pair的底层实现
 5 //template<class K, class V>
 6 //struct pair
 7 //{
 8 //    K first;
 9 //    V second;
10 //
11 //    pair(const K& key, const V& value)
12 //        :first(key)
13 //        , second(value)
14 //    {}
15 //};
16 //template<class K, class V>
17 //pair<K, V> make_pair(const K& key, const V& value)
18 //{
19 //    return pair<K, V>(key, value);
20 //}
21 
22 //vector<string> GetTopKF(const vector<string>& fruits)
23 //{
24 //    vector<string> topk;
25 //    typedef map<string, int> CountTop;
26 //    typedef map<string, int>::iterator CountIt;
27 //    CountTop counttop;
28 //    for (size_t i = 0; i < fruits.size(); i++) {
29 //        CountIt countit = counttop.find(fruits[i]);
30 //        if (countit != counttop.end())
31 //            (countit->second)++;
32 //        else
33 //            //counttop.insert(pair<string, int>(fruits[i], 1));
34 //            counttop.insert(make_pair(fruits[i], 1));
35 //    }
36 //    return topk;
37 //}
38 vector<string> GetTopKF(const vector<string>& fruits)
39 {
40     vector<string> topk;
41     typedef map<string, int> CountTop;
42     typedef map<string, int>::iterator CountIt;
43     CountTop counttop;
44     for (size_t i = 0; i < fruits.size(); i++) {
45         /*pair<CountIt, bool> retKV = counttop.insert(make_pair(fruits[i], 1));
46         if (retKV.second == false)
47         {
48         retKV.first->second++;
49         }*/
50         counttop[fruits[i]]++;
51     }
52     return topk;
53 }
54 
55 
56 void MapTest()
57 {
58     typedef map<string, string> Dict;
59     typedef map<string, string>::iterator DictIt;
60     Dict dict;
61     dict.insert(pair<string, string>("right", "右边"));
62     dict.insert(pair<string, string>("left", "左边"));
63     dict.insert(pair<string, string>("世界", "你好"));
64     dict.insert(pair<string, string>("hello", "word"));
65     dict.insert(pair<string, string>("key", "键值"));
66 
67     DictIt dictit = dict.begin();
68     while (dictit != dict.end()) {
69         cout << (*dictit).first << " " << (*dictit).second << endl;
70         ++dictit;
71     }
72     DictIt ret = dict.find("left");
73     if(ret != dict.end())
74         dict.erase(ret);
75     vector<string> v;
76     v.push_back("梨");
77     v.push_back("苹果");
78     v.push_back("西瓜");
79     v.push_back("香蕉");
80     v.push_back("西瓜");
81     v.push_back("香蕉");
82     v.push_back("菠萝");
83     v.push_back("西瓜");
84     v.push_back("草莓");
85     GetTopKF(v);
86 }

　　set支持高效的关键字查询操作---检查每一个给定的关键字是否在set中，也支持高效插入删除。

　　以平衡二叉检索树实现，查找使用中序遍历算法，检索效率高于vector，deque，list等容器，另外使用中序遍历可将键值按照从小到大遍历出来，构造set集合的主要目的是为了快速检索，不可直接去修改键值。

所得元素的只有key没有value，value就是key
不允许出现键值重复
所有的元素都会被自动排序
不能通过迭代器来改变set的值，因为set的值就是键

 1 #pragma once
 2 #include<iostream>
 3 #include<set>
 4 #include<map>
 5 
 6 using namespace std;
 7 
 8 void SetTest()
 9 {
10     set<int> s1;  //没有数据冗余
11     s1.insert(4);  
12     s1.insert(5);
13     s1.insert(7);
14     s1.insert(7);
15     s1.insert(14);
16     s1.insert(7);
17     s1.insert(9);
18     s1.insert(3);
19     s1.insert(0);
20     s1.insert(30);
21     s1.insert(14);
22     s1.insert(6);
23     s1.insert(28);  //set的插入操作
24 
25     set<int>::iterator ite = s1.begin();
26     //ite = 10;
27     while (ite != s1.end()) {  //利用迭代器遍历打印数据
28         cout<<*ite<<" ";
29         ite++;
30     }
31     cout << endl;
32     set<int>::reverse_iterator ret1= s1.rbegin(); 
33     while (ret1 != s1.rend()) { //降序打印
34         cout << *ret1 << " ";
35         ret1++;
36     }
37 
38     set<int>::iterator ret = s1.find(10);  //
39     if (ret != s1.end()) //set的查找，如果没有找到不会报错
40         cout << "find it" << *ret << endl; 
41     else
42         cout << "null" << endl;
43 
44     if (s1.count(14))//只判断是否存在14，返回1或0
45         cout << "find it" << endl;
46     else
47         cout << "null" << endl;
48 
49     ret = s1.find(30); //find后删除
50     if (ret != s1.end()) 
51         s1.erase(ret);
52     set<int>::iterator last, first;
53     first = s1.lower_bound(8); //返回8大的第一个数
54     last = s1.upper_bound(20); //返回20大的第一个数
55     s1.erase(first, last);//删除这个范围的数据
56     s1.erase(100); //有就删除，没有也不报错
57 
58     set<int>::iterator ite1 = s1.begin();
59     while (ite1 != s1.end()) {
60         cout << *ite1 << " ";
61         ite1++;
62     }
63 }
64 void MultisetTest() {
65     multiset<int> s2; //允许数据冗余,其他操作同set
66     s2.insert(13);
67     s2.insert(4);
68     s2.insert(6);
69     s2.insert(19);
70     s2.insert(20);
71     s2.insert(16);
72     s2.insert(9);
73     s2.insert(12);
74     s2.insert(9);
75     s2.insert(7);
76     s2.insert(5);
77     s2.insert(13);
78     s2.insert(9); 
79     multiset<int>::iterator mit = s2.begin();
80     while (mit != s2.end()) {
81         cout << *mit << " ";
82         mit++;
83     }
84     multiset<int>::iterator mIt = s2.find(20);
85     /*++mIt;
86     ++mIt;
87     ++mIt;
88     ++mIt;*/
89  }

　　map的节点是一对数据，set的节点是一个数据。

2.扩展

　　Multimap允许数据冗余，即存储的数据不唯一。

　　hashmap是基于散列表（哈希表，hash table）实现的。基本原理是：使用一个下标范围比较大的数组来存储元素。可以设计一个函数（哈希函数，也叫做散列函数），使得每个元素的关键字都与一个函数值（即数组下标，hash值）相对应，于是用这个数组单元来存储这个元素；也可以简单的理解为，按照关键字为每一个元素“分类”，然后将这个元素存储在相应“类”所对应的地方，称为桶。

但不能够保证每个元素的关键字与函数值是一一对应的，有可能出现对于不同的元素，得到相同的函数值，这就是哈希冲突，往往需要专门的哈希冲突处理函数来解决。

　　hashma插入和查找的速度与哈希函数和冲突处理函数的实现有关，是这两个函数耗时的总和。查询时间复杂度是O(1)；

看具体的应用，不一定常数级别的hash_map一定比log(n)级别的map要好，hash_map的hash函数以及解决地址冲突等都要耗时间，而且众所周知hash表是以空间换时间的，因而hash_map的内存消耗肯定要大，一般情况下，如果记录非常大，考虑hash_map，查找效率会高很多，如果要考虑内存消耗，则要谨慎使用hash_map。

　　Multiset允许数据冗余。

map和set的使用及top K问题

猜你喜欢