一、全文检索

1. 建立索引

2. 执行搜索

二、聚合

一、全文检索

1. 建立索引

MongoDB一个集合上只能建立一个文本索引。

建立文本索引：在集合texttest上的body键上建立文本索引。

db.texttest.createIndex( { body : "text" } );

指定索引的默认语言：

db.texttest.createIndex( { body : "text" }, { default_language : "french" } );

在多种语言上建立索引：同一集合中存在多种语言，需要有一个字段标记每个文档的语言，如下面的四个文档中的lingvo字段标识其语言。

{ _id : 1, content : "cheese", lingvo : "english" }
{ _id : 2, content : "fromage", lingvo: "french" }
{ _id : 3, content : "queso", lingvo: "spanish" }
{ _id : 4, content : "ost", lingvo: "swedish" }

使用文档中给定的语言建立索引：

db.textExample.createIndex( { content : "text" }, { language_override : "lingvo" } );

建立复合索引：同时索引content和comments字段，可以在这两个字段上进行文本搜索。

db.textExample.createIndex( { content : "text", comments : "text" });

使用通配符：在全部字段上建立索引，并命名索引。

db.textExample.createIndex( { "$**": "text" }, { name: "alltextindex" } );

指定权重：指定content的权重是10，comments权重是5，其它字段的权重为1。

db.textExample.createIndex( { content : "text", comments : "text"}, { weights : { content: 10, comments: 5, } } );

同时建立文本和非文本的复合索引：content上建立文本索引，username上建立普通索引。

db.textExample.createIndex( { content : "text", username : 1 });

2. 执行搜索

文本搜索：以fish为词根进行搜索，返回body中匹配fish字符串的文档。

db.texttest.find({ $text : { $search :"fish" } });

过滤结果：在文本匹配的文档中过滤出about键值为food的结果。

db.texttest.find({ $text : { $search : "fish" }, about : "food" });

复杂搜索：返回文档中body键匹配cook，但不匹配lunch的body值。先搜索所有匹配条件的数据，再删除不匹配的数据。

db.texttest.find({ $text : { $search : "cook -lunch" } }, {_id:0, body:1});

字面搜索：返回body键匹配整个字符串mongodb text search，而不是匹配mongodb、text、search这三个单词的文档。

// The operator must be spelled $search; the escaped inner quotes make it a phrase search.
db.texttest.find({ $text : { $search : "\"mongodb text search\"" } });

限制返回的文档数：返回1条。

db.texttest.find({ $text : { $search :"fish" }}).limit(1);

显示指定元素：只显示body。

db.texttest.find({ $text : { $search :"fish"}}, { _id : 0, body : 1 });

指定文本搜索使用的语言：全小写方式指定。

// $language must be a supported language name such as "french", with no stray whitespace.
db.texttest.find({ $text : { $search :"fish", $language : "french" } });

利用文本与非文本的复合索引优化查询：

db.texttest.createIndex( { about : 1, body : "text" });
db.texttest.find({ $text : { $search : "fish"}, about : "food"}).explain("executionStats").executionStats;

二、聚合

db.collection.aggregate( { $group : { _id : "$color" } } );

类比SQL：

select distinct color from collection;
-- 或
select color from collection group by color;

db.collection.aggregate({ $group : { _id : "$color", count : { $sum : 1 } } });

类比SQL：

select color, count(1) count from collection group by color;

db.collection.aggregate({ $group : { _id : { color: "$color", transport: "$transport"} , count : { $sum : 1 } } });

类比SQL：

-- color and transport are two separate grouping columns (comma required between them)
select color, transport, count(1) 
  from collection 
 group by color, transport;

db.collection.aggregate( 
    [
        { $group : { _id : { color: "$color", transport: "$transport"} , count : { $sum : 1 } } },
        { $limit : 5 }
    ]);

类比SQL：

select color, transport, count(1) 
  from collection 
 group by color, transport 
 limit 5;

db.collection.aggregate( 
    [
        { $match : { num : { $gt : 500 } } },
        { $group : { _id : { color: "$color", transport: "$transport"} , count : { $sum : 1 } } },
        { $limit : 5 }
    ]);

类比SQL：

select color, transport, count(1) 
  from collection 
 where num > 500
 group by color, transport 
 limit 5;

db.collection.aggregate( 
    [
        { $group : { _id : { color: "$color", transport: "$transport"} , count : { $sum : 1 } } },
        { $sort : { _id :1 } },
        { $limit : 5 }
    ]);

类比SQL：

select color, transport, count(1) 
  from collection 
 group by color, transport 
 order by color, transport
 limit 5;

db.collection.aggregate( 
    [
        { $match : { num : { $gt : 500 } } },
        { $group : { _id : { color: "$color", transport: "$transport"} , count : { $sum : 1 } } },
        { $sort : { _id :1 } },
        { $limit : 1 }
    ]);

类比SQL：

select color, transport, count(1) 
  from collection 
 where num > 500
 group by color, transport 
 order by color, transport
 limit 1;

db.collection.aggregate( { $unwind : "$vegetables" });

类比SQL：

select collection.*, substring_index(substring_index(vegetables, ',', id),',' ,-1) vegetables
  from collection, nums -- nums为只有id一列的数字辅助表
 where id <= length(vegetables)-length(replace(vegetables,',',''))+1;

db.collection.aggregate(
    [
        { $unwind : "$vegetables" },
        { $project : { _id: 0, fruits:1, vegetables:1 } }
    ]);

类比SQL：

select fruits, substring_index(substring_index(vegetables, ',', id),',' ,-1) vegetables
  from collection, nums -- nums为只有id一列的数字辅助表
 where id <= length(vegetables)-length(replace(vegetables,',',''))+1;

db.collection.aggregate(
    [
        { $unwind : "$vegetables" },
        { $project : { _id: 0, fruits:1, veggies: "$vegetables" } }
    ]);

类比SQL：

select fruits, substring_index(substring_index(vegetables, ',', id),',' ,-1) veggies
  from collection, nums -- nums为只有id一列的数字辅助表
 where id <= length(vegetables)-length(replace(vegetables,',',''))+1;

db.collection.aggregate(
    [
        { $unwind : "$vegetables" },
        { $project : { _id: 0, fruits:1, vegetables:1 } },
        { $skip : 2995 }
    ]);

类比SQL：

select fruits, substring_index(substring_index(vegetables, ',', id),',' ,-1) vegetables
  from collection, nums -- nums为只有id一列的数字辅助表
 where id <= length(vegetables)-length(replace(vegetables,',',''))+1
 limit 2995, 999999999;

db.collection.aggregate(
    [
        { $unwind : "$vegetables" },
        { $project : { _id: 0, fruits:1, vegetables:1 } },
        { $skip : 2995 },
        { $out : "food" }
    ]);

类比SQL：

create table food as 
select @a:=@a+1 id, fruits, substring_index(substring_index(vegetables, ',', id),',' ,-1) vegetables
  from collection, (select @a:=0) t, nums -- nums为只有id一列的数字辅助表
 where id <= length(vegetables)-length(replace(vegetables,',',''))+1
 limit 2995, 999999999;

db.prima.aggregate(
    [
        {$lookup: {
            from : "secunda",
            localField : "number",
            foreignField : "number",
            as : "secundaDoc"
         } },
    ]);

类比SQL：

select prima.*, concat(secunda.c1,secunda.c2,...secunda.cn) secundaDoc
  from prima left join secunda on prima.number = secunda.number;

db.prima.aggregate(
    [
        {$lookup:{
            from : "secunda",
            localField : "number",
            foreignField : "number",
            as : "secundaDoc" }},
        {$unwind: "$secundaDoc"},
        {$project: {_id : "$number", english:1, ascii:"$secundaDoc.ascii" }}
    ]);

类比SQL：

-- $unwind drops prima documents whose secundaDoc array is empty, so this behaves as an inner join;
-- $project keeps only number (as _id), english and secundaDoc.ascii (as ascii).
select prima.number _id, prima.english, secunda.ascii
  from prima inner join secunda on prima.number = secunda.number;

三、MapReduce

MongoDB通过两个用户自定义的JavaScript函数实现查询：map和reduce。MongoDB将对指定的集合执行一个专门的查询，所有匹配该查询的文档都将被输入到map函数中。map函数被设计用于生成键值对。任何含有多个值的键都将被输入到reduce函数中，reduce函数将返回输入数据的聚合结果。最后，还有一个可选步骤，通过finalize函数对数据的显示进行完善。

以下是来自文档的图，可以清楚地说明 Map-Reduce 的执行过程。

1. 最简MapReduce

定义map函数：

// Map step: emits one (color, num) pair for the document bound to `this`.
var map = function() {
    var key = this.color;
    var value = this.num;
    emit(key, value);
};

MongoDB中使用emit函数向MapReduce提供Key/Value对。map函数接收集合中的color和num字段作为输入，输出为以color为键，以num数组为值的文档。

定义空reduce函数：

// Placeholder reduce: accepts the key and its values but deliberately
// does nothing, so the call yields undefined.
var reduce = function(color, numbers) {
    return;
};

reduce函数接收map传来的键值对，但不执行任何操作。

执行MapReduce：

db.mapreduce.mapReduce(map,reduce,{ out: { inline : 1 } });

{ out : { inline : 1 } } 表示将执行结果输出到控制台，显示类似如下的结果。

{
    "results" : [
        {
            "_id" : "black",
            "value" : null
        },
        {
            "_id" : "blue",
            "value" : null
        },
        ...
        {
            "_id" : "yellow",
            "value" : null
        }
    ],
    "timeMillis" : 95,
    "counts" : {
        "input" : 1000,
        "emit" : 1000,
        "reduce" : 55,
        "output" : 11
    },
    "ok" : 1,
}

结果显示，为每种颜色创建了一个单独的文档，并且使用颜色作为文档的唯一_id值。因为reduce函数体为空，所以value被设置为null。

2. 求和

定义求和reduce函数：

// Reduce step for the sum example: returns the total of the num values
// collected for one color.
// NOTE: Array.sum is not standard JavaScript; it is supplied by MongoDB's
// JavaScript environment.
var reduce = function(color, numbers) {
    var total = Array.sum(numbers);
    return total;
};

该reduce函数对每个color对应的多个num求和。

执行MapReduce，并将结果输出到集合mrresult中：

db.mapreduce.mapReduce(map,reduce,{ out: "mrresult" });

查看结果集合：

> db.mrresult.findOne();
{ "_id" : "black", "value" : 45318 }

3. 求平均

map函数：

// Map step for the average example: emits the document's color together
// with a { num, count } record. count is always 1 so that each document
// contributes exactly once to the later count total.
var map = function() {
    emit(this.color, { num : this.num, count : 1 });
};

count为计数器，为了只统计每个文档一次，将count值设置为1。

reduce函数：

// Reduce step for the average example: folds all values collected for one
// color into a single { num, count } pair.
//   color - the key shared by every entry in val (not read here)
//   val   - array of { num, count } objects, the shape emitted by map
// Returns an object with the same { num, count } shape as its inputs.
var reduce = function(color, val ) {
    // Declared with var so the accumulator stays local to each call
    // instead of leaking as an implicit global.
    var reduceValue = { num : 0, count : 0};
    for (var i = 0; i < val.length; i++) {
        reduceValue.num += val[i].num;
        reduceValue.count += val[i].count;
    }
    return reduceValue;
};

用一个简单的循环对num和count求和。注意reduce函数中return语句返回的值，必须与map函数中发送到emit函数中的value结构相同。

finalize函数：

// Finalize step: stores num / count on the reduced value as `avg` and
// returns that same (mutated) object. key is not read.
var finalize = function (key, value) {
    var total = value.num;
    var docs = value.count;
    value.avg = total / docs;
    return value;
};

finalize函数从reduce函数接收结果，并计算平均值。

执行：

db.mapreduce.mapReduce(map,reduce,{ out: "mrresult", finalize : finalize });

查看结果：

> db.mrresult.findOne();
{
    "_id" : "black",
    "value" : {
        "num" : 45318,
        "count" : 91,
        "avg" : 498
    }
}

4. 调试

（1）调试map函数
重载emit函数，打印map函数的输出：

// Debugging stand-in for emit: instead of feeding the pair to MapReduce,
// prints the key and the tojson-serialized value so map output can be inspected.
var emit = function(key, value) {
    var line = "emit results - key: " + key + " value: " + tojson(value);
    print(line);
};

使用map.apply和样例文档进行测试：

> map.apply(db.mapreduce.findOne());
emit results - key: blue value: { "num" : 1, "count" : 1 }

（2）调试reduce函数
首先需要确认map和reduce函数返回结果的格式必须严格一致。然后创建一个数组，模拟传入到reduce函数中的数组：

a = [{ "num" : 1, "count" : 1 },{ "num" : 2, "count" : 1 },{ "num" : 3, "count" : 1 }]

现在调用reduce函数，显示返回结果：

>reduce("blue",a);
{ "num" : 6, "count" : 3 }

如果出现某些问题，不理解函数中的内容，那么可以使用printjson()函数将JSON值输出到mongodb日志文件中。在调试时，这是一个有价值的工具。

浅尝辄止MongoDB：高级查询

一、全文检索

1. 建立索引

2. 执行搜索

二、聚合

三、MapReduce

1. 最简MapReduce

2. 求和

3. 求平均

4. 调试

猜你喜欢