Elasticsearch指标聚合分析

指标聚合分析

平均值分析 - avg

  • grade字段的平均值
{
    "size": 0,
    "aggs" : {
        "avg_grade" : { 
            "avg" : { 
                "field" : "grade" 
            }
        }
    }
}
  • 使用脚本
{
    "size": 0,
    "aggs" : {
        "avg_grade" : {
            "avg" : {
                "script" : {
                    "source" : "doc.grade.value"
                }
            }
        }
    }
}
  • 文件脚本(注意:file 方式的文件脚本已在 Elasticsearch 6.0 中移除,6.0 及以上版本请改用存储脚本,通过 id 引用)
{
    "size": 0,
    "aggs" : {
        "avg_grade" : {
            "avg" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "grade"
                    }
                }
            }
        }
    }
}
  • 值脚本(先用脚本对字段的每个值做变换,再对变换后的值求平均,例如乘以修正系数;并不是加权平均)
{
    "size": 0,
    "aggs" : {
        "avg_corrected_grade" : {
            "avg" : {
                "field" : "grade",
                "script" : {
                    "lang": "painless",
                    "source": "_value * params.correction",
                    "params" : {
                        "correction" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺失值处理
{
    "size": 0,
    "aggs" : {
        "grade_avg" : {
            "avg" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

去重操作 - cardinality

  • 种类去重
{
    "size": 0,
    "aggs" : {
        "type_count" : {
            "cardinality" : {
                "field" : "type"
            }
        }
    }
}
  • 精准控制去重

precision_threshold 用于在内存占用和精度之间做权衡:去重后的数量低于该阈值时,结果接近精确;超过该阈值后,结果为近似值。最大支持40000(设置更大的值等同于40000),默认3000。

{
    "aggs" : {
        "type_count" : {
            "cardinality" : {
                "field" : "type",
                "precision_threshold": 100 
            }
        }
    }
}
  • 脚本
{
    "aggs" : {
        "type_promoted_count" : {
            "cardinality" : {
                "script": {
                    "lang": "painless",
                    "source": "doc['type'].value + ' ' + doc['promoted'].value"
                }
            }
        }
    }
}
  • 文件脚本
{
    "aggs" : {
        "type_promoted_count" : {
            "cardinality" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "type_field": "type",
                        "promoted_field": "promoted"
                    }
                }
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "tag_cardinality" : {
            "cardinality" : {
                "field" : "tag",
                "missing": "N/A" 
            }
        }
    }
}

聚合指标 - extended_stats

  • 所有指标

包括计数,最值,平均值等。

{
    "aggs" : {
        "grades_stats" : { 
            "extended_stats" : { 
                "field" : "grade"
            }
        }
    }
}
  • 指定sigma(std_deviation_bounds 取平均值上下各几个标准差,默认为2)
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "field" : "grade",
                "sigma" : 3 
            }
        }
    }
}
  • 脚本
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "script" : {
                    "source" : "doc['grade'].value",
                    "lang" : "painless"
                 }
             }
         }
    }
}
  • 文件脚本
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "grade"
                    }
                }
            }
        }
    }
}
  • 值脚本
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "field" : "grade",
                "script" : {
                    "lang" : "painless",
                    "source": "_value * params.correction",
                    "params" : {
                        "correction" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "grades_stats" : {
            "extended_stats" : {
                "field" : "grade",
                "missing": 0 
            }
        }
    }
}

地理区域坐标聚合 - geo_bounds

wrap_longitude 是可选参数(默认为 true),用于指定是否允许边界框跨越国际日期变更线。

{
    "aggs" : {
        "viewport" : {
            "geo_bounds" : {
                "field" : "location", 
                "wrap_longitude" : true 
            }
        }
    }
}

地理中心点(质心) - geo_centroid

{
    "aggs" : {
        "centroid" : {
            "geo_centroid" : {
                "field" : "location" 
            }
        }
    }
}
  • 一个栗子

根据city字段,聚合分析location字段

{
    "aggs" : {
        "cities" : {
            "terms" : { "field" : "city.keyword" },
            "aggs" : {
                "centroid" : {
                    "geo_centroid" : { "field" : "location" }
                }
            }
        }
    }
}

最大值 - Max

{
    "aggs" : {
        "max_price" : { "max" : { "field" : "price" } }
    }
}
  • 脚本
{
    "aggs" : {
        "max_price" : {
            "max" : {
                "script" : {
                    "source" : "doc.price.value"
                }
            }
        }
    }
}
  • 文件脚本
{
    "aggs" : {
        "max_price" : {
            "max" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "price"
                    }
                }
            }
        }
    }
}
  • 值脚本
{
    "aggs" : {
        "max_price_in_euros" : {
            "max" : {
                "field" : "price",
                "script" : {
                    "source" : "_value * params.conversion_rate",
                    "params" : {
                        "conversion_rate" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "grade_max" : {
            "max" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

最小值 - Min

{
    "aggs" : {
        "min_price" : { "min" : { "field" : "price" } }
    }
}
  • 脚本
{
    "aggs" : {
        "min_price" : {
            "min" : {
                "script" : {
                    "source" : "doc.price.value"
                }
            }
        }
    }
}
  • 文件脚本
{
    "aggs" : {
        "min_price" : {
            "min" : {
                "script" : {
                    "file": "my_script",
                    "params": {
                        "field": "price"
                    }
                }
            }
        }
    }
}
  • 值脚本
{
    "aggs" : {
        "min_price_in_euros" : {
            "min" : {
                "field" : "price",
                "script" : {
                    "source" : "_value * params.conversion_rate",
                    "params" : {
                        "conversion_rate" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "grade_min" : {
            "min" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

百分数聚合分析 - percentiles

  • 对load_time字段做百分位统计,返回percents中每个百分位所对应的取值(不是文档数量)。

默认情况下:percents:[ 1, 5, 25, 50, 75, 95, 99 ]

{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "field" : "load_time",
                "percents" : [95, 99, 99.9] 
            }
        }
    }
}
  • keyed Response

keyed 默认为 true,此时返回结果中的 values 是以百分位为键的对象;设置为 false 后,values 以数组形式返回,数组中每个元素包含 key 和 value 两个字段。

{
    "aggs": {
        "balance_outlier": {
            "percentiles": {
                "field": "balance",
                "keyed": false
            }
        }
    }
}
  • 脚本(可以先对值做换算再聚合,例如把毫秒换算成秒)
{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "script" : {
                    "lang": "painless",
                    "source": "doc['load_time'].value / params.timeUnit", 
                    "params" : {
                        "timeUnit" : 1000   
                    }
                }
            }
        }
    }
}
  • 文件脚本
{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "timeUnit" : 1000
                    }
                }
            }
        }
    }
}
  • 压缩

通过 tdigest 的 compression 参数在内存占用和精度之间取舍:TDigest 算法最多使用约 20 * compression 个节点来近似百分位,compression 越大结果越精确,但占用内存越多、计算越慢。默认值为100。

{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "field" : "load_time",
                "tdigest": {
                  "compression" : 200 
                }
            }
        }
    }
}
  • 直方图(HDR Histogram)

number_of_significant_value_digits 指定直方图取值的分辨率(有效位数)。

{
    "aggs" : {
        "load_time_outlier" : {
            "percentiles" : {
                "field" : "load_time",
                "percents" : [95, 99, 99.9],
                "hdr": {
                  "number_of_significant_value_digits" : 3
                }
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "grade_percentiles" : {
            "percentiles" : {
                "field" : "grade",
                "missing": 10 
            }
        }
    }
}

百分排名聚合分析 - percentile_ranks

  • 15以内的占比,30以内的占比
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "field" : "load_time", 
                "values" : [15, 30]
            }
        }
    }
}
  • keyed Response
{
    "aggs": {
        "balance_outlier": {
            "percentile_ranks": {
                "field": "balance",
                "values": [25000, 50000],
                "keyed": false
            }
        }
    }
}
  • 脚本
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "values" : [3, 5],
                "script" : {
                    "lang": "painless",
                    "source": "doc['load_time'].value / params.timeUnit", 
                    "params" : {
                        "timeUnit" : 1000   
                    }
                }
            }
        }
    }
}
  • 文件脚本
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "values" : [3, 5],
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "timeUnit" : 1000
                    }
                }
            }
        }
    }
}
  • 直方图
{
    "aggs" : {
        "load_time_outlier" : {
            "percentile_ranks" : {
                "field" : "load_time",
                "values" : [15, 30],
                "hdr": { 
                  "number_of_significant_value_digits" : 3 
                }
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "grade_ranks" : {
            "percentile_ranks" : {
                "field" : "grade",
                "values" : [60, 90],
                "missing": 10
            }
        }
    }
}

统计汇总分析 - stats

  • 返回所有的分析结果
{
    "aggs" : {
        "grades_stats" : {
            "stats" : {
                "field" : "grade"
            }
        }
    }
}
  • 脚本
{
    "aggs" : {
        "grades_stats" : {
             "stats" : {
                 "script" : {
                     "lang": "painless",
                     "source": "doc['grade'].value"
                 }
             }
         }
    }
}
  • 文件脚本
{
    "aggs" : {
        "grades_stats" : {
            "stats" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "field" : "grade"
                    }
                }
            }
        }
    }
}
  • 值脚本
{
    "aggs" : {
        "grades_stats" : {
            "stats" : {
                "field" : "grade",
                "script" : {
                    "lang": "painless",
                    "source": "_value * params.correction",
                    "params" : {
                        "correction" : 1.2
                    }
                }
            }
        }
    }
}
  • 缺失值处理
{
    "aggs" : {
        "grades_stats" : {
            "stats" : {
                "field" : "grade",
                "missing": 0 
            }
        }
    }
}

求和聚合分析 - sum

{
    "aggs" : {
        "hat_prices" : { "sum" : { "field" : "price" } }
    }
}
  • 脚本
{
    "aggs" : {
        "hat_prices" : {
            "sum" : {
                "script" : {
                   "source": "doc.price.value"
                }
            }
        }
    }
}
  • 文件脚本
{
    "aggs" : {
        "hat_prices" : {
            "sum" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "field" : "price"
                    }
                }
            }
        }
    }
}
  • 值脚本
{
    "aggs" : {
        "square_hats" : {
            "sum" : {
                "field" : "price",
                "script" : {
                    "source": "_value * _value"
                }
            }
        }
    }
}

  • 缺失值处理
{
    "aggs" : {
        "hat_prices" : {
            "sum" : {
                "field" : "price",
                "missing": 100 
            }
        }
    }
}

热点数据 - top_hits

  • 一个栗子

这个聚合分析方法返回常规的搜索命中,你可以在**_source**字段中指定返回的字段。

{
    "aggs": {
        "top_tags": {
            "terms": {
                "field": "type",
                "size": 3
            },
            "aggs": {
                "top_sales_hits": {
                    "top_hits": {
                        "sort": [
                            {
                                "date": {
                                    "order": "desc"
                                }
                            }
                        ],
                        "_source": {
                            "includes": [ "date", "price" ]
                        },
                        "size" : 1
                    }
                }
            }
        }
    }
}

计数聚合分析 - value_count

  • 统计type字段的值的个数(不去重)
{
    "aggs" : {
        "types_count" : { "value_count" : { "field" : "type" } }
    }
}
  • 脚本
{
    "aggs" : {
        "type_count" : {
            "value_count" : {
                "script" : {
                    "source" : "doc['type'].value"
                }
            }
        }
    }
}
  • 文件脚本
{
    "aggs" : {
        "types_count" : {
            "value_count" : {
                "script" : {
                    "file": "my_script",
                    "params" : {
                        "field" : "type"
                    }
                }
            }
        }
    }
}

猜你喜欢

转载自blog.csdn.net/qq_15260769/article/details/88942274