ElasticSearch 聚合

  • 聚合(aggregations)

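    聚合框架收集由搜索查询选择的所有数据,并由许多构建块组成,这些构建块有助于构建复杂的数据摘要。聚合的基本结构如下所示:
    
    "aggregations" : {
       "<aggregation_name>" : {
          "<aggregation_type>" : {
             <aggregation_body>
          }
          [,"meta" : { [<meta_data_body>] } ]?
          [,"aggregations" : { [<sub_aggregation>]+ } ]?
       }
       [,"<aggregation_name_2>" : { ... } ]*
    }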
    
    有不同类型的聚合,每种聚合都有自己的用途。本章将详细讨论它们。
  • 指标(Metrics)聚合

    这些聚合有助于根据聚合文档的字段值计算指标,有时某些值也可以由脚本生成。
    数值指标既可以是单值的(例如平均聚合),也可以是多值的(例如统计聚合)。
  • 平均聚合

    此聚合用于获取聚合文档中存在的任何数字字段的平均值。例如,
    
    POST /school/_search
    {
       "aggs":{
          "avg_fees":{"avg":{"field":"fees"}}
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
        "took": 46,
        "timed_out": false,
        "_shards": {
            "total": 1,
            "successful": 1,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": {
                "value": 3,
                "relation": "eq"
            },
            "max_score": 1.0,
            "hits": [
                {
                    "_index": "school",
                    "_type": "_doc",
                    "_id": "10",
                    "_score": 1.0,
                    "_source": {
                        "name": "Saint Paul School",
                        "description": "ICSE Afiliation",
                        "street": "Dawarka",
                        "city": "Delhi",
                        "state": "Delhi",
                        "zip": "110075",
                        "location": [
                            28.5733056,
                            77.0122136
                        ],
                        "fees": 5000,
                        "tags": [
                            "Good Faculty",
                            "Great Sports"
                        ],
                        "rating": "4.5"
                    }
                },
                {
                    "_index": "school",
                    "_type": "_doc",
                    "_id": "5",
                    "_score": 1.0,
                    "_source": {
                        "name": "Central School",
                        "description": "CBSE Affiliation",
                        "street": "Nagan",
                        "city": "paprola",
                        "state": "HP",
                        "zip": "176115",
                        "location": [
                            31.8955385,
                            76.8380405
                        ],
                        "fees": 2200,
                        "tags": [
                            "Senior Secondary",
                            "beautiful campus"
                        ],
                        "rating": "3.3"
                    }
                },
                {
                    "_index": "school",
                    "_type": "_doc",
                    "_id": "16",
                    "_score": 1.0,
                    "_source": {
                        "name": "Crescent School",
                        "description": "State Board Affiliation",
                        "street": "beijing",
                        "city": "Jaipur",
                        "state": "RJ",
                        "zip": "176114",
                        "location": [
                            26.8535922,
                            75.7923988
                        ],
                        "fees": 2500,
                        "tags": [
                            "Well equipped labs"
                        ],
                        "rating": "4.5"
                    }
                }
            ]
        },
        "aggregations": {
            "avg_fees": {
                "value": 3233.3333333333335
            }
        }
    }
    
  • 基数聚合

    此聚合提供了特定字段的不同值的计数。
    
    POST /school/_search?size=0
    {
       "aggs":{
          "distinct_name_count":{"cardinality":{"field":"fees"}}
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
       "took" : 2,
       "timed_out" : false,
       "_shards" : {
          "total" : 1,
          "successful" : 1,
          "skipped" : 0,
          "failed" : 0
       },
       "hits" : {
          "total" : {
             "value" : 2,
             "relation" : "eq"
          },
          "max_score" : null,
          "hits" : [ ]
       },
       "aggregations" : {
          "distinct_name_count" : {
             "value" : 2
          }
       }
    }
    
    注意:基数的值为 2,因为在此示例的响应中只匹配到 2 个文档(hits.total.value 为 2),它们的 fees 字段有两个不同的值。
  • 扩展统计(Extended Stats)聚合

    此聚合生成有关聚合文档中特定数字字段的所有统计信息。
    
    POST /school/_search?size=0
    {
       "aggs" : {
          "fees_stats" : { "extended_stats" : { "field" : "fees" } }
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
        "took": 13,
        "timed_out": false,
        "_shards": {
            "total": 1,
            "successful": 1,
            "skipped": 0,
            "failed": 0
        },
        "hits": {
            "total": {
                "value": 3,
                "relation": "eq"
            },
            "max_score": null,
            "hits": []
        },
        "aggregations": {
            "fees_stats": {
                "count": 3,
                "min": 2200.0,
                "max": 5000.0,
                "avg": 3233.3333333333335,
                "sum": 9700.0,
                "sum_of_squares": 3.609E7,
                "variance": 1575555.555555556,
                "variance_population": 1575555.555555556,
                "variance_sampling": 2363333.333333334,
                "std_deviation": 1255.2113589175156,
                "std_deviation_population": 1255.2113589175156,
                "std_deviation_sampling": 1537.3136743466944,
                "std_deviation_bounds": {
                    "upper": 5743.756051168364,
                    "lower": 722.9106154983024,
                    "upper_population": 5743.756051168364,
                    "lower_population": 722.9106154983024,
                    "upper_sampling": 6307.960682026722,
                    "lower_sampling": 158.70598463994475
                }
            }
        }
    }
    
  • 最大(Max)聚合

    此聚合在聚合的文档中查找特定数字字段的最大值。
    
    POST /school/_search?size=0
    {
       "aggs" : {
       "max_fees" : { "max" : { "field" : "fees" } }
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
       "took" : 16,
       "timed_out" : false,
       "_shards" : {
          "total" : 1,
          "successful" : 1,
          "skipped" : 0,
          "failed" : 0
       },
      "hits" : {
          "total" : {
             "value" : 2,
             "relation" : "eq"
          },
          "max_score" : null,
          "hits" : [ ]
       },
       "aggregations" : {
          "max_fees" : {
             "value" : 3500.0
          }
       }
    }
    
  • 最小(Min)聚合

    此聚合查找聚合文档中特定数字字段的最小值。
    
    POST /school/_search?size=0
    {
       "aggs" : {
          "min_fees" : { "min" : { "field" : "fees" } }
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
       "took" : 2,
       "timed_out" : false,
       "_shards" : {
          "total" : 1,
          "successful" : 1,
          "skipped" : 0,
          "failed" : 0
       },
       "hits" : {
          "total" : {
             "value" : 2,
             "relation" : "eq"
          },
          "max_score" : null,
          "hits" : [ ]
       },
      "aggregations" : {
          "min_fees" : {
             "value" : 2200.0
          }
       }
    }
    
  • 总和(Sum)聚合

    此聚合计算聚合文档中特定数字字段的总和。
    
    POST /school/_search?size=0
    {
       "aggs" : {
          "total_fees" : { "sum" : { "field" : "fees" } }
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
       "took" : 8,
       "timed_out" : false,
       "_shards" : {
          "total" : 1,
          "successful" : 1,
          "skipped" : 0,
          "failed" : 0
       },
       "hits" : {
          "total" : {
             "value" : 2,
             "relation" : "eq"
          },
          "max_score" : null,
          "hits" : [ ]
       },
       "aggregations" : {
          "total_fees" : {
             "value" : 5700.0
          }
       }
    }
    
    此外,还有一些用于特殊场景的指标聚合,例如用于地理位置数据的地理边界(geo bounds)聚合和地理质心(geo centroid)聚合。
  • 统计(Stats)聚合

    一种多值指标聚合,可根据从聚合文档中提取的数值计算统计信息。
    
    POST /school/_search?size=0
    {
       "aggs" : {
          "grades_stats" : { "stats" : { "field" : "fees" } }
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
       "took" : 2,
       "timed_out" : false,
       "_shards" : {
          "total" : 1,
          "successful" : 1,
          "skipped" : 0,
          "failed" : 0
       },
       "hits" : {
          "total" : {
             "value" : 2,
             "relation" : "eq"
          },
          "max_score" : null,
          "hits" : [ ]
       },
       "aggregations" : {
          "grades_stats" : {
             "count" : 2,
             "min" : 2200.0,
             "max" : 3500.0,
             "avg" : 2850.0,
             "sum" : 5700.0
          }
       }
    }
    
  • 聚合元数据

    您可以在请求中使用 meta 标记为聚合附加一些自定义数据,这些数据会在响应中随该聚合的结果原样返回。
    
    POST /school/_search?size=0
    {
       "aggs" : {
          "min_fees" : { "avg" : { "field" : "fees" } ,
             "meta" :{
                "dsc" :"Lowest Fees This Year"
             }
          }
       }
    }
    
    运行上面的代码,我们得到以下结果-
    
    {
       "took" : 0,
       "timed_out" : false,
       "_shards" : {
          "total" : 1,
          "successful" : 1,
          "skipped" : 0,
          "failed" : 0
       },
       "hits" : {
          "total" : {
             "value" : 2,
             "relation" : "eq"
          },
          "max_score" : null,
          "hits" : [ ]
       },
       "aggregations" : {
          "min_fees" : {
             "meta" : {
                "dsc" : "Lowest Fees This Year"
             },
             "value" : 2850.0
          }
       }
    }