Elasticsearch的聚合查询分为四大类,分别是Bucket Aggregation、Metric Aggregation、Pipeline Aggregation、Matrix Aggregation。具体的四大类都是什么意思呢?都是怎么运用呢?
Bucket Aggregation :一系列满足特定条件的文档的集合
Metric Aggregation :一些数学运算,可以对文档字段进行统计分析
Pipeline Aggregation :对其他的聚合结果进行二次聚合
Matrix Aggregation :支持对多个字段的操作并提供一个结果矩阵
Aggregation的语法
Aggregation属于search的一部分,一般情况下,建议将其Size指定为0
{
"query":{
},
"aggregations":{ // 和Query同级的关键词
"<aggregation_name>":{ // 自定义的聚合名字
"<aggregation_type>":{ // 聚合定义:不同的Type+body
<aggregation_body>
}
[,"meta":{[<meta_data_body>]}]?
[,"aggregations":{[<sub_aggregation>]+}]? // 子聚合查询
}
[,"<aggregation_name_2>":{}]*
}
}
以下案例使用Kibana中自带的Sample Data的飞机航班数据进行讲解,请先导入数据
具体的聚合类型有哪些,怎么使用,请查看官网
Bucket Aggregation
- 类似于SQL语句中的group by,对指定字段进行分桶。ES中提供很多类型的Bucket,使用较多的是Terms & Range
Terms Aggregation
- Terms聚合操作的字段需要支持doc_values或fielddata:keyword类型默认支持doc_values,可以直接聚合;Text类型需要在Mapping中将fielddata设置为true(enable),然后按照分词后的结果进行分桶
##使用terms 聚合桶 按照目的地进行分桶统计
GET kibana_sample_data_flights/_search
{
"size": 0, ## size为0,返回结果中不展示具体的文档(hits),只返回聚合结果
"aggs":{ ## 聚合固定写法
"flight_dest":{ ## 聚合的名称
"terms":{ ## terms聚合:按照字段的值进行分桶
"field":"DestCountry" ## 需要聚合的字段
}
}
}
}
## 结果展示
{
"took" : 36,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"flight_dest" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 3187,
"buckets" : [
{
"key" : "IT",
"doc_count" : 2371
},
{
"key" : "US",
"doc_count" : 1987
},
{
"key" : "CN",
"doc_count" : 1096
},
{
"key" : "CA",
"doc_count" : 944
},
{
"key" : "JP",
"doc_count" : 774
},
{
"key" : "RU",
"doc_count" : 739
},
{
"key" : "CH",
"doc_count" : 691
},
{
"key" : "GB",
"doc_count" : 449
},
{
"key" : "AU",
"doc_count" : 416
},
{
"key" : "PL",
"doc_count" : 405
}
]
}
}
}
## 使用terms 按目的地分桶后,按延误类型再分桶,多次聚合操作
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs":{
"flight_dest_delay":{
"terms":{
"field":"DestCountry"
},
"aggs":{
"flight_delay_type":{
"terms":{
"field": "FlightDelayType"
}
}
}
}
}
}
## 展示结果如下
"aggregations" : {
"flight_dest_delay" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 3187,
"buckets" : [
{
"key" : "IT",
"doc_count" : 2371,
"flight_delay_type" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "No Delay",
"doc_count" : 1722
},
{
"key" : "NAS Delay",
"doc_count" : 180
},
{
"key" : "Carrier Delay",
"doc_count" : 168
},
{
"key" : "Late Aircraft Delay",
"doc_count" : 164
},
{
"key" : "Weather Delay",
"doc_count" : 74
},
{
"key" : "Security Delay",
"doc_count" : 63
}
]
}
}
]
}
}
Range Aggregation
## 使用平均票价进行范围聚合
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs":{
"flight_avg_ticket_price":{
"range":{
"field":"AvgTicketPrice",
"ranges": [
{"to":200},
{"from":200,"to":500},
{"from":500}
]
}
}
}
}
## 结果展示
{
"took" : 17,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"flight_avg_ticket_price" : {
"buckets" : [
{
"key" : "*-200.0",
"to" : 200.0,
"doc_count" : 749
},
{
"key" : "200.0-500.0",
"from" : 200.0,
"to" : 500.0,
"doc_count" : 3662
},
{
"key" : "500.0-*",
"from" : 500.0,
"doc_count" : 8648
}
]
}
}
}
Histogram Aggregation
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"ticket_price_histrogram": {
"histogram": {
"field":"AvgTicketPrice",
"interval":300,
"extended_bounds":{
"min":0,
"max":1500
}
}
}
}
}
## 展示结果
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"ticket_price_histrogram" : {
"buckets" : [
{
"key" : 0.0,
"doc_count" : 1816
},
{
"key" : 300.0,
"doc_count" : 4115
},
{
"key" : 600.0,
"doc_count" : 4765
},
{
"key" : 900.0,
"doc_count" : 2363
},
{
"key" : 1200.0,
"doc_count" : 0
},
{
"key" : 1500.0,
"doc_count" : 0
}
]
}
}
}
Metric Aggregation
- 对文档字段做具体的统计分析。可以是单值分析,比如 min,max,avg,sum,Cardinality(类似distinct count);也可以是多值分析,比如 stats,percentiles,top hits 等
min,max,avg,sum,stats Aggregation
## 一个请求中查询平均票价最低,平均,最高,还有汇总的情况
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"avg_price": {
"avg": {
"field": "AvgTicketPrice"
}
},
"max_price": {
"max": {
"field": "AvgTicketPrice"
}
},
"min_price": {
"min": {
"field": "AvgTicketPrice"
}
},
"status":{
"stats": {
"field": "AvgTicketPrice"
}
}
}
}
## 结果展示如下
{
"took" : 13,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"max_price" : {
"value" : 1199.72900390625
},
"min_price" : {
"value" : 100.0205307006836
},
"avg_price" : {
"value" : 628.2536888148849
},
"status" : {
"count" : 13059,
"min" : 100.0205307006836,
"max" : 1199.72900390625,
"avg" : 628.2536888148849,
"sum" : 8204364.922233582
}
}
}
Pipeline Aggregation
- 管道聚合:支持对聚合分析的结果,再次进行聚合分析。根据位置的不同,管道聚合结果输出到原结果的方式有两类
- Parent - 结果内嵌到现有的聚合分析结果中
- Derivative (求导)
- Cumulative Sum (累计求和)
- Moving Function (滑动窗口)
- Sibling - 结果和现有分析结果同级
- Max,min,Avg,Sum
- Stats,Extended Stats
- Percentiles
Sibling 方式
## stats
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"dest_country": {
"terms": {
"field": "DestCountry"
},
"aggs": {
"avg_ticket_price": {
"avg": {
"field": "AvgTicketPrice"
}
}
}
},
"stats_price_by_dest":{
"stats_bucket": {
"buckets_path": "dest_country>avg_ticket_price"
}
}
}
}
## 结果展示
{
"took" : 25,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"dest_country" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 3187,
"buckets" : [
{
"key" : "IT",
"doc_count" : 2371,
"avg_ticket_price" : {
"value" : 586.9627099618385
}
},
{
"key" : "US",
"doc_count" : 1987,
"avg_ticket_price" : {
"value" : 595.7743908825026
}
},
{
"key" : "CN",
"doc_count" : 1096,
"avg_ticket_price" : {
"value" : 640.7101617033464
}
},
{
"key" : "CA",
"doc_count" : 944,
"avg_ticket_price" : {
"value" : 648.7471090413757
}
},
{
"key" : "JP",
"doc_count" : 774,
"avg_ticket_price" : {
"value" : 650.9203447346847
}
},
{
"key" : "RU",
"doc_count" : 739,
"avg_ticket_price" : {
"value" : 662.9949632162009
}
},
{
"key" : "CH",
"doc_count" : 691,
"avg_ticket_price" : {
"value" : 575.1067587028537
}
},
{
"key" : "GB",
"doc_count" : 449,
"avg_ticket_price" : {
"value" : 650.5326856005696
}
},
{
"key" : "AU",
"doc_count" : 416,
"avg_ticket_price" : {
"value" : 669.5588319668403
}
},
{
"key" : "PL",
"doc_count" : 405,
"avg_ticket_price" : {
"value" : 662.4497233072917
}
}
]
},
"stats_price_by_dest" : {
"count" : 10,
"min" : 575.1067587028537,
"max" : 669.5588319668403,
"avg" : 634.3757679117504,
"sum" : 6343.757679117503
}
}
}
## percentiles
GET kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"dest_country": {
"terms": {
"field": "DestCountry"
},
"aggs": {
"avg_ticket_price": {
"avg": {
"field": "AvgTicketPrice"
}
}
}
},
"percent_price_by_dest":{
"percentiles_bucket": {
"buckets_path": "dest_country>avg_ticket_price"
}
}
}
}
## 结果展示
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"dest_country" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 3187,
"buckets" : [
{
"key" : "IT",
"doc_count" : 2371,
"avg_ticket_price" : {
"value" : 586.9627099618385
}
},
{
"key" : "US",
"doc_count" : 1987,
"avg_ticket_price" : {
"value" : 595.7743908825026
}
},
{
"key" : "CN",
"doc_count" : 1096,
"avg_ticket_price" : {
"value" : 640.7101617033464
}
},
{
"key" : "CA",
"doc_count" : 944,
"avg_ticket_price" : {
"value" : 648.7471090413757
}
},
{
"key" : "JP",
"doc_count" : 774,
"avg_ticket_price" : {
"value" : 650.9203447346847
}
},
{
"key" : "RU",
"doc_count" : 739,
"avg_ticket_price" : {
"value" : 662.9949632162009
}
},
{
"key" : "CH",
"doc_count" : 691,
"avg_ticket_price" : {
"value" : 575.1067587028537
}
},
{
"key" : "GB",
"doc_count" : 449,
"avg_ticket_price" : {
"value" : 650.5326856005696
}
},
{
"key" : "AU",
"doc_count" : 416,
"avg_ticket_price" : {
"value" : 669.5588319668403
}
},
{
"key" : "PL",
"doc_count" : 405,
"avg_ticket_price" : {
"value" : 662.4497233072917
}
}
]
},
"percent_price_by_dest" : {
"values" : {
"1.0" : 575.1067587028537,
"5.0" : 575.1067587028537,
"25.0" : 595.7743908825026,
"50.0" : 650.5326856005696,
"75.0" : 662.4497233072917,
"95.0" : 669.5588319668403,
"99.0" : 669.5588319668403
}
}
}
}
Parent 方式
## 求导
POST kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"age": {
"histogram": {
"field": "DistanceMiles",
"min_doc_count": 1,
"interval": 3000
},
"aggs": {
"avg_ticket_price": {
"avg": {
"field": "AvgTicketPrice"
}
},
"derivative_avg_ticket_price":{
"derivative": {
"buckets_path": "avg_ticket_price"
}
}
}
}
}
}
## 结果
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"age" : {
"buckets" : [
{
"key" : 0.0,
"doc_count" : 4009,
"avg_ticket_price" : {
"value" : 511.68695465910014
}
},
{
"key" : 3000.0,
"doc_count" : 5644,
"avg_ticket_price" : {
"value" : 676.1384397110645
},
"derivative_avg_ticket_price" : {
"value" : 164.45148505196437
}
},
{
"key" : 6000.0,
"doc_count" : 2513,
"avg_ticket_price" : {
"value" : 687.4682523922478
},
"derivative_avg_ticket_price" : {
"value" : 11.329812681183284
}
},
{
"key" : 9000.0,
"doc_count" : 866,
"avg_ticket_price" : {
"value" : 681.8708027229574
},
"derivative_avg_ticket_price" : {
"value" : -5.597449669290427
}
},
{
"key" : 12000.0,
"doc_count" : 27,
"avg_ticket_price" : {
"value" : 695.5086613407841
},
"derivative_avg_ticket_price" : {
"value" : 13.637858617826737
}
}
]
}
}
}
## 累计求和
POST kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"age": {
"histogram": {
"field": "DistanceMiles",
"min_doc_count": 1,
"interval": 3000
},
"aggs": {
"avg_ticket_price": {
"avg": {
"field": "AvgTicketPrice"
}
},
"cumulative_avg_ticket_price":{
"cumulative_sum": {
"buckets_path": "avg_ticket_price"
}
}
}
}
}
}
## 结果展示
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"age" : {
"buckets" : [
{
"key" : 0.0,
"doc_count" : 4009,
"avg_ticket_price" : {
"value" : 511.68695465910014
},
"cumulative_avg_ticket_price" : {
"value" : 511.68695465910014
}
},
{
"key" : 3000.0,
"doc_count" : 5644,
"avg_ticket_price" : {
"value" : 676.1384397110645
},
"cumulative_avg_ticket_price" : {
"value" : 1187.8253943701648
}
},
{
"key" : 6000.0,
"doc_count" : 2513,
"avg_ticket_price" : {
"value" : 687.4682523922478
},
"cumulative_avg_ticket_price" : {
"value" : 1875.2936467624127
}
},
{
"key" : 9000.0,
"doc_count" : 866,
"avg_ticket_price" : {
"value" : 681.8708027229574
},
"cumulative_avg_ticket_price" : {
"value" : 2557.16444948537
}
},
{
"key" : 12000.0,
"doc_count" : 27,
"avg_ticket_price" : {
"value" : 695.5086613407841
},
"cumulative_avg_ticket_price" : {
"value" : 3252.673110826154
}
}
]
}
}
}
## 滑动窗口
POST kibana_sample_data_flights/_search
{
"size": 0,
"aggs": {
"age": {
"histogram": {
"field": "DistanceMiles",
"min_doc_count": 1,
"interval": 3000
},
"aggs": {
"avg_ticket_price": {
"avg": {
"field": "AvgTicketPrice"
}
},
"moving_avg_ticket_price":{
"moving_fn": {
"buckets_path": "avg_ticket_price",
"window":10,
"script": "MovingFunctions.min(values)"
}
}
}
}
}
}
## 结果展示
{
"took" : 9,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"age" : {
"buckets" : [
{
"key" : 0.0,
"doc_count" : 4009,
"avg_ticket_price" : {
"value" : 511.68695465910014
},
"moving_avg_ticket_price" : {
"value" : null
}
},
{
"key" : 3000.0,
"doc_count" : 5644,
"avg_ticket_price" : {
"value" : 676.1384397110645
},
"moving_avg_ticket_price" : {
"value" : 511.68695465910014
}
},
{
"key" : 6000.0,
"doc_count" : 2513,
"avg_ticket_price" : {
"value" : 687.4682523922478
},
"moving_avg_ticket_price" : {
"value" : 511.68695465910014
}
},
{
"key" : 9000.0,
"doc_count" : 866,
"avg_ticket_price" : {
"value" : 681.8708027229574
},
"moving_avg_ticket_price" : {
"value" : 511.68695465910014
}
},
{
"key" : 12000.0,
"doc_count" : 27,
"avg_ticket_price" : {
"value" : 695.5086613407841
},
"moving_avg_ticket_price" : {
"value" : 511.68695465910014
}
}
]
}
}
}