ES Java High Level REST Client 聚合返回值解析总结
在使用ES Java Rest High Client的时候,因为初学ES,所以对解析返回值一脸懵逼。
现针对返回值解析写下此文。
原始Rest请求
GET log/orderLog/_search?size=0
{
"query":{
"bool": {
"must": [
{"match_phrase": {
"operator": "15061106601"
}},
{"range": {
"time": {
"gte": 1536019200000,
"lte": 1536205600000
}
}}
]
}
},
"aggs":{
"day_order":{
"date_histogram": {
"field": "time",
"interval": "day"
}
, "aggs": {
"grap_order_success": {
"filter": {
"terms": {
"operating": [
"抢",
"单",
"成",
"功"
]
}
},
"aggs": {
"order_success_count": {
"value_count": {
"field": "time"
}
}
}
},
"order_ok":{
"filter": {
"terms": {
"operating": [
"确",
"认",
"方",
"案"
]
}
},
"aggs": {
"order_ok_count": {
"value_count": {
"field": "time"
}
}
}
},
"percent_customizer":{
"bucket_script": {
"buckets_path": {
"orderGrap":"grap_order_success>order_success_count",
"orderOk":"order_ok>order_ok_count"
},
"script": "params.orderOk/params.orderGrap*100"
}
}
}
}
}
}
此请求的内容为:计算一段时间内,某一操作人确认方案数与抢单成功数的比值,我们称之为确认方案率。计算方式为:
每日某人的确认方案数量 / 每日某人的抢单成功数量 × 100(与脚本 params.orderOk/params.orderGrap*100 一致)。
- 先使用起止时间和操作人进行筛选;
- 主要使用Date Histogram Aggregation(日期直方图聚合),对日期以天为间隔分桶;
- 对每天的数据,进行以下过滤:
- 选出每天进行操作为“抢单成功”的文档(等一下说一下Term),使用filter的terms,并统计数量,使用value count。
- 选出每天进行操作为“确认方案”的文档,并统计数量
- 使用Bucket Script Aggregation(桶脚本聚合),计算确认方案率。
Java 代码
/**
 * Builds and executes the "confirmation rate" search.
 *
 * <p>The query optionally narrows documents by operator and by a range on {@code time}. The
 * aggregation is a date histogram named {@code day_order} on {@code time}; every bucket carries two
 * filter aggregations (each with a nested value count) and a bucket script that evaluates
 * {@code params.orderOk/params.orderGrab*100}.
 *
 * @param grabOrderDTO supplies the operator, the start/end time and the histogram interval
 * @return the response returned by {@code client.search}
 * @throws IOException declared by this method; presumably raised by the client call
 */
public SearchResponse orderOkPercent(GrabOrderDTO grabOrderDTO) throws IOException {
    SearchRequest request = new SearchRequest(ES_INDEX).types(TYPE);

    // Query part: each clause is optional and is added as a "must" clause.
    BoolQueryBuilder filters = QueryBuilders.boolQuery();
    // Restrict to one customizer (operator) when one is given.
    if (!Strings.isNullOrEmpty(grabOrderDTO.getOperator())) {
        filters.must(QueryBuilders.matchPhraseQuery("operator", grabOrderDTO.getOperator()));
    }
    // The range clause is added only when a start time is given; the end time is passed through
    // unchecked, so an end time without a start time is not applied.
    if (!Strings.isNullOrEmpty(grabOrderDTO.getStartTime())) {
        filters.must(
            QueryBuilders.rangeQuery("time")
                .gte(grabOrderDTO.getStartTime())
                .lte(grabOrderDTO.getEndTime()));
    }

    // Date histogram on "time"; the interval value is produced by setInterval(...).
    DateHistogramAggregationBuilder dayOrder =
        AggregationBuilders.dateHistogram("day_order")
            .field("time")
            .dateHistogramInterval(setInterval(grabOrderDTO.getInterval()));

    // "Grab succeeded" filter with a value count on "time".
    AggregationBuilder grabbed =
        AggregationBuilders.filter(
                "grab_order_success", QueryBuilders.termsQuery("operating", "抢", "单", "成", "功"))
            .subAggregation(AggregationBuilders.count("order_success_count").field("time"));

    // "Plan confirmed" filter with a value count on "time".
    AggregationBuilder confirmed =
        AggregationBuilders.filter(
                "ok_order", QueryBuilders.termsQuery("operating", "确", "认", "方", "案"))
            .subAggregation(AggregationBuilders.count("order_ok_count").field("time"));

    // Bucket script: maps script parameter names to the paths of the two counts above.
    HashMap<String, String> bucketsPath = new HashMap<>();
    bucketsPath.put("orderGrab", "grab_order_success>order_success_count");
    bucketsPath.put("orderOk", "ok_order>order_ok_count");
    BucketScriptPipelineAggregationBuilder percent =
        new BucketScriptPipelineAggregationBuilder(
            "percent_customizer", bucketsPath, new Script("params.orderOk/params.orderGrab*100"));

    dayOrder.subAggregation(grabbed).subAggregation(confirmed).subAggregation(percent);

    // size(0) as in the original: the source builder is created with size 0.
    request.source(new SearchSourceBuilder().size(0).query(filters).aggregation(dayOrder));
    return client.search(request);
}
不得不说,Java High Level REST Client 有一种让人见名知意的感觉,用起来还是很爽的。
直接添加查询条件,添加聚合、子聚合,添加source,然后查询就可以了,此处不再赘述。
返回结果
以下是返回的结果(保留java 返回的原格式,在名称前有聚合类型标注,例如 date_histogram#day_order):
{
"took":3,
"timed_out":false,
"_shards":{
"total":10,
"successful":10,
"skipped":0,
"failed":0
},
"hits":{
"total":10,
"max_score":0,
"hits":[
]
},
"aggregations":{
"date_histogram#day_order":{
"buckets":[
{
"key_as_string":"1536019200000",
"key":1536019200000,
"doc_count":1,
"filter#ok_order":{
"doc_count":0,
"value_count#order_ok_count":{ "value":0 } },
"filter#grab_order_success":{
"doc_count":1,
"value_count#order_success_count":{ "value":1 } },
"simple_value#percent_customizer":{
"value":0 }
},
{
"key_as_string":"1536105600000",
"key":1536105600000,
"doc_count":9,
"filter#ok_order":{
"doc_count":3,
"value_count#order_ok_count":{ "value":3 } },
"filter#grab_order_success":{
"doc_count":6,
"value_count#order_success_count":{ "value":6 } },
"simple_value#percent_customizer":{
"value":50 }
}
]
}
}
}
此处返回了两条记录。
在aggregations中,是一个名为day_order的date_histogram聚合,包含两个桶(bucket),获取桶的Java代码如下:
// The response key is "date_histogram#day_order": the part before '#' is the aggregation type,
// so the "day_order" aggregation is read as a Histogram.
Histogram dayOrder = response.getAggregations().get("day_order");
// Walk through the returned buckets.
for (Histogram.Bucket bucket : dayOrder.getBuckets()) {
    // put your per-bucket handling here
}
针对aggregations的第一条:
{
"key_as_string":"1536019200000",
"key":1536019200000,
"doc_count":1,
"filter#ok_order":{
"doc_count":0,
"value_count#order_ok_count":{
"value":0
}
},
"filter#grab_order_success":{
"doc_count":1,
"value_count#order_success_count":{
"value":1
}
},
"simple_value#percent_customizer":{
"value":0
}
}
结果中分别有:
1. ok_order,订单确认数,类型为filter,里面包含一个value_count 类型的数值计数order_ok_count
2. grab_order_success,订单抢单成功数量,结构同上
3. percent_customizer,类型为simple_value,为桶脚本聚合产生的结果的值。
话不多说,上代码:
// Fetch the "ok_order" filter aggregation of the current bucket.
// NOTE(review): the original note said to call okFilter.getValue(); a filter result presumably
// exposes its document count via getDocCount() instead — confirm against the client version used.
Filter okFilter = bucket.getAggregations().get("ok_order");
// Fetch the nested value count; read the number with okCount.getValue().
ValueCount okCount = okFilter.getAggregations().get("order_ok_count");
// Same two steps for the "grab_order_success" filter and its "order_success_count" value count.
Filter grabFilter = bucket.getAggregations().get("grab_order_success");
ValueCount grabCount = grabFilter.getAggregations().get("order_success_count");
// Fetch the bucket-script result (the percentage); read it with value.getValueAsString().
SimpleValue value = bucket.getAggregations().get("percent_customizer");
解析完整代码:
/**
 * Walks the buckets of the {@code day_order} histogram and, for each bucket, reads the
 * {@code ok_order} and {@code grab_order_success} filter aggregations together with their nested
 * value counts.
 *
 * <p>NOTE(review): the counts are read but not stored, {@code percent_customizer} is not read, and
 * the {@code PercentCustomizer} created per bucket is discarded when the iteration ends —
 * presumably the remaining mapping was left out of this example; confirm before relying on it.
 *
 * @param response search response that contains a {@code day_order} aggregation
 */
public void aggreation2Percent(SearchResponse response){
    Histogram dayOrder = response.getAggregations().get("day_order");
    // One iteration per histogram bucket.
    for (Histogram.Bucket day : dayOrder.getBuckets()) {
        PercentCustomizer row = new PercentCustomizer();
        row.setKey_as_string(day.getKeyAsString());

        // "Plan confirmed" filter and its nested value count.
        Filter confirmedFilter = day.getAggregations().get("ok_order");
        ValueCount confirmedCount = confirmedFilter.getAggregations().get("order_ok_count");

        // "Grab succeeded" filter and its nested value count.
        Filter grabbedFilter = day.getAggregations().get("grab_order_success");
        ValueCount grabbedCount = grabbedFilter.getAggregations().get("order_success_count");
    }
}
总结
知道返回结果各个字段代表的意义后,就很简单了。可是之前一直在傻傻的想get后的值到底是什么意思…
知道是什么意思之后,敲起代码来就舒服多了。