Es有三种分页查询方式,不知道你平时工作中用到的是哪种呢?
一、from+Size
1、深度分页或者size特别大的时候,会出现deep pagination问题.并且因为Es自身的保护机制(index.max_result_window默认是10000),如果from+size大于10000就会报错.
2、该查询的实际原理类似于mysql中的limit,比如查询第10001条数据,那么需要将前面的10000条都拿出来,进行过滤,最终才得到数据(性能比较差,实现简单,适用于少量数据).
二、scroll
1、高效进行滚动查询,首次查询会在内存中保存一个历史快照以及游标(scroll_id),记录当前消息查询的终止位置,下次查询的时候将基于游标进行消费(性能良好,不具备实时性,一般是用于大量数据导出或索引重建)
2、可以查询10000条以上数据.
3、当使用完查询的数据之后,记得要手动清理,因为scroll查询会生成快照,虽然会有过期时间,但是如果并发访问量激增,快照在到达过期时间之前不断累积,就会导致内存溢出.
三、search after
1、缺点是不能够随机跳转分页,只能是一页一页的向后翻(当有新数据进来,也能实时查询到),并且需要至少指定一个唯一不重复的字段来排序(一般是_id)
2、当使用search_after时,from值必须设置为0或者-1
3、可以查询10000条以上数据.
from+size demo
// Offset-based (from + size) pagination demo.
// NOTE(review): if QueryBuilder is Elasticsearch's interface it cannot be instantiated
// with `new` - presumably a placeholder for the real query; confirm.
QueryBuilder query = new QueryBuilder();
SearchRequest searchRequest = new SearchRequest(index);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
        // Paging parameters.
        // NOTE(review): from() is normally a zero-based hit offset rather than a page
        // number - confirm that page.getPage() already returns an offset.
        .from(page.getPage())
        .size(page.getSize())
        .trackTotalHits(true)
        // Filter condition.
        .query(query);
if (sort != null) {
    // Caller-supplied ordering, followed by _id as a secondary sort key.
    searchSourceBuilder.sort(sort.getSortFiled(), sort.getSortOrder());
    searchSourceBuilder.sort("_id", SortOrder.DESC);
}
if (fields != null) {
    // Fields to include in the result.
    searchSourceBuilder.fetchSource(fields, null);
}
searchRequest.source(searchSourceBuilder);
try {
    // Run the search.
    SearchResponse searchResponse = restClient.search(searchRequest, RequestOptions.DEFAULT);
    SearchHits searchHits = searchResponse.getHits();
    if (searchHits == null) {
        return EsResult.EMPTY;
    }
    // Convert the raw hits; clazz is the target DTO type.
    List<T> data = covert2JavaBeanList(searchHits.getHits(), clazz);
    // Clamp the long hit count to int. Round-tripping through a String parse would
    // fail for counts above Integer.MAX_VALUE and silently report a total of 0.
    long hitCount = searchHits.getTotalHits().value;
    return new EsResult<>((int) Math.min(hitCount, Integer.MAX_VALUE), data);
} catch (IOException e) {
    EsExceptionUtil.dealIOException(e);
} catch (ElasticsearchException e1) {
    EsExceptionUtil.dealElasticsearchException(e1);
}
// Reached only when one of the exception handlers above returned normally.
return EsResult.EMPTY;
scroll demo
// Scroll pagination demo: the first call (empty scrollId) opens a scroll with a regular
// search; later calls fetch the next batch using the scroll id alone.
// Keep-alive for the scroll; it is sent again with every request below.
// NOTE(review): 30 minutes is a long keep-alive for something re-sent on each call -
// consider a shorter value to limit how long scrolls stay open.
Scroll scroll = new Scroll(TimeValue.timeValueMinutes(30));
SearchResponse searchResponse = null;
if (StringUtils.isEmpty(scrollId)) {
    // Build the initial search request against the index.
    SearchRequest searchRequest = new SearchRequest(index);
    // Assemble the query conditions.
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
            .trackTotalHits(true)
            .query(query);
    if (CollectionUtils.isNotEmpty(sorts)) {
        for (EsSort sort : sorts) {
            // Sort field and direction.
            searchSourceBuilder.sort(sort.getSortFiled(), sort.getSortOrder());
        }
    }
    if (fields != null) {
        searchSourceBuilder.fetchSource(fields, null);
    }
    // Number of hits returned per batch.
    searchSourceBuilder.size(pageSize);
    // Attach the query conditions to the request.
    searchRequest.source(searchSourceBuilder);
    // Mark the request as a scroll search.
    searchRequest.scroll(scroll);
    try {
        searchResponse = restClient.search(searchRequest, RequestOptions.DEFAULT);
        return gettEsScrollResult(clazz, searchResponse);
    } catch (IOException e) {
        EsExceptionUtil.dealIOException(e);
    } catch (ElasticsearchException e1) {
        EsExceptionUtil.dealElasticsearchException(e1);
    }
} else {
    // Subsequent calls: query directly by scroll id.
    SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId);
    searchScrollRequest.scroll(scroll);
    // Send the scroll request through the REST client.
    try {
        searchResponse = restClient.scroll(searchScrollRequest, RequestOptions.DEFAULT);
        return gettEsScrollResult(clazz, searchResponse);
    } catch (IOException e) {
        EsExceptionUtil.dealIOException(e);
    } catch (ElasticsearchException e1) {
        EsExceptionUtil.dealElasticsearchException(e1);
    }
}
// Fallback: reached only when an exception handler above returned normally.
SearchHits searchHits;
List<T> data = Lists.newArrayList();
if (searchResponse != null) {
    searchHits = searchResponse.getHits();
    data = covert2JavaBeanList(searchHits.getHits(), clazz);
} else {
    searchHits = SearchHits.empty();
}
// Clamp the long hit count to int. Round-tripping through a String parse would fail
// for counts above Integer.MAX_VALUE and silently report a total of 0.
long hitCount = searchHits.getTotalHits().value;
String nextScrollId = searchResponse != null ? searchResponse.getScrollId() : "";
return new EsScrollResult<>((int) Math.min(hitCount, Integer.MAX_VALUE), nextScrollId, data);
手动清理scroll demo
/**
 * Clears the scrolls identified by the given scroll IDs.
 *
 * <p>Null and empty IDs are skipped; if nothing remains, no request is sent. An
 * {@link IOException} from the client is logged as a warning and not rethrown.
 *
 * @param scrollIds scroll IDs to clear; may be null or empty
 */
public void clearScroll(List<String> scrollIds) {
    if (CollectionUtils.isEmpty(scrollIds)) {
        return;
    }
    // Reject null as well as "": comparing against the empty string alone lets null
    // entries through to addScrollId.
    List<String> notEmptyIds = scrollIds.stream()
            .filter(id -> id != null && !id.isEmpty())
            .collect(Collectors.toList());
    if (CollectionUtils.isEmpty(notEmptyIds)) {
        return;
    }
    ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
    notEmptyIds.forEach(clearScrollRequest::addScrollId);
    try {
        ClearScrollResponse clearScrollResponse = restClient.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
        log.info("清理scrollId:{},状态:{},释放空间:{}", notEmptyIds, clearScrollResponse.isSucceeded(), clearScrollResponse.getNumFreed());
    } catch (IOException e) {
        // Best-effort cleanup: log and continue.
        log.warn("清理scroll失败", e);
    }
}
search after demo
// search_after pagination demo: each call returns one page plus the sort values of its
// last hit, which the caller passes back as sortValues to request the following page.
SearchRequest searchRequest = new SearchRequest(index);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
        .query(query)
        .fetchSource(fields, null)
        .trackTotalHits(true)
        .size(pageSize);
// Non-empty sortValues means this is not the first page.
if (sortValues != null && sortValues.length != 0) {
    searchSourceBuilder.searchAfter(sortValues);
}
boolean idSort = false;
if (CollectionUtils.isNotEmpty(sorts)) {
    for (EsSort sort : sorts) {
        searchSourceBuilder.sort(sort.getSortFiled(), sort.getSortOrder());
        // Track whether the unique _id field is among the sort keys.
        if ("_id".equals(sort.getSortFiled())) {
            idSort = true;
        }
    }
}
if (!idSort) {
    // A unique sort key is required; refuse to run without _id.
    throw new EsDataException("排序参数必须包含_id");
}
searchRequest.source(searchSourceBuilder);
try {
    SearchResponse searchResponse = restClient.search(searchRequest, RequestOptions.DEFAULT);
    if (searchResponse.getHits() == null || searchResponse.getHits().getHits().length == 0) {
        return EsSearchAfterResult.EMPTY;
    }
    SearchHit[] searchHits = searchResponse.getHits().getHits();
    List<T> data = covert2JavaBeanList(searchHits, clazz);
    // Position of the last record on this page.
    SearchHit lastHit = searchHits[searchHits.length - 1];
    // Its sort values are returned so the caller can use them as the next page's cursor.
    Object[] lastSortValues = lastHit.getSortValues();
    // Clamp the long hit count to int. Round-tripping through a String parse would
    // fail for counts above Integer.MAX_VALUE and silently report a total of 0.
    long hitCount = searchResponse.getHits().getTotalHits().value;
    return new EsSearchAfterResult<>((int) Math.min(hitCount, Integer.MAX_VALUE), data, lastSortValues);
} catch (IOException e) {
    EsExceptionUtil.dealIOException(e);
} catch (ElasticsearchException e1) {
    EsExceptionUtil.dealElasticsearchException(e1);
}
// Reached only when one of the exception handlers above returned normally.
return EsSearchAfterResult.EMPTY;
总结:
1、from + size之和不能超过es保护机制(index.max_result_window,默认10000)的大小,否则会报错
2、使用scroll的时候一定要记得手动清理掉缓存
3、searchAfter 一定要设置唯一的排序字段,否则可能导致查询结果缺失或重复.