@Service
@Slf4j
public class DynamicSecurityScanServiceImpl implements DynamicSecurityScanService {
// Spring Data Elasticsearch template, injected from the bean named "elasticsearchTemplate".
// NOTE(review): not referenced in the visible part of this class — confirm it is still needed.
@Qualifier("elasticsearchTemplate")
@Autowired
private ElasticsearchRestTemplate elasticsearchRestTemplate;
// High-level REST client, injected from the bean named "elasticsearchClient";
// searchSystemDoc uses it to issue the search, scroll and clear-scroll requests.
@Qualifier("elasticsearchClient")
@Autowired
private RestHighLevelClient restHighLevelClient;
// Repository for DynamicSecurityScanDoc documents.
// NOTE(review): not referenced in the visible part of this class — confirm it is still needed.
@Autowired
private DynamicSecurityScanDocRepository repository;
/**
 * Searches the index named by {@code Constant.INDEXFORVERSIONDETAUL} and returns one page
 * of matching documents together with the total hit count.
 *
 * <p>Paging is done with the scroll API: the initial search returns page 1, and each
 * following scroll request returns the next page, until the requested page is reached
 * or the results run out. The scroll context is always released before returning,
 * including when a request fails.
 *
 * <p>Background notes kept from the original documentation:
 * <ul>
 *   <li>A {@code bool} query supports four clause types: {@code must}, {@code should},
 *       {@code must_not} and {@code filter} (filter results can be cached).</li>
 *   <li>{@code match} analyzes its input (fuzzy, full-text matching), {@code term} is an
 *       exact match, {@code multi_match} matches one input against several fields.</li>
 *   <li>{@code from}/{@code size} is shallow paging, {@code scroll} is deep paging.
 *       With shallow paging the offset would be {@code (pageNum - 1) * pageSize}.</li>
 * </ul>
 *
 * @param overviewDto search criteria (user, appName, appVersion, branchName,
 *                    vulnerabilityResult, type; empty values are ignored) plus the
 *                    requested page number and page size
 * @return the custom page object holding the documents of the requested page (empty when
 *         the page number is below 1 or beyond the last page) and the total hit count
 * @throws IOException if the search or a scroll request fails
 */
@Override
public Page<DynamicSecurityScanDoc> searchSystemDoc(OverviewDto overviewDto) throws IOException {
    Page<DynamicSecurityScanDoc> page = new Page<>(overviewDto.getCurPage(), overviewDto.getPageSize());
    final Scroll scroll = new Scroll(TimeValue.timeValueSeconds(60));

    // 1. Create the search request and open a scroll context on it.
    SearchRequest searchRequest = new SearchRequest(Constant.INDEXFORVERSIONDETAUL);
    searchRequest.scroll(scroll);

    // 2. Build the request body; one scroll batch corresponds to one page.
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().trackTotalHits(true);
    sourceBuilder.size(overviewDto.getPageSize());
    sourceBuilder.query(buildOverviewQuery(overviewDto));
    DslService.printDsl(sourceBuilder);
    searchRequest.source(sourceBuilder);

    List<DynamicSecurityScanDoc> list = new ArrayList<>();
    int totalHits = 0;
    int pageNum = overviewDto.getCurPage();
    String scrollId = null;
    try {
        // 3. Send the request; the first response already contains page 1.
        SearchResponse searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        // Remember the scroll id right away so the context is released even for page 1.
        scrollId = searchResponse.getScrollId();
        // Clamp instead of a bare cast so a very large hit count cannot wrap to a negative int.
        totalHits = (int) Math.min(searchResponse.getHits().getTotalHits().value, Integer.MAX_VALUE);

        int currentPage = 1;
        // A page number below 1 can never match, so do not scroll through the whole result set for it.
        while (pageNum >= 1 && searchResponse.getHits().getHits().length != 0) {
            if (currentPage == pageNum) {
                // Convert the hits of the CURRENT response, not those of the initial search.
                execute(searchResponse.getHits(), list);
                log.info("ES分页查询成功");
                break;
            }
            currentPage++;
            // Continue from the cursor returned by the previous response.
            SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
            scrollRequest.scroll(scroll);
            searchResponse = restHighLevelClient.scroll(scrollRequest, RequestOptions.DEFAULT);
            scrollId = searchResponse.getScrollId();
        }
    } finally {
        if (scrollId != null) {
            releaseScrollContext(scrollId);
        }
    }

    page.setTotalCount(totalHits);
    page.setList(list);
    return page;
}

/** Builds the bool query: every non-empty criterion of the DTO becomes one {@code must} clause. */
private BoolQueryBuilder buildOverviewQuery(OverviewDto overviewDto) {
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
    if (!StringUtils.isEmpty(overviewDto.getUser())) {
        // The user may appear in any of the three role fields.
        boolQuery.must(QueryBuilders.multiMatchQuery(overviewDto.getUser(), "owner", "developer", "tester"));
    }
    if (!StringUtils.isEmpty(overviewDto.getAppName())) {
        boolQuery.must(QueryBuilders.matchQuery("appName", overviewDto.getAppName()));
    }
    if (!StringUtils.isEmpty(overviewDto.getAppVersion())) {
        boolQuery.must(QueryBuilders.termQuery("appVersion.keyword", overviewDto.getAppVersion()));
    }
    if (!StringUtils.isEmpty(overviewDto.getBranchName())) {
        boolQuery.must(QueryBuilders.matchQuery("branchName", overviewDto.getBranchName()));
    }
    if (!StringUtils.isEmpty(overviewDto.getVulnerabilityResult())) {
        boolQuery.must(QueryBuilders.termQuery("vulnerabilityResult.keyword", overviewDto.getVulnerabilityResult()));
    }
    if (!StringUtils.isEmpty(overviewDto.getType())) {
        boolQuery.must(QueryBuilders.termQuery("type.keyword", overviewDto.getType()));
    }
    return boolQuery;
}

/** Releases the server-side scroll context; best effort, so a cleanup failure never hides the search outcome. */
private void releaseScrollContext(String scrollId) {
    ClearScrollRequest clearScrollRequest = new ClearScrollRequest();
    clearScrollRequest.addScrollId(scrollId);
    try {
        restHighLevelClient.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
    } catch (IOException | RuntimeException e) {
        // The context expires on its own after the scroll keep-alive; log and carry on.
        log.warn("Failed to clear Elasticsearch scroll context {}", scrollId, e);
    }
}
ES分页
size+from浅分页 按照一般的查询流程来说,如果我想查询前10条数据: 客户端请求发送给某个节点 节点转发给各个分片,查询每个分片上的前10条中的部分数据 结果返回给节点, 整合数据,提取前10条 返回给请求客户端
GET sdl-overview/_search
{
"from": 1,
"size": 20,
"query": {
"wildcard": {
"appName.keyword": {
"value": "*0*"
}
}
}
}
- 这种浅分页只适合少量数据, 因为随着from增大,查询的时间就会越长;而且数据量越大,查询的效率呈指数下降.
- 优点: from+size在数据量不大的情况下,效率比较高.
- 缺点: 在数据量非常大的情况下,from+size分页会把全部记录加载到内存中,这样做不但运行速度特别慢,而且容易让es出现内存不足而挂掉.
scroll深分页
如果请求的页数较少(假设每页20个docs), Elasticsearch不会有什么问题,但是如果页数较大时,比如请求第20页,Elasticsearch不得不取出第1页到第20页的所有docs,再去除第1页到第19页的docs,得到第20页的docs。
解决的方式就是使用scroll,scroll就是维护了当前索引段的一份快照信息–缓存(这个快照信息是你执行这个scroll查询时的快照)。
- 初始化 可以把 scroll 分为初始化和遍历两步: 1、初始化时将所有符合搜索条件的搜索结果缓存起来,可以想象成快照 2、遍历时,从这个快照里取数据
GET sdl-overview/_search?scroll=5m
{
"size": 5,
"query": {
"wildcard": {
"appName.keyword": {
"value": "*0*"
}
}
}
}
- 遍历 在遍历的时候,拿到上一次遍历中的_scroll_id,然后带scroll参数,重复上一次的遍历步骤,直到返回的数据为空,就表示遍历完成
复杂的业务查询DSL(bool)
{
"query": {
"bool": {
"must": [
{"term":{"appVersion.keyword":"external-20200702-5009"}},
{"match":{"appName":"葵花谱"}},
{"term":{"type.keyword":"0"}},
{"match":{"branchName":"release-1.0"}},
{"term":{"vulnerabilityResult.keyword":"1"}},
{"multi_match": {
"query": "Gosaint3",
"fields": ["developer","tester","owner"]
}
}
]
}
}
}