1 SpringBoot 集成ES集群
1.2 pom
<!-- Parent POM: this project inherits from spring-boot-starter-parent 2.3.0.RELEASE -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.3.0.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<!-- Build properties: the Java version used by this project -->
<properties>
<java.version>1.8</java.version>
</properties>
<dependencies>
<!-- Spring Boot web starter (no explicit version declared here) -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring Boot test starter, test scope only -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- Logging -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-logging</artifactId>
</dependency>
<!-- Common utility libraries -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
<!-- NOTE(review): fastjson 1.2.x has had deserialization security advisories; check whether 1.2.71 is affected and consider upgrading -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.71</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<!-- Elasticsearch clients: high-level REST client and REST client, both pinned to 7.6.2 -->
<!-- https://mvnrepository.com/artifact/org.elasticsearch.client/elasticsearch-rest-high-level-client -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.6.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
<version>7.6.2</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Spring Boot Maven plugin; the version matches the parent POM version declared above (2.3.0.RELEASE) -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>2.3.0.RELEASE</version>
</plugin>
<!-- Compiler plugin: compile sources as Java 1.8 and target Java 1.8 bytecode -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
1.3 application.yml
# HTTP port of the embedded web server
server:
  port: 9000
# Elasticsearch cluster settings read by ElasticSearchConfig
# (must be nested so that ${elasticsearch.ips} and ${elasticsearch.scheme} resolve)
elasticsearch:
  # Comma-separated host:port entries, one per cluster node
  ips: 192.168.38.50:9200,192.168.38.51:9200,192.168.38.52:9200
  # Protocol used to reach every node
  scheme: http
1.4 ElasticSearchConfig
/**
 * Spring configuration that builds the Elasticsearch REST client beans from the
 * {@code elasticsearch.ips} and {@code elasticsearch.scheme} properties.
 */
@Configuration
public class ElasticSearchConfig {

    /** Cluster nodes as {@code host:port} entries, injected from {@code elasticsearch.ips}. */
    @Value("${elasticsearch.ips}")
    private String[] ipAddress;

    /** Protocol applied to every node, injected from {@code elasticsearch.scheme}. */
    @Value("${elasticsearch.scheme}")
    public String scheme;

    /**
     * Builds a {@link RestClientBuilder} targeting every well-formed {@code host:port} entry.
     * Entries that are not of the form {@code host:port} are skipped.
     *
     * @return a builder configured with the parsed hosts
     * @throws IllegalArgumentException if no entry is well-formed or a port is not a number
     */
    @Bean
    public RestClientBuilder restClientBuilder() {
        HttpHost[] hosts = Arrays.stream(ipAddress)
                .map(this::makeHttpHost)
                .filter(Objects::nonNull)
                .toArray(HttpHost[]::new);
        if (hosts.length == 0) {
            // Fail with the offending property value instead of the builder's generic message.
            throw new IllegalArgumentException(
                    "No valid host:port entry in elasticsearch.ips: " + Arrays.toString(ipAddress));
        }
        // The former callback only set a null credentials provider, which is the builder's
        // default state, so it has been dropped. Register a callback via
        // setHttpClientConfigCallback here if the cluster requires authentication.
        return RestClient.builder(hosts);
    }

    /**
     * Creates the high-level client on top of the shared builder.
     *
     * @param restClientBuilder the builder produced by {@link #restClientBuilder()}
     * @return the high-level REST client bean named {@code highLevelClient}
     */
    @Bean(name = "highLevelClient")
    public RestHighLevelClient highLevelClient(RestClientBuilder restClientBuilder) {
        return new RestHighLevelClient(restClientBuilder);
    }

    /**
     * Parses one {@code host:port} entry into an {@link HttpHost} using the configured scheme.
     *
     * @param s the raw entry
     * @return the parsed host, or {@code null} when the entry is null or not exactly {@code host:port}
     * @throws IllegalArgumentException if the port part is not a valid integer
     */
    private HttpHost makeHttpHost(String s) {
        if (s == null) {
            return null;
        }
        String[] address = s.trim().split(":");
        if (address.length != 2) {
            return null;
        }
        String host = address[0].trim();
        try {
            int port = Integer.parseInt(address[1].trim());
            return new HttpHost(host, port, scheme);
        } catch (NumberFormatException e) {
            // Keep the startup failure, but say which entry is wrong.
            throw new IllegalArgumentException("Invalid port in elasticsearch.ips entry '" + s + "'", e);
        }
    }
}
1.5 实体
/**
 * Job posting document that is stored in and read from Elasticsearch.
 */
@Data
@ToString
@NoArgsConstructor
@AllArgsConstructor
public class JobEntity {

    /** Document identifier. */
    private Long id;

    /** Age. */
    private Integer age;

    /** Region. */
    private String area;

    /** Expectation. */
    private String expect;

    /** Education level. */
    private String education;

    /** Salary. */
    private String salary;

    /** Job type. */
    private String type;

    /** Title. */
    private String title;

    /** Job description. */
    private String jobDescription;
}
2 ES库添加
/**
 * Spring Boot test that indexes a {@link JobEntity} document through the
 * high-level REST client.
 */
@Slf4j
@SpringBootTest(classes = ElasticSearchApplication.class)
@RunWith(SpringRunner.class)
public class ElasticSearchTest {

    /** Name of the index the documents are written to. */
    private static final String JOB_IDX = "job_index";

    @Autowired
    @Qualifier("highLevelClient")
    private RestHighLevelClient restHighLevelClient;

    /**
     * Serializes a sample job to JSON and indexes it under its own ID.
     *
     * @throws IOException declared by the client call
     */
    @Test
    public void addJobTest() throws IOException {
        // Sample document
        JobEntity job = new JobEntity();
        job.setId(1L);
        job.setTitle("JAVA高级开发工程师");
        job.setType("全职");
        job.setArea("北京");
        job.setAge(30);
        job.setEducation("本科及以上");
        job.setExpect("3年工作经验");
        job.setSalary("20K/月");
        job.setJobDescription("高级开发工程师,抗压能力强,热爱学习。");

        // The entity ID doubles as the document ID; the body is the entity as JSON.
        String documentId = String.valueOf(job.getId());
        String payload = JSON.toJSONString(job);
        IndexRequest request = new IndexRequest(JOB_IDX)
                .id(documentId)
                .source(payload, XContentType.JSON);

        // Send the index request.
        restHighLevelClient.index(request, RequestOptions.DEFAULT);
        log.info("添加成功");
    }
}
3 ES 根据ID查找数据
/**
 * Fetches the document with ID 1 from {@code JOB_IDX}, converts it to a
 * {@link JobEntity} and logs it.
 *
 * @throws IOException declared by the client call
 * @throws IllegalStateException if the document does not exist or has no source
 */
@Test
public void findJobTest() throws IOException {
    Long id = 1L;
    // 1. Build the get request for the document ID.
    GetRequest getRequest = new GetRequest(JOB_IDX, String.valueOf(id));
    // 2. Send it and read the server response.
    GetResponse getResponse = restHighLevelClient.get(getRequest, RequestOptions.DEFAULT);
    // A missing document yields a null source; fail with a clear message instead of
    // an NPE on the parsed (null) entity further down.
    if (!getResponse.isExists() || getResponse.isSourceEmpty()) {
        throw new IllegalStateException("Document " + id + " not found in index " + JOB_IDX);
    }
    // 3. Take the document source as a JSON string.
    String json = getResponse.getSourceAsString();
    // 4. Convert the JSON string into a JobEntity with fastjson.
    JobEntity jobDetail = JSON.parseObject(json, JobEntity.class);
    // 5. Set the ID explicitly from the request rather than relying on the source content.
    jobDetail.setId(id);
    log.info(jobDetail.toString());
}
4 ES 根据ID更新数据
/**
 * Updates the document with ID 1 (region changed to 深圳) when it already exists;
 * does nothing otherwise.
 *
 * @throws IOException declared by the client calls
 */
@Test
public void updateJobTest() throws IOException {
    JobEntity job = new JobEntity();
    job.setId(1L);
    job.setTitle("JAVA高级开发工程师");
    job.setType("全职");
    // Region is changed to Shenzhen
    job.setArea("深圳");
    job.setAge(30);
    job.setEducation("本科及以上");
    job.setExpect("3年工作经验");
    job.setSalary("20K/月");
    job.setJobDescription("高级开发工程师,抗压能力强,热爱学习。");

    String documentId = String.valueOf(job.getId());

    // Nothing to update when the document is absent.
    GetRequest existsRequest = new GetRequest(JOB_IDX, documentId);
    if (!restHighLevelClient.exists(existsRequest, RequestOptions.DEFAULT)) {
        return;
    }

    // Send the entity as the update document.
    UpdateRequest request = new UpdateRequest(JOB_IDX, documentId);
    request.doc(JSON.toJSONString(job), XContentType.JSON);
    restHighLevelClient.update(request, RequestOptions.DEFAULT);
}
5 根据ID删除
/**
 * Deletes the document with ID 1 from {@code JOB_IDX}.
 *
 * @throws IOException declared by the client call
 */
@Test
public void deleteJobTest() throws IOException {
    final long documentId = 1L;
    // Build the delete request and execute it in one step.
    restHighLevelClient.delete(
            new DeleteRequest(JOB_IDX, String.valueOf(documentId)),
            RequestOptions.DEFAULT);
}
6 根据关键字分页搜索
数据量较大时,查询通常需要分页:我们指定页码和每页显示的条数,然后由Elasticsearch返回对应页码的数据。
6.1 数据准备
https://gitee.com/zhurongsheng/elasticsearch-data/blob/master/es.data
6.2 使用from和size来进行分页
这是ES分页最常用的一种方案,跟mysql类似,from指定查询的起始位置,size表示从起始位置开始的文档数量。看个例子。
GET /bank/_search
{
"query": {
"match_all": {}
},
"from": 0,
"size": 10
}
ES默认的分页深度是10000(由索引设置index.max_result_window控制),也就是说from+size超过10000就会报错。ES之所以有这个限制,是因为在分布式环境下深度分页的查询效率非常低。比如查询条件为from=990、size=10,在业务层就是每页展示10条数据时的第100页。但是在ES处理的时候,会分别从每个分片上取出排序靠前的1000条数据(即from+size条),然后在coordinating节点上汇总并重新排序,得到前1000条记录,最后只返回其中的最后10条。所以分页越深,ES处理的开销就越大,占用的内存也越多。
描述: JAVA方法。
/**
 * Spring Boot test demonstrating from/size pagination against the {@code bank} index.
 */
@Slf4j
@SpringBootTest(classes = ElasticSearchApplication.class)
@RunWith(SpringRunner.class)
public class EsPagingTest {

    /** Name of the index that is searched. */
    private static final String BANK_INDEX = "bank";

    @Autowired
    @Qualifier("highLevelClient")
    private RestHighLevelClient restHighLevelClient;

    /**
     * Searches for the keyword "Duke" and logs page 1 with 10 hits per page.
     *
     * @throws IOException declared by the client call
     */
    @Test
    public void fromSizeTest() throws IOException {
        JSONObject firstPage = searchByPage("Duke", 1, 10);
        log.info(firstPage.toJSONString());
    }

    /**
     * Runs a multi-match query over {@code firstname}, {@code lastname} and
     * {@code address} and returns one page of hits.
     *
     * @param keywords text to search for
     * @param pageNum  1-based page number; the offset is {@code (pageNum - 1) * pageSize}
     * @param pageSize number of hits per page
     * @return an object with {@code total} (total hit count) and {@code content}
     *         (the hits of this page, each with its document ID under {@code id})
     * @throws IOException declared by the client call
     */
    public JSONObject searchByPage(String keywords, int pageNum, int pageSize) throws IOException {
        SearchRequest request = new SearchRequest(BANK_INDEX);

        // Query, page size and starting offset are set in one fluent chain.
        SearchSourceBuilder source = new SearchSourceBuilder()
                .query(QueryBuilders.multiMatchQuery(keywords, "firstname", "lastname", "address"))
                .size(pageSize)
                .from((pageNum - 1) * pageSize);
        request.source(source);

        SearchResponse response = restHighLevelClient.search(request, RequestOptions.DEFAULT);

        // Turn every hit into a JSON object carrying the document ID.
        JSONArray content = new JSONArray();
        for (SearchHit hit : response.getHits().getHits()) {
            JSONObject document = JSON.parseObject(hit.getSourceAsString());
            document.put("id", hit.getId());
            content.add(document);
        }

        JSONObject result = new JSONObject();
        result.put("total", response.getHits().getTotalHits().value);
        result.put("content", content);
        return result;
    }
}
{
"total": 2,
"content": [
{
"account_number": 1,
"firstname": "Amber",
"address": "880 Holmes Lane",
"balance": 39225,
"gender": "M",
"city": "Brogan",
"employer": "Pyrami",
"state": "IL",
"id": "1",
"age": 32,
"email": "amberduke@pyrami.com",
"lastname": "Duke"
},
{
"account_number": 776,
"firstname": "Duke",
"address": "520 Doscher Street",
"balance": 29177,
"gender": "M",
"city": "Lafferty",
"employer": "Tripsch",
"state": "NC",
"id": "776",
"age": 24,
"email": "dukeatkinson@tripsch.com",
"lastname": "Atkinson"
}
]
}
6.3 使用scroll方式进行分页
在进行大量分页时,每次分页都需要将要查询的数据进行重新排序,这样非常浪费性能。使用scroll是将要用的数据一次性排序好,然后分批取出。性能要比from + size好得多。使用scroll查询后,排序后的数据会保持一定的时间,后续的分页查询都从该快照取数据即可。
GET /bank/_search?scroll=60m
{
"query": {
"multi_match": {
"query": "Street",
"fields": ["address","firstname","lastname"]
}
},
"from": 0,
"size": 10
}
描述: 后续分页请求携带上一次返回的scroll_id获取下一批数据(如下);对应的Java代码见其后。
GET _search/scroll
{
"scroll_id": "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAABicWWTROVmNmRHBURDZKS1JiQXhDUDhHUQ==",
"scroll": "60m"
}
/**
 * Scrolls through the results for "Street" 10 hits at a time, logs page 2,
 * and then releases the scroll context.
 *
 * @throws IOException declared by the client calls
 */
@Test
public void testPageByScroll() throws IOException {
    // 10 hits per page; fetch page 2 of the results for "Street"
    int pageNum = 2;
    JSONObject rs = null;
    String scrollId = null;
    try {
        for (int i = 1; i <= pageNum; i++) {
            // The first call (scrollId == null) opens the scroll; later calls continue it.
            rs = searchByScrollPage("Street", scrollId, 10);
            scrollId = rs.getString("scroll_id");
        }
        log.info(rs.toJSONString());
    } finally {
        // Release the scroll context explicitly instead of leaving it open until the
        // keep-alive expires. Fully-qualified name avoids touching the file's imports.
        if (scrollId != null) {
            org.elasticsearch.action.search.ClearScrollRequest clearScrollRequest =
                    new org.elasticsearch.action.search.ClearScrollRequest();
            clearScrollRequest.addScrollId(scrollId);
            restHighLevelClient.clearScroll(clearScrollRequest, RequestOptions.DEFAULT);
        }
    }
}
/**
 * Returns one batch of a scroll search over {@code firstname}, {@code lastname}
 * and {@code address}, with highlighted fragments merged into the hits.
 *
 * <p>When {@code scrollId} is {@code null} a new scroll search is started with a
 * 5-minute keep-alive. Otherwise the existing scroll is continued; in that case
 * {@code keywords} and {@code pageSize} are not used.
 *
 * @param keywords text to search for (first call only)
 * @param scrollId scroll ID returned by the previous call, or {@code null} to start
 * @param pageSize number of hits per batch (first call only)
 * @return an object with {@code scroll_id}, {@code content} (the hits of this batch,
 *         each with its document ID under {@code id}) and {@code totalNum}
 * @throws IOException declared by the client calls
 */
public JSONObject searchByScrollPage(String keywords, String scrollId, int pageSize) throws IOException {
    SearchResponse searchResponse;
    if (scrollId == null) {
        // First call: build the full search request.
        SearchRequest searchRequest = new SearchRequest(BANK_INDEX);
        SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
        MultiMatchQueryBuilder multiMatchQueryBuilder =
                QueryBuilders.multiMatchQuery(keywords, "firstname", "lastname", "address");
        searchSourceBuilder.query(multiMatchQueryBuilder);

        // Highlight matches in all three searched fields.
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.field("firstname");
        highlightBuilder.field("lastname");
        highlightBuilder.field("address");
        highlightBuilder.preTags("<font color='red'>");
        highlightBuilder.postTags("</font>");
        searchSourceBuilder.highlighter(highlightBuilder);

        // Batch size
        searchSourceBuilder.size(pageSize);
        searchRequest.source(searchSourceBuilder);

        // Turn the search into a scroll search with a 5-minute keep-alive.
        searchRequest.scroll(TimeValue.timeValueMinutes(5));
        searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    } else {
        // Follow-up call: continue the scroll by ID and renew the keep-alive.
        SearchScrollRequest searchScrollRequest = new SearchScrollRequest(scrollId);
        searchScrollRequest.scroll(TimeValue.timeValueMinutes(5));
        searchResponse = restHighLevelClient.scroll(searchScrollRequest, RequestOptions.DEFAULT);
    }

    // Convert every hit to JSON, add its ID and overlay the highlighted text.
    JSONArray jsonArray = new JSONArray();
    for (SearchHit hit : searchResponse.getHits().getHits()) {
        JSONObject jsonObject = JSON.parseObject(hit.getSourceAsString());
        jsonObject.put("id", hit.getId());
        jsonArray.add(jsonObject);

        Map<String, HighlightField> highlightFieldMap = hit.getHighlightFields();
        applyHighlight(jsonObject, highlightFieldMap, "firstname");
        applyHighlight(jsonObject, highlightFieldMap, "lastname");
        applyHighlight(jsonObject, highlightFieldMap, "address");
    }

    // Wrap the batch together with the scroll ID and the total hit count.
    long totalNum = searchResponse.getHits().getTotalHits().value;
    JSONObject rs = new JSONObject();
    rs.put("scroll_id", searchResponse.getScrollId());
    rs.put("content", jsonArray);
    rs.put("totalNum", totalNum);
    return rs;
}

/**
 * Replaces {@code fieldName} in {@code document} with the concatenated highlight
 * fragments for that field; leaves the document unchanged when there are none.
 */
private static void applyHighlight(JSONObject document, Map<String, HighlightField> highlights, String fieldName) {
    HighlightField highlight = highlights.get(fieldName);
    if (highlight == null) {
        return;
    }
    StringBuilder builder = new StringBuilder();
    for (Text fragment : highlight.getFragments()) {
        builder.append(fragment);
    }
    document.put(fieldName, builder.toString());
}
查看结果:
{
"totalNum": 385,
"scroll_id": "DXF1ZXJ5QW5kRmV0Y2gBAAAAAAAAAM8WZ2M1bHplSFhTWEtGV0p6ZTBfb1lMUQ==",
"content": [
{
"account_number": 145,
"firstname": "Rowena",
"address": "891 Elton <font color='red'>Street</font>",
"balance": 47406,
"gender": "M",
"city": "Ripley",
"employer": "Asimiline",
"state": "NH",
"id": "145",
"age": 32,
"email": "rowenawilkinson@asimiline.com",
"lastname": "Wilkinson"
},
{
"account_number": 152,
"firstname": "Wolfe",
"address": "457 Guernsey <font color='red'>Street</font>",
"balance": 8088,
"gender": "M",
"city": "Adelino",
"employer": "Hivedom",
"state": "MS",
"id": "152",
"age": 21,
"email": "wolferocha@hivedom.com",
"lastname": "Rocha"
},
{
"account_number": 164,
"firstname": "Cummings",
"address": "308 Schaefer <font color='red'>Street</font>",
"balance": 9101,
"gender": "F",
"city": "Chaparrito",
"employer": "Comtrak",
"state": "WI",
"id": "164",
"age": 26,
"email": "cummingslittle@comtrak.com",
"lastname": "Little"
},
{
"account_number": 188,
"firstname": "Tia",
"address": "583 Ainslie <font color='red'>Street</font>",
"balance": 41504,
"gender": "F",
"city": "Summerset",
"employer": "Jasper",
"state": "UT",
"id": "188",
"age": 24,
"email": "tiamiranda@jasper.com",
"lastname": "Miranda"
},
{
"account_number": 190,
"firstname": "Blake",
"address": "636 Diamond <font color='red'>Street</font>",
"balance": 3150,
"gender": "F",
"city": "Crumpler",
"employer": "Quantasis",
"state": "KY",
"id": "190",
"age": 30,
"email": "blakedavidson@quantasis.com",
"lastname": "Davidson"
},
{
"account_number": 195,
"firstname": "Kaye",
"address": "955 Hopkins <font color='red'>Street</font>",
"balance": 5025,
"gender": "M",
"city": "Ola",
"employer": "Zork",
"state": "WY",
"id": "195",
"age": 31,
"email": "kayegibson@zork.com",
"lastname": "Gibson"
},
{
"account_number": 203,
"firstname": "Eve",
"address": "435 Furman <font color='red'>Street</font>",
"balance": 21890,
"gender": "M",
"city": "Jamestown",
"employer": "Assitia",
"state": "MN",
"id": "203",
"age": 33,
"email": "evewyatt@assitia.com",
"lastname": "Wyatt"
},
{
"account_number": 227,
"firstname": "Coleman",
"address": "776 Little <font color='red'>Street</font>",
"balance": 19780,
"gender": "M",
"city": "Eagleville",
"employer": "Exoteric",
"state": "WV",
"id": "227",
"age": 22,
"email": "colemanberg@exoteric.com",
"lastname": "Berg"
},
{
"account_number": 239,
"firstname": "Chang",
"address": "895 Brigham <font color='red'>Street</font>",
"balance": 25719,
"gender": "M",
"city": "Belgreen",
"employer": "Qaboos",
"state": "NH",
"id": "239",
"age": 36,
"email": "changboyer@qaboos.com",
"lastname": "Boyer"
},
{
"account_number": 246,
"firstname": "Katheryn",
"address": "259 Kane <font color='red'>Street</font>",
"balance": 28405,
"gender": "F",
"city": "Bath",
"employer": "Quantalia",
"state": "TX",
"id": "246",
"age": 21,
"email": "katherynfoster@quantalia.com",
"lastname": "Foster"
}
]
}
描述: 结果验证
GET /bank/_search
{
"query": {
"multi_match": {
"query": "Street",
"fields": ["address","firstname","lastname"]
}
},
"from": 10,
"size": 10
}