Java操作elasticsearch8.2.0索引库

  • 1.倒排索引
  • 2.正向和倒排
  • 3.连接ElasticSearch
  • 3.1 导入maven依赖
  • 3.2 Java连接ElasticSearch
  • 4.对索引的增删操作


1.倒排索引

倒排索引中有两个非常重要的概念:

  • 文档(Document):用来搜索的数据,其中的每一条数据就是一个文档。例如一个网页、一个商品信息
  • 词条(Term):对文档数据或用户搜索数据,利用某种算法分词,得到的具备含义的词语就是词条。例如:我是中国人,就可以分为:我、是、中国人、中国、国人这样的几个词条

2.正向和倒排

那么为什么一个叫做正向索引,一个叫做倒排索引呢?

  • 正向索引是最传统的,根据id索引的方式。但根据词条查询时,必须先逐条获取每个文档,然后判断文档中是否包含所需要的词条,是根据文档找词条的过程
  • 倒排索引则相反,是先找到用户要搜索的词条,根据词条得到包含该词条的文档的id,然后根据id获取文档。是根据词条找文档的过程

是不是恰好反过来了?
那么这两种方式的优缺点分别是什么呢?

正向索引

  • 优点:
  • 可以给多个字段创建索引
  • 根据索引字段搜索、排序速度非常快
  • 缺点:
  • 根据非索引字段,或者索引字段中的部分词条查找时,只能全表扫描。

倒排索引

  • 优点:
  • 根据词条搜索、模糊搜索时,速度非常快
  • 缺点:
  • 只能给词条创建索引,而不是字段
  • 无法根据字段做排序

创建倒排索引是对正向索引的一种特殊处理,流程如下:

  • 将每一个文档的数据利用算法分词,得到一个个词条
  • 创建表,每行数据包括词条、词条所在文档id、位置等信息
  • 因为词条唯一性,可以给词条创建索引,例如hash表结构索引

3.连接ElasticSearch

因为ElasticSearch从8版本开始默认开启了登录校验,所以在用Java操作时需要提供用户名和密码才能进行操作!

3.1 导入maven依赖

<!-- Spring Boot web starter. No version is given here; presumably it is managed by a Spring Boot parent/BOM (confirm in the full pom). -->
<dependency>
           <groupId>org.springframework.boot</groupId>
           <artifactId>spring-boot-starter-web</artifactId>
       </dependency>

       <!-- Spring Boot starter for Spring Data Elasticsearch (version likewise not declared here). -->
       <dependency>
           <groupId>org.springframework.boot</groupId>
           <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
       </dependency>

       <!-- Elasticsearch REST client artifact (version not declared here; confirm it matches the 8.2.0 Java client below). -->
       <dependency>
           <groupId>org.elasticsearch.client</groupId>
           <artifactId>elasticsearch-rest-client</artifactId>
       </dependency>

       <!-- Elasticsearch Java API client, pinned to 8.2.0. -->
       <dependency>
           <groupId>co.elastic.clients</groupId>
           <artifactId>elasticsearch-java</artifactId>
           <version>8.2.0</version>
       </dependency>

       <!-- Jackson databind, pinned to 2.12.3. -->
       <dependency>
           <groupId>com.fasterxml.jackson.core</groupId>
           <artifactId>jackson-databind</artifactId>
           <version>2.12.3</version>
       </dependency>

       <!-- Jakarta JSON API, pinned to 2.0.1. -->
       <dependency>
           <groupId>jakarta.json</groupId>
           <artifactId>jakarta.json-api</artifactId>
           <version>2.0.1</version>
       </dependency>

3.2 Java连接ElasticSearch

@BeforeEach
    void setUp() {
        // Register the user name and password for every auth scope.
        final CredentialsProvider authProvider = new BasicCredentialsProvider();
        authProvider.setCredentials(
                AuthScope.ANY,
                new UsernamePasswordCredentials("elastic", "J=9XqTBAk-2GLwd_msUx"));

        // Build the low-level REST client; its HTTP client is given the credentials provider above.
        RestClient lowLevelClient = RestClient
                .builder(HttpHost.create("http://192.168.33.136:9200"))
                .setHttpClientConfigCallback(
                        httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(authProvider))
                .build();

        // Wrap the REST client in a transport that uses the Jackson JSON mapper, then create the API client.
        elasticsearchClient = new ElasticsearchClient(
                new RestClientTransport(lowLevelClient, new JacksonJsonpMapper()));
    }

4.对索引的增删操作

package cn.itcast.hotel;

import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch.indices.CreateIndexRequest;
import co.elastic.clients.json.jackson.JacksonJsonpMapper;
import co.elastic.clients.transport.rest_client.RestClientTransport;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.util.ResourceUtils;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

/**
 * Tests that create and delete the {@code hotel} index through the Elasticsearch Java API client.
 *
 * <p>A fresh client is built before every test and its transport is closed afterwards, so the
 * underlying HTTP resources do not accumulate across tests.
 *
 * @author 尹稳健~
 * @version 1.0
 * @time 2023/5/28
 */
public class HotelIndexTest {
    // NOTE(review): the endpoint and credentials are hard-coded. They should be moved to external
    // configuration (environment variables / test properties) and kept out of version control.
    private static final String ES_URL = "http://192.168.33.136:9200";
    private static final String ES_USERNAME = "elastic";
    private static final String ES_PASSWORD = "J=9XqTBAk-2GLwd_msUx";

    /** Name of the index that the tests create and delete. */
    private static final String INDEX_NAME = "hotel";

    /** Transport of the current test's client; kept so that {@link #tearDown()} can close it. */
    private RestClientTransport transport;

    /** Client under test; rebuilt by {@link #setUp()} before each test. */
    private ElasticsearchClient elasticsearchClient;

    /**
     * Builds an {@link ElasticsearchClient} that sends the configured user name and password
     * with its requests.
     */
    @BeforeEach
    void setUp() {
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(
                AuthScope.ANY, new UsernamePasswordCredentials(ES_USERNAME, ES_PASSWORD));

        RestClient restClient = RestClient.builder(HttpHost.create(ES_URL))
                .setHttpClientConfigCallback(
                        httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider))
                .build();

        transport = new RestClientTransport(restClient, new JacksonJsonpMapper());
        elasticsearchClient = new ElasticsearchClient(transport);
    }

    /**
     * Closes the transport created by {@link #setUp()}.
     *
     * @throws IOException if closing the transport fails
     */
    @AfterEach
    void tearDown() throws IOException {
        // setUp may have failed before the transport was assigned.
        if (transport != null) {
            transport.close();
            transport = null;
        }
    }

    /** Prints the client built by {@link #setUp()}. */
    @Test
    void initialization() {
        System.out.println(elasticsearchClient);
    }

    /**
     * Creates the index from the {@code hotel.json} classpath resource, unless the index
     * already exists.
     *
     * @throws IOException if the resource cannot be read or the request fails
     */
    @Test
    void createIndexRequest() throws IOException {
        boolean indexExists = elasticsearchClient.indices()
                .exists(request -> request.index(INDEX_NAME))
                .value();
        if (indexExists) {
            return;
        }

        File file = ResourceUtils.getFile("classpath:hotel.json");
        // Read explicitly as UTF-8 rather than the platform default charset, and close the
        // reader once the request has been sent.
        try (Reader reader = Files.newBufferedReader(file.toPath(), StandardCharsets.UTF_8)) {
            CreateIndexRequest request = CreateIndexRequest.of(
                    builder -> builder.index(INDEX_NAME)
                            .withJson(reader)
            );
            elasticsearchClient.indices().create(request);
        }
    }

    /**
     * Deletes the index.
     *
     * @throws IOException if the request fails
     */
    @Test
    void deleteIndexRequest() throws IOException {
        elasticsearchClient.indices().delete(request -> request.index(INDEX_NAME));
    }

}