###1,es的命令
GET _search
{
"query": {
"match_all": {}
}
}
#新增索引的同时添加分片,不使用默认分片,分片的数量
#一般以节点数平方计算,比如有3个节点,分片数量9个,3个主分片和6个副本
#设置分片和副本方式一,创建索引时就指定
PUT /testshards
{
"settings":{
"number_of_shards":12,
"number_of_replicas":1
}
}
# Method 2: change settings after the index has been created.
# NOTE: number_of_shards is a *static* setting — Elasticsearch rejects any
# attempt to change it on an existing index. Only dynamic settings such as
# number_of_replicas can be updated here; to change the primary shard count
# use the _split / _shrink APIs or _reindex into a new index.
GET /_settings
PUT testshards/_settings
{
  "number_of_replicas": 1
}
GET /testshards/_settings
POST /testshards/_doc/3
{
"title":"test"
}
GET /_cat/indices/testshards?v
#cat API检测集群是否健康。 确保9200端口号可用:绿色表示一切正常,
#黄色表示所有的数据可用但是部分副本还没有分配,红色表示部分数据因为某些原因不可用.
GET /_cat/health?v
#获取集群的节点列表:
GET /_cat/nodes?v
#查看集群状态
GET /_cluster/health
#获取所有索引信息
GET /_cluster/health?level=indices
#获取所有索引信息及分片状态信息
GET /_cluster/health?level=shards
#获取jvm信息
GET _nodes/_master/stats/os,jvm
#另外还有这些可指定
#indices:获得分片大小、文件数量、索引的创建和删除时间、搜索执行时间、字段缓存大小等
#fs/http/process/thread_pool/transport/beaker/discovery/ingest
GET /_cat/allocation?v
GET /_cat/shards?v
GET /_cat/shards/{index}?v
GET /_cat/master?v
GET /_cat/nodes?v
GET /_cat/indices?v
GET /_cat/indices/{index}?v
GET /_cat/segments?v
GET /_cat/segments/{index}?v
GET /_cat/count?v
GET /_cat/count/{index}?v
GET /_cat/recovery?v
GET /_cat/recovery/{index}?v
GET /_cat/health?v
GET /_cat/pending_tasks?v
GET /_cat/aliases?v
GET /_cat/aliases/{alias}?v
GET /_cat/thread_pool?v
GET /_cat/plugins?v
GET /_cat/fielddata?v
GET /_cat/fielddata/{fields}?v
GET /_cat/nodeattrs?v
GET /_cat/repositories?v
GET /_cat/snapshots/{repository}?v
#所有索引:
GET /_cat/indices?v
#获取映射
GET _mapping
#获取索引people映射
GET people/_mapping
# Delete a mapping — NOT possible: the delete-mapping API was removed in
# Elasticsearch 2.0, so the request below would always fail. To remove a
# mapping you must delete the whole index (DELETE people) or reindex the
# data into a new index with the desired mapping.
# DELETE people/_mapping
#获取people索引设置
GET people/_settings
#查看索引people的文档总数
GET people/_count
#获取分词,不用ik分词
GET /_analyze
{
"analyzer": "standard",
"text": "杭州"
}
#获取分词,用ik分词
GET /_analyze
{
"analyzer": "ik_smart",
"text": "吊爆了"
}
#获取分词,用ik分词
GET /_analyze
{
"analyzer": "ik_max_word",
"text": "吊爆了"
}
#索引时用ik_max_word,在搜索时用ik_smart。
#即:索引时最大化的将文章内容分词,搜索时更精确的搜索到想要的结果。区别在于,如果自定义词库时
#如小米手机,ik_smart会显示小米手机,而ik_max_word会拆成小米、手机、小米手机,更加细粒度
#关闭索引,关闭的索引用户不能写入或读取数据,打开_open相反,_clear清空,_refresh刷新,_forcemerge(旧版为_optimize)优化索引,flush
POST /people/_close
#refresh操作可以通过API设置:
POST /index/_settings
{"refresh_interval": "10s"}
#当我们进行大规模的创建索引操作的时候,最好将将refresh关闭。
POST /index/_settings
{"refresh_interval": "-1"}
#es默认的refresh间隔时间是1s,这也是为什么ES可以进行近乎实时的搜索。
# Custom mappings can be added in two ways: create the index first and then
# PUT the mapping, or supply the mapping while creating the index.
# Method 1: create the index, then add the mapping separately.
# NOTE: multiple date formats are separated by "||" with NO surrounding
# spaces — spaces become literal pattern characters and break date parsing.
PUT user
GET user/_mapping
PUT user/_mapping
{
  "properties":{
    "userId":{
      "type":"keyword",
      "index":true
    },
    "userName":{
      "type":"text",
      "index":true,
      "analyzer":"ik_max_word"
    },
    "age":{
      "type":"integer",
      "index":false
    },
    "date":{
      "type":"date",
      "index":true,
      "format":"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
    }
  }
}
# Method 2: supply the mapping while creating the index.
# NOTE: date formats are joined with "||" and no surrounding spaces —
# spaces would become literal pattern characters and break date parsing.
PUT /user
{
  "mappings":{
    "properties":{
      "userId":{
        "type":"keyword",
        "index":true
      },
      "userName":{
        "type":"text",
        "index":true,
        "analyzer":"ik_max_word"
      },
      "age":{
        "type":"integer",
        "index":false
      },
      "date":{
        "type":"date",
        "index":true,
        "format":"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      }
    }
  }
}
### Create a new people index with explicit field mappings.
# NOTE: date formats are joined with "||" and no surrounding spaces —
# spaces become literal pattern characters; with them, sample documents
# such as "2020-05-02 10:00:00" would fail to parse.
PUT /people
{
  "mappings":{
    "properties":{
      "location":{
        "type":"geo_point",
        "index":true
      },
      "name":{
        "type":"text",
        "index":true,
        "analyzer":"ik_max_word"
      },
      "country":{
        "type":"keyword",
        "index":true
      },
      "age":{
        "type":"integer"
      },
      "date":{
        "type":"date",
        "format":"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      }
    }
  }
}
#添加文档,如果没有创建索引,es默认会创建
POST people/_doc/8
{
"location":{
"lat": 31.12,
"lon": -41.15 },
"name":"liufang",
"desc":"最近业务需求需要实现搜索的多个词在同段或者同句的功能,索引是按照同段落入到es的,自己实现了分词器来设置同句的position,同段的position在建立索引时设置,下面展示当时遇到的难点.比如搜索词:未就 股权转让 个人所得税, 在未就这个词落入到es会变成两个词项(term)未和就,股权转让和个人所得税有全粒度的分词(即原词在分词中),原先实现通过match_phrase实现,查询query如下",
"country":"中国",
"age":38,
"date":"2020-05-02 10:00:00"
}
GET people/_search
#删除文档
DELETE people/_doc/1
#更新文档,有2种,一种put
#方式一,这种不推荐,默认其他字段会置空
PUT people/_doc/1
{
"name":"我爱你中国"
}
#方式二,推荐,只修改部分内容,其他字段不变,另外upsert是存在就更新,不存在就插入
POST people/_update/1
{
"doc":{
"name":"我爱你中国"
}
}
#更新,如果不存在,增加字段
POST people/_update/1/
{
"doc":{
"postdate":"20191226T20:34",
"message":"This test update"
}
}
#查询索引people所有数据
GET people/_search
#URISearch,根据字段name查询
GET people/_search?q=name:你好
#GET过滤字段,只显示name
GET people/_search?_source=name
#POST过滤字段
POST people/_search
{
"query":{
"match_all":{
}
},
"_source":[
"name",
"desc",
"age"
]
}
#from和size 达到分页效果
POST /people/_search
{
"_source":[
"name",
"desc"
],
"from":1,
"size":3,
"query":{
"match_all":{
}
}
}
#分词匹配
POST /people/_search
{
"_source":[
"name",
"desc"
],
"query":{
"match":{
"name":"你好中国"
}
}
}
#精确匹配
POST /people/_search
{
"_source":[
"name",
"desc"
],
"query":{
"term":{
"name":"中国"
}
}
}
#_mget批量读取
POST /people/_mget
{
"docs":[
{
"_index":"people",
"_id":4
},
{
"_index":"people",
"_id":1
}
]
}
#排序sort
POST people/_search
{
"query":{
"match_all":{
}
},
"_source":[
"name",
"desc",
"age"
],
"sort":[{"age":"desc"}]
}
#排序sort与missing,将没有值的结果放最后
POST people/_search
{
"query":{
"match_all":{
}
},
"_source":[
"name",
"desc",
"age"
],
"sort":[
{
"age":{
"order":"asc",
"missing":"_last"
}
}
]
}
#BULK APIBULK API是一次调用中执行多种操作,这样节省网络开销
#支持以下4种操作:Create、Update、Index、Delete
# Parameterized search template.
# NOTE: the "inline" key was renamed to "source" in 5.6 and removed in 6.0;
# the modern versions this file targets (typeless _doc, POST .../_update/id)
# require "source".
POST people/_search/template
{
  "source":{
    "query":{
      "match":{
        "{{field1}}":"{{value1}}"
      }
    },
    "size":"{{size1}}"
  },
  "params":{
    "field1":"name",
    "value1":"中国",
    "size1":10
  }
}
# match_phrase with slop: still matches when up to 2 positions separate the
# terms. (The typed endpoint /people/_doc/_search is deprecated and removed
# in 8.0 — use /people/_search.)
GET /people/_search
{
  "query": {
    "match_phrase": {
      "desc": {
        "query": "美浙",
        "slop": 2
      }
    }
  }
}
# match_phrase_prefix works like match_phrase but treats the last term as a
# prefix; max_expansions caps how many terms the prefix may expand to, which
# bounds the query cost. (Typed /people/_doc/_search endpoint is deprecated
# — use /people/_search.)
GET /people/_search
{
  "query":{
    "match_phrase_prefix":{
      "desc":{
        "query":"hello w",
        "max_expansions":2,
        "slop":2
      }
    }
  },
  "_source":[
    "desc"
  ]
}
# multi_match: query several fields at once; minimum_should_match sets the
# required proportion of matching terms, and tie_breaker blends the scores
# of the non-best fields into the final score. (Typed /people/_doc/_search
# endpoint is deprecated — use /people/_search.)
GET /people/_search
{
  "query": {
    "multi_match": {
      "query": "美丽的江西",
      "type": "best_fields",
      "fields": [
        "name",
        "desc"
      ],
      "tie_breaker": 0.3,
      "minimum_should_match": "100%"
    }
  }
}
# query_string interprets "Beijing AND province" as a boolean query that
# requires both terms, while simple_query_string tokenizes it into
# "Beijing", "AND", "province"; its default operator is OR, so matching any
# single term is enough. Use "+" to mark a required term in
# simple_query_string.
PUT test/_doc/1
{
  "name":"Pantheon Pan",
  "address":"Shanghai province,Leo Pan is not here"
}
PUT test/_doc/2
{
  "name":"Leo Pan",
  "address":"Beijing province,Pantheon Pan is not here"
}
# query_string: both terms required by the explicit AND.
# (Typed /test/_doc/_search endpoint is deprecated — use /test/_search.)
GET /test/_search
{
  "query": {
    "query_string": {
      "query": "Beijing AND province"
    }
  }
}
# simple_query_string: AND is just another token, default operator is OR.
GET /test/_search
{
  "query": {
    "simple_query_string": {
      "query": "Beijing AND province"
    }
  }
}
# simple_query_string with "+": the following term is required.
GET /test/_search
{
  "query": {
    "simple_query_string": {
      "query": "Beijing + province"
    }
  }
}
# minimum_should_match: percentage of query terms that must match.
GET /test/_search
{
  "query": {
    "query_string": {
      "query": "Beijing province",
      "minimum_should_match": "100%"
    }
  }
}
GET people/_mapping
PUT people/_mapping
{
}
#term查询,精确匹配
POST people/_search
{
"query": {
"term": {
"name": "美丽"
}
}
}
#terms查询,匹配多个
POST people/_search
{
"query": {
"terms": {
"name": ["台州","杭州"]
}
}
}
#range范围查询,gte大于或等于,gt大于,lte小于或等于,lt小于,boost权重默认1.0
POST people/_search
{
"query": {
"range": {
"date":{
"gte":"2020-05-01 11:00:00"
}
}
}
}
#prefix前缀匹配查询
POST people/_search
{
"query": {
"prefix":{
"name":{
"value":"高",
"boost":2
}
}
}
}
#wildcard通配符查询,*和?,*表示多个任意字符,?表示一个任意字符
POST people/_search
{
"query": {
"wildcard": {
"desc": {
"value": "*女"
}
}
}
}
#regexp正则查询
POST people/_search
{
"from": 0,
"size": 10,
"query": {
"regexp": {
"name": {
"value":"[刘]",
"boost":1
}
}
}
}
#复合查询bool,必须包含名字为刘芳,过滤掉描述为libin女友的文档
POST people/_search
{
"query": {
"bool": {
"must": {
"match":{
"name":"刘芳"
}
},
"filter": {
"term":{
"desc":"libin"
}
}
}
}
}
#boosting,positive预期相关的值和negative_boost预期不相关的词
POST people/_search
{
"query": {
"boosting" : {
"positive" : {
"term" : {
"name" : "刘"
}
},
"negative" : {
"term" : {
"name" : "李"
}
},
"negative_boost" : 0.5
}
}
}
#跨度查询
POST people/_search
{
"query": {
"span_term": {
"name": {
"value": "高圆圆"
}
}
}
}
GET people/_search
{
"query": {
"span_near" : {
"clauses" : [
{ "span_term" : { "name" : "gaoyuanyuan" } },
{ "span_term" : { "name" : "libin" } },
{ "span_term" : { "name": "李" } }
],
"slop" : 2,
"in_order" : false
}
}
}
GET people/_search
{
"query": {
"span_containing" : {
"little" : {
"span_term" : { "desc" : "是" }
},
"big" : {
"span_near" : {
"clauses" : [
{ "span_term" : { "desc": "索引" } },
{ "span_term" : { "desc" : "按照" } }
],
"slop" : 10,
"in_order" : true
}
}
}
}
}
# Role data-permission index: one document per (role, mould) permission set;
# the matching archives are stored as a "nested" object array so per-archive
# conditions stay correlated within a single array element.
# NOTE: date formats are joined with "||" and no surrounding spaces — spaces
# become literal pattern characters and break parsing of the sample
# timestamps ("2020-05-05 12:33:00") inserted below.
PUT eams_role_dataperm
{
  "mappings":{
    "properties":{
      "dataperm_uuid":{ "type":"keyword" },
      "mouldstruc_uuid":{ "type":"keyword" },
      "type":{ "type":"keyword" },
      "roleId":{ "type":"keyword" },
      "dangan_id":{ "type":"keyword" },
      "data_scope":{ "type":"text" },
      "item_browsing":{ "type":"boolean" },
      "item_edit":{ "type":"boolean" },
      "item_delete":{ "type":"boolean" },
      "file_browsing":{ "type":"boolean" },
      "file_download":{ "type":"boolean" },
      "file_print":{ "type":"boolean" },
      "archives":{
        "type":"nested",
        "properties":{
          "archives_id":{ "type":"keyword" },
          "title":{
            "type":"text",
            "analyzer":"ik_max_word"
          },
          "code":{
            "type":"text",
            "analyzer":"ik_max_word"
          },
          "head":{
            "type":"text",
            "analyzer":"ik_max_word"
          },
          "create_time":{
            "type":"date",
            "format":"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
          },
          "update_time":{
            "type":"date",
            "format":"yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
          },
          "is_delete":{ "type":"boolean" },
          "storage_period":{ "type":"text" }
        }
      }
    }
  }
}
POST eams_role_dataperm/_doc/1
{
"dataperm_uuid":"1",
"mouldstruc_uuid":"1",
"type":"1",
"roleId":"1",
"dangan_id":"1000",
"data_scope":"",
"item_browsing":true,
"item_edit":true,
"item_delete":true,
"file_browsing":true,
"file_download":true,
"file_print":true,
"archives":[
{
"dangan_id":"1000",
"title":"深入java虚拟机",
"code":"zj001",
"head":"libin",
"create_time":"2020-05-05 12:33:00",
"update_time":"2020-05-05 12:33:00",
"is_delete":false,
"storage_period":"3年"
}
]
}
GET eams_role_dataperm/_search
POST eams_role_dataperm/_doc/2
{
"dataperm_uuid":"2",
"mouldstruc_uuid":"1",
"type":"1",
"roleId":"1",
"dangan_id":"1000",
"data_scope":"",
"item_browsing":false,
"item_edit":false,
"item_delete":true,
"file_browsing":true,
"file_download":true,
"file_print":true,
"archives":[
{
"dangan_id":"1000",
"title":"深入java虚拟机",
"code":"zj001",
"head":"libin",
"create_time":"2020-05-05 12:33:00",
"update_time":"2020-05-05 12:33:00",
"is_delete":false,
"storage_period":"3年"
}
]
}
POST eams_role_dataperm/_doc/3
{
"dataperm_uuid":"3",
"mouldstruc_uuid":"1",
"type":"1",
"roleId":"2",
"dangan_id":"1000",
"data_scope":"",
"item_browsing":true,
"item_edit":true,
"item_delete":true,
"file_browsing":true,
"file_download":true,
"file_print":true,
"archives":[
{
"dangan_id":"1000",
"title":"深入java虚拟机",
"code":"zj001",
"head":"libin",
"create_time":"2020-05-05 12:33:00",
"update_time":"2020-05-05 12:33:00",
"is_delete":false,
"storage_period":"3年"
}
]
}
POST eams_role_dataperm/_doc/4
{
"dataperm_uuid":"4",
"mouldstruc_uuid":"1",
"type":"1",
"roleId":"2",
"dangan_id":"1000",
"data_scope":"",
"item_browsing":true,
"item_edit":true,
"item_delete":true,
"file_browsing":true,
"file_download":true,
"file_print":true,
"archives":[
{
"dangan_id":"1000",
"title":"深入java虚拟机",
"code":"zj001",
"head":"libin",
"create_time":"2020-05-05 12:33:00",
"update_time":"2020-05-05 12:33:00",
"is_delete":false,
"storage_period":"3年"
}
]
}
POST eams_role_dataperm/_doc/5
{
"dataperm_uuid":"5",
"mouldstruc_uuid":"2",
"type":"1",
"roleId":"1",
"dangan_id":"1001",
"data_scope":"",
"item_browsing":true,
"item_edit":true,
"item_delete":true,
"file_browsing":true,
"file_download":true,
"file_print":true,
"archives":[
{
"dangan_id":"1001",
"title":"effect java",
"code":"zj002",
"head":"libin",
"create_time":"2020-05-05 12:33:00",
"update_time":"2020-05-05 12:33:00",
"is_delete":false,
"storage_period":"3年",
"num":"001",
"metadata":"元数据"
}
]
}
#根据roleId查询
GET eams_role_dataperm/_search
{
"query": {
"terms": {
"roleId": ["1","2"]
}
}
}
#组合查询
GET eams_role_dataperm/_search
{
"query":{
"bool":{
"must":[
{
"terms":{
"roleId":[
"1",
"2"
]
}
},
{
"terms":{
"mouldstruc_uuid":[
"1",
"2"
]
}
},
{
"nested":{
"path":"archives",
"query":{
"bool":{
"must":[
{
"match":{
"archives.dangan_id":"1001"
}
},
{
"match":{
"archives.title":"effect"
}
}
]
}
}
}
}
]
}
}
}
#根据关键词搜索nested内所有字段值,只要有一个字段匹配就符合:query_string
GET eams_role_dataperm/_search
{
"query":{
"bool":{
"must":[
{
"terms":{
"roleId":[
"1",
"2"
]
}
},
{
"terms":{
"mouldstruc_uuid":[
"1",
"2"
]
}
},
{
"nested":{
"path":"archives",
"query":{
"bool":{
"must":[
{
"match":{
"archives.dangan_id":"1001"
}
},
{
"query_string":{
"query":"libin"
}
}
]
}
}
}
}
]
}
}
}
#索引的重建
#例如现在有一个档案索引,需要将title的keyword类型改成text类型,使用_reindex对索引重建
#1,原有的索引如下
PUT /achieves
{
"mappings": {
"properties": {
"title": {
"type": "keyword"
},
"code": {
"type": "keyword"
}
}
}
}
#2,加入几条测试数据
PUT /achieves/_doc/1
{
"title":"古典书籍",
"code":"1000"
}
PUT /achieves/_doc/2
{
"title":"历史书籍",
"code":"1001"
}
PUT /achieves/_doc/3
{
"title":"经济书籍",
"code":"1002"
}
#重建前查询书籍,返回null
GET /achieves/_search
{
"query": {"match": {
"title": "书籍"
}}
}
#3,开始重建,首先新增⼀个索引
PUT /achieves_new
{
"mappings": {
"properties": {
"title": {
"type": "text"
},
"code": {
"type": "keyword"
}
}
}
}
#4,将旧索引数据复制到新索引
#同步执行
POST /_reindex
{
"source": {
"index": "achieves"
},
"dest": {
"index": "achieves_new"
}
}
#异步执行,如果数据量大,加上wait_for_completion=false参数,这样 reindex 将直接返回 taskId
POST /_reindex?wait_for_completion=false
{
"source": {
"index": "achieves"
},
"dest": {
"index": "achieves_new"
}
}
#获取task列表
GET _tasks
#5,替换别名,size还可指定迁移数据量
POST /_aliases
{
"actions": [{
"add": {
"index": "achieves_new",
"alias": "achieves_latest"
}
},
{
"remove": {
"index": "achieves",
"alias": "achieves_latest"
}
}
]
}
#删除旧索引
DELETE achieves
#通过别名访问新索引
POST /achieves_latest/_search
{
"query": {
"match": {
"title": "历史书籍"
}
}
}
GET achieves_latest/_mapping
#索引文件优化和查询优化,1分片数量、节点数量、合理规划映射、合理使用分词
#查询优化来说合理规划索引和分片来提高查询效率,另外路由选择也是经常用到的
#路由使用
PUT /achieves_latest/_doc/4?routing=achieves_routing&refresh=true
{
"title":"社科书籍",
"code":"1003"
}
GET achieves_latest/_search
GET achieves_latest/_doc/4?routing=achieves_routing
POST /achieves_latest,content/_search
{
"query": {
"match": {
"title": "历史书籍"
}
}
}