Elasticsearch02-查询语法

转载

mb5fe18e9fef50b 2021-09-05 23:42:00

Query_string(一般不用)

1.timeout
GET /_search?timeout=1s/ms/m
超时时间一到，就返回当前已经查询到的数据（时间单位可以是 ms、s、m）

2.带参数查询
GET /product/_search?q=name:xiaomi


3.分页、排序
GET /product/_search?from=0&size=3&sort=price:desc
加了排序 _score为null

Query DSL

1.match_all： <查询所有数据>

GET /product/_search
{
  "query": {
    "match_all": {}
  }
}


2.match: <name字段包含'nfc'或者'phone'>
GET /product/_search
{
  "query": {
    "match": {
      "name": "nfc phone"
    }
  }
}


3.sort: <排序>
GET /product/_search
{
  "query": {"match": {
      "name": "xiaomi"
  }}
  ,"sort": [
    {
      "price": {
        "order": "asc"
      }
    }
  ]
}


4.multi_match: <根据多个字段查询一个关键词, name和desc字段中包含“nfc”>
GET /product/_search
{
  "query": {
    "multi_match": {
      "query": "nfc",
      "fields": ["name", "desc"]
    }
  }
}


5._source：<只显示特定字段>
GET /product/_search
{
  "query": {
    "multi_match": {
      "query": "nfc",
      "fields": ["name", "desc"]
    }
  },
  "_source": ["name","desc", "price"]
}



6. 分页查询：<查询第一页（每页两条数据）>
GET /product/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "price": {
        "order": "desc"
      }
    }
  ], 
  "from": 0,
  "size": 2
}

全文检索<Full-text queries>

# 1.term查询不会对搜索词分词，而是把'nfc phone'整体当作一个词项去倒排索引中查找；而倒排索引中的原数据已经被分词成'nfc'和'phone'，所以查询不到
GET /product/_search
{
  "query": {
    "term": { # match可以查询到，因为match会先分好词，再去倒排索引中查询
      "name": {
        "value": "nfc phone"
      }
    }
  }
}

#2. 查询同时包含nfc和phone（must里的条件是and关系）
GET /product/_search
{
  "query": {
    "bool": {
      "must": [ # 相当于 match 'nfc phone' 且 operator 为 and
        {"term":{"name":"nfc"}},
        {"term":{"name":"phone"}}
      ]
    }
  }
}

# 3.terms：name字段包含'nfc'或者'phone'任意一个即可匹配，效果类似 match 'nfc phone'（但terms不会对搜索词分词）
GET /product/_search
{
  "query": {
    "terms": {
      "name": ["nfc","phone"]
    }
  }
}


#测试当前分词器
GET /_analyze
{
  "analyzer": "standard",
  "text": "xiaomi nfc zhineng phone"
}



# Phrase search：<短语搜索，和全文检索相反，“nfc phone”会作为一个短语去检索> 其他查询只要分词后的任意词项命中即可，短语搜索则要求各词项都出现，并且顺序、位置与短语一致
GET /product/_search
{
  "query": {
    "match_phrase": {
      "name": "nfc phone"
    }
  }
}

组合查询

1. must：<必须满足> 必须name字段包含'xiaomi', 且desc字段包含'shouji'
GET /product/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "xiaomi"
        }},
         {"match": {
          "desc": "shouji" #must里的条件都是and关系；即使字段一样(都是name)，也要同时包含'xiaomi'和'shouji'
        }}
      ]
    }
  }
}



2. filter：<过滤器 不计算相关度分数，多次查询会cache>, 查询name字段包含'xiaomi'或者'phone'， 且desc字段包含'shouji', 过滤: name字段包含‘nfc’， 且价格在3000-5000
GET /product/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "xiaomi phone"
        }},
         {"match": {
          "desc": "shouji"
        }}
      ],
      "filter": [
        {"match_phrase": {"name": "nfc"}},
        {"range": {
          "price": {
            "gte": 3000, # gte >=   gt >
            "lte": 5000
          }
        }}
      ]
    }
  }
}


3.must_not：<必须不满足 不计算相关度分数>   查询name字段不包含'erji'
GET /product/_search
{
  "query": {
    "bool": {
      "must_not": [
        {"match": {
          "name": "erji"
        }}
      ]
    }
  }
}


4. should：<可能满足 or> minimum_should_match：参数指定should返回的文档必须匹配的子句的数量或百分比。如果bool查询包含至少一个should子句，而没有must或 filter子句，则默认值为1。否则，默认值为0


  4.1  bool多条件 name包含xiaomi 不包含erji 描述里包不包含nfc都可以，价钱要大于4999

GET /product/_search
{
  "query": {
"bool":{
#name中必须包含“xiaomi”
      "must": [
        {"match": { "name": "xiaomi"}}
      ],
#name中必须不能包含“erji”
      "must_not": [
        {"match": { "name": "erji"}}
      ],
#should中至少满足0个条件，参见下面的minimum_should_match的解释
      "should": [
        {"match": {
          "desc": "nfc"
        }}
      ], 
#筛选价格大于4999的doc
      "filter": [        
        {"range": {
          "price": {
            "gt": 4999   
          }
        }}
      ]
    }
  }
}

















4.2 #有must时,minimum_should_match默认值是0，should不参与过滤，只影响相关度分数
GET /product/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "nfc"
        }}
      ],"should": [
        {"range": {
          "price": {
            "gte": 4000
          }
        }}
      ]
    }
  }
}
# 相当于
GET /product/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "nfc"
        }}
      ],"should": [
        {"range": {
          "price": {
            "gte": 4000
          }
        }}
      ],
      "minimum_should_match": 0
    }
  }
}


#显式指定minimum_should_match为1，should条件生效（至少要满足一个should子句）
GET /product/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "nfc"
        }}
      ],"should": [
        {"range": {
          "price": {
            "gte": 4000
          }
        }}
      ],
      "minimum_should_match": 1
    }
  }
}


# 指定了minimum_should_match 1， should里面的条件全部触发,should里面是or的关系
GET /product/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "nfc"
        }}
      ],"should": [
        {"range": {
          "price": {
            "gte": 4000
          }
        }
        },{
          "range": {
            "price": {
              "lte": 3000
            }
          }
        }
        
      ],
      "minimum_should_match": 1
    }
  }
}



# 指定了minimum_should_match 2， should里面的条件全部触发,should里面是and的关系
GET /product/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "nfc"
        }}
      ],"should": [
        {"range": {
          "price": {
            "gte": 4000
          }
        }
        },{
          "range": {
            "price": {
              "lte": 3000
            }
          }
        }
        
      ],
      "minimum_should_match": 2
    }
  }
}

练习题

# 想要一台带NFC功能的 或者 小米的手机 但是不要耳机
GET /product/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": { # bool子句没有filter或者must，所以minimum_should_match默认1， should里面的关系是or
                "should":[ #
              {"term": {"name": "xiaomi"}},
              {"term":{"name":"nfc"}}
              ],
              "must_not":[
                {"term": {"name": "erji"}}
                ]
        }
      },
      "boost": 1.2
    }
  }
}


# ②搜索一台xiaomi nfc phone，或者一台满足“是一台手机 并且 价格小于2999”的商品
GET /product/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "should":[
            {"match_phrase":{ #短语搜索
            "name":"xiaomi nfc phone"
          }},
            {
              "bool":{
                "must":[ # 里面的条件必须全部满足（and关系，与字段是否相同无关）
                  {"term":{"name":"phone"}},
                  {"range":{"price":{"lt":2999}}}
                  ]
              }
            }]
        }
      },
      "boost": 1.2
    }
  }
}

高亮:<返回的元素带上了html标签>

GET /product/_search
{
  "query": {
    "match_phrase": {
      "name": "nfc phone"
    }
  },
  "highlight": {
    "fields": {
      "name": {}
    }
  }
}

分页问题

ES数据存储在各个分片，查询前5000条分页数据时，需要从每个分片各取出5000条（共 5000*分片数 条）数据，汇总之后再进行排序，数据量大时查询特别慢

解决办法：
1.尽量避免深度分页查询
2.使用scroll search(只能向后逐页滚动，不能跳页或返回上一页，不适合实时查询)

GET /product/_search?scroll=1m
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "price": {
        "order": "desc"
      }
    }
  ],
  "size": 2
}

GET /_search/scroll
{
  "scroll":"1m",#1分钟后从内存中删除
 #scroll_id是上一个分页查询出来的scroll_id
"scroll_id":"FGluY2x1ZGVfY29udGV4dF91dWlkDXF1ZXJ5QW5kRmV0Y2gBFklRcUM1YnJKUUx1bUF5eE9iX0p0TVEAAAAAAAAQiBZTR0xZSGdGU1RSRzA5MGI5Y181YXpn"
}

Filter缓存原理

1.filter并不是每次执行都会进行cache，而是当执行一定次数的时候才会进行cache一个二进制数组，1表示匹配，0表示不匹配。
2.有多个filter条件时，会优先使用匹配结果更稀疏的cache数组进行过滤，从而尽快排除尽可能多的数据
3.filter cache保存的是匹配的结果，不需要再从倒排索引中查找对比，大大提高了查询速度
4.filter一般会在query之前执行，过滤掉一部分数据,从而提高query速度
5.filter不计算相关度分数，在执行效率上较query更高
6.当原数据发生改变时，cache也会更新