Elasticsearch 7.x query statements and their Java equivalents

Article directory

Introduction to ES

Elasticsearch 7.x is a distributed full-text search engine built on Lucene that can quickly and accurately search, analyze and store massive amounts of data. Key features of Elasticsearch 7.x include:

Distributed architecture: Elasticsearch 7.x is a distributed system that can run on multiple nodes. Each node is independent of the others, and data is automatically split into shards and replicated across multiple nodes, which improves the system's performance, availability and scalability.
Real-time search: Elasticsearch 7.x can quickly index and search massive data, and supports real-time search, that is, when the data changes, the latest results can be searched immediately.
Multiple data types support: Elasticsearch 7.x supports indexing and searching of multiple data types, including text, numbers, dates, geographic locations, etc., which can meet the search needs of various types of data.
Word segmentation and analysis: Elasticsearch 7.x supports text segmentation and analysis, which can automatically decompose text into words, and then index and search, improving the accuracy and efficiency of search.
RESTful API: Elasticsearch 7.x provides an operation mode based on RESTful API, which can interact with Elasticsearch through HTTP requests, which is convenient and easy to use.
Multi-language support: Elasticsearch 7.x supports clients in multiple programming languages, including Java, Python, Ruby, PHP, etc., and you can easily perform indexing and search operations through these clients.
Real-time monitoring and diagnosis: Elasticsearch 7.x provides real-time monitoring and diagnosis tools, which can easily monitor cluster status, index performance, node status, etc., improving the maintainability and reliability of the system.
In short, Elasticsearch 7.x is a powerful, high-performance, and easy-to-use distributed search engine that can meet the needs of data search and analysis of various scales.

Basic Information Query

view all indexes

# List all indices
GET _cat/indices

green open indexName1                                 785t4eK4SiarjIj6d_qDtA 1 0       0  0    283b    283b
green open indexName2                              7WB8hk3cRMKuTUJhKWni9g 1 0       0  0    283b    283b
green open indexName3                                PDJWT_VoS9iWSfEzUe5LEQ 1 0       0  0    283b    283b
green open indexName4                  0qc3VwyvS9SIQ97GbILvJw 1 0       0  0    283b    283b

/**
 * Collects the keys of the alias map reported by the cluster.
 *
 * <p>Issues a get-alias request with no index or alias restriction and returns the key set
 * of the alias map in the response; this article uses those keys as the index names.
 *
 * @return the key set of the alias map in the response
 * @throws RuntimeException wrapping the {@link IOException} when the request fails
 */
public Set<String> getAllIndex() {
    final GetAliasesResponse aliasesResponse;
    try {
        aliasesResponse = restHighLevelClient.indices()
                .getAlias(new GetAliasesRequest(), RequestOptions.DEFAULT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return aliasesResponse.getAliases().keySet();
}

View index mapping

# View the mapping
GET indexName/_mapping

{
  "indexName" : {
    "mappings" : {
      "properties" : {
        "field1" : {
          "type" : "integer"
        },
        "field2" : {
          "type" : "keyword"
        },
        "field3" : {
          "type" : "integer"
        },
        "field4" : {
          "type" : "long"
        },
        "field5" : {
          "type" : "keyword"
        },
        "field6" : {
          "type" : "integer"
        }
      }
    }
  }
}

/**
 * Returns the declared type of every top-level field in an index mapping.
 *
 * @param indexName index whose mapping is requested; also the key used to look the mapping
 *                  up in the response
 * @return map from field name to the value of that field's {@code "type"} entry; a field
 *         without such an entry (an object field that only carries sub-properties) is
 *         reported as {@code "object"}; the map is empty when the mapping has no
 *         {@code "properties"} section
 * @throws RuntimeException wrapping the {@link IOException} when the request fails
 */
public Map<String, String> getMapping(String indexName) {
    GetMappingsRequest request = new GetMappingsRequest().indices(indexName);
    GetMappingsResponse response;
    try {
        response = restHighLevelClient.indices().getMapping(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Map<String, Object> mappings = response.mappings().get(indexName).getSourceAsMap();
    // The mapping source is untyped JSON: "properties" is an object of per-field objects.
    @SuppressWarnings("unchecked")
    Map<String, Map<String, Object>> properties =
            (Map<String, Map<String, Object>>) mappings.get("properties");

    Map<String, String> result = new HashMap<>();
    if (properties == null) {
        // No "properties" section means there is no field to report.
        return result;
    }
    for (Map.Entry<String, Map<String, Object>> field : properties.entrySet()) {
        Object type = field.getValue().get("type");
        // Object fields are returned without an explicit "type"; "object" is their implied type.
        result.put(field.getKey(), type == null ? "object" : type.toString());
    }
    return result;
}

Find all values in the index

GET indexName/_search
{
    
    
 "query":{
    
    
   "match_all": {
    
    }
 }
}

{
    
    
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    
    
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    
    
    "total" : {
    
    
      "value" : 10000,
      "relation" : "gte"
    },
    "max_score" : 1.0,
    "hits" : [
      {
    
    
        "_index" : "indexName",
        "_type" : "_doc",
        "_id" : "ngGsfYcB7ZKupvf2r47w",
        "_score" : 1.0,
        "_source" : {
    
    
        	"field1" : "value1",
        	"field2" : "value2"
        }
      },
      
      {
    
    
        "_index" : "indexName",
        "_type" : "_doc",
        "_id" : "ngGsfYcB7ZKupvf2r47w",
        "_score" : 1.0,
        "_source" : {
    
    
        	"field1" : "value1",
        	"field2" : "value2"
        }
      }
   	 ]
   }
}

/**
 * Reads the {@code _source} of the documents in an index using a match-all query.
 *
 * @param indexName index to search
 * @return one map per returned hit, in response order; the request asks for at most
 *         10000 hits
 * @throws RuntimeException wrapping the {@link IOException} when the search fails
 */
public List<Map<String, Object>> getAll(String indexName) {
    SearchSourceBuilder matchAllSource = new SearchSourceBuilder();
    matchAllSource.query(QueryBuilders.matchAllQuery());
    matchAllSource.size(10000);

    SearchRequest request = new SearchRequest(indexName);
    request.source(matchAllSource);

    final SearchResponse response;
    try {
        response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    List<Map<String, Object>> documents = new ArrayList<>();
    for (SearchHit hit : response.getHits().getHits()) {
        documents.add(hit.getSourceAsMap());
    }
    return documents;
}

filter

The bool query in ES7.x is a compound query, which can combine multiple queries to perform complex logical operations. The commonly used subqueries in bool queries are as follows:

must query: All subqueries must match to be considered a successful match.
must_not query: All subqueries must not match to be considered a successful match.
should query: the match succeeds if at least one subquery matches. Multiple should queries can be specified, and the minimum_should_match parameter controls the minimum number of them that must match.
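filter query: similar to the must query, but it does not calculate a score and is only used to filter documents.

These subqueries can be combined to perform complex logical operations. Note that when a bool query contains must or filter clauses, its should clauses become optional by default (minimum_should_match defaults to 0) and only influence the score. To express "must match A and B, or match C", put two alternatives in should: a nested bool query whose must contains A and B, and the query C.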

There are various filter conditions under each subquery:

Equal: QueryBuilders.termQuery(fieldName, value)
Value range: QueryBuilders.rangeQuery(fieldName).gt(lowerValue).lt(upperValue)
- gt and lt are exclusive bounds; in addition, there are the inclusive variants gte and lte, and so on.

The following query searches the index named indexName for documents whose fileTypeEnum field is TEMP, whose timestamp field is between 1680769009720 and 1680769909720 (inclusive), and whose count field is not 19.

GET /indexName/_search
{
    
    
  "query": {
    
     
    "bool": {
    
    
      "must": [
        {
    
    
          "term": {
    
    
            "fileTypeEnum": {
    
    
              "value": "TEMP"
            }
          }
        },
        {
    
    
          "range": {
    
    
            "timestamp": {
    
    
              "gte": 1680769009720,
              "lte": 1680769909720
            }
          }
        }
      ],
      "must_not": [
        {
    
    
          "term": {
    
    
            "count": {
    
    
              "value": "19"
            }
          }
        }
      ]

    }
  },
  "size": 10000
}

/**
 * Executes a caller-supplied search definition against an index and returns the
 * {@code _source} of every hit in the response.
 *
 * @param indexName           index to search
 * @param searchSourceBuilder query, paging and other search options to send
 * @return one map per returned hit, in response order
 * @throws RuntimeException wrapping the {@link IOException} when the search fails
 */
public List<Map<String, Object>> search(String indexName, SearchSourceBuilder searchSourceBuilder) {
    SearchRequest request = new SearchRequest(indexName);
    request.source(searchSourceBuilder);

    final SearchResponse response;
    try {
        response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    SearchHit[] returnedHits = response.getHits().getHits();
    List<Map<String, Object>> documents = new ArrayList<>();
    for (SearchHit hit : returnedHits) {
        documents.add(hit.getSourceAsMap());
    }
    return documents;
}

After defining the search function, you only need to pass in the index name and a SearchSourceBuilder object; the various filter conditions are added to the SearchSourceBuilder object.

// All clauses go into ONE bool query: SearchSourceBuilder.query(...) replaces the query set
// before it, so calling it once per clause would keep only the last clause.
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder()
    .query(QueryBuilders.boolQuery()
        .must(QueryBuilders.termQuery("name", "张三"))
        // Exclude documents whose value lies in [1001, 1101]; gte is the lower bound and
        // lte the upper bound (the reverse pairing describes an empty interval).
        // NOTE(review): "timestape" looks like a typo for "timestamp" — confirm against the
        // index mapping before changing the field name.
        .mustNot(QueryBuilders.rangeQuery("timestape")
            .gte(1001)
            .lte(1101)));
// Pass the builder declared above.
List<Map<String, Object>> result = search(indexName, sourceBuilder);

fuzzy query

Fuzzy (wildcard) query: QueryBuilders.wildcardQuery(fieldName, queryString)

* matches zero or more of any character;
? matches exactly one arbitrary character.

GET /indexName/_search
{
    
    
  "query": {
    
    
    "wildcard": {
    
    
      "location": "*123*"
    }
  }
}

{
    
    
  "took" : 4,
  "timed_out" : false,
  "_shards" : {
    
    
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    
    
    "total" : {
    
    
      "value" : 270,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
    
    
        "_index" : "indexName",
        "_type" : "_doc",
        "_id" : "ngGsfYcB7ZKupvf2r47w",
        "_score" : 1.0,
        "_source" : {
    
    
        	"field1" : "value1",
        	"field2" : "value2"
        }
      },
      
      {
    
    
        "_index" : "indexName",
        "_type" : "_doc",
        "_id" : "ngGsfYcB7ZKupvf2r47w",
        "_score" : 1.0,
        "_source" : {
    
    
        	"field1" : "value1",
        	"field2" : "value2"
        }
      }
   	 ]
   }
}

List<String> contextLoads(String indexName) throws IOException {
    
    
     // 模糊搜索
     // *表示匹配任意多个字符（包括零个字符）
     // ?表示匹配任意单个字符
     WildcardQueryBuilder queryWildcard = QueryBuilders.wildcardQuery("location", "*123*");

     SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
             .query(queryWildcard)
             .from(0) // 设置分页
             .size(500) // 默认只显示10个
             .fetchSource(new String[]{
    
    "location"}, null) // 只返回location字段
             ;

    SearchRequest searchRequest = new SearchRequest(indexName);
    searchRequest.source(searchSourceBuilder);
    SearchResponse search = null;
    try {
    
    
        search = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    } catch (IOException e) {
    
    
        throw new RuntimeException(e);
    }
    SearchHit[] hits = search.getHits().getHits();
    List<String> result = new ArrayList<>();
    Arrays.stream(hits).iterator().forEachRemaining(oneDoc -> {
    
    
        Map<String, Object> sourceAsMap = (Map<String, Object>) oneDoc.getSourceAsMap();
        result.add(sourceAsMap.get("location").toString());
    });
    return result
}

Deduplication

Deduplicate the location field in indexName.

POST /indexName/_search
{
    
    
  "size": 0,
  "aggs": {
    
    
    "distinct": {
    
    
      "terms": {
    
    
        "field": "location"
      }
    }
  }
}

{
    
    
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    
    
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    
    
    "total" : {
    
    
      "value" : 10000,
      "relation" : "gte"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    
    
    "distinct" : {
    
    
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 688,
      "buckets" : [
        {
    
    
          "key" : "30-200-1680592680000",
          "doc_count" : 172
        },
        {
    
    
          "key" : "30-200-1680592801000",
          "doc_count" : 172
        },
        {
    
    
          "key" : "30-200-1680593042000",
          "doc_count" : 172
        },
        {
    
    
          "key" : "30-200-1680593161000",
          "doc_count" : 172
        }
      ]
    }
  }
}

deduplication by aggregation

/**
 * Returns distinct values of the {@code location} field using a terms aggregation.
 *
 * <p>The aggregation is limited to 10 buckets, so at most 10 values are returned.
 *
 * @return the bucket keys of the {@code distinct_location} aggregation, as strings
 * @throws RuntimeException wrapping the {@link IOException} when the search fails
 */
public List<String> getDistance() {
    SearchRequest searchRequest = new SearchRequest(indexName);

    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
            // Only the aggregation is read below, so request no hits at all
            // (the REST form of this query uses "size": 0 as well).
            .size(0)
            .aggregation(AggregationBuilders.terms("distinct_location").field("location").size(10));
    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse;
    try {
        searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    Terms distinctValues = searchResponse.getAggregations().get("distinct_location");
    return distinctValues.getBuckets().stream()
            .map(Terms.Bucket::getKeyAsString)
            .collect(Collectors.toList());
}

Deduplication by query

/**
 * Returns {@code location} values de-duplicated by collapsing the search results on that
 * field.
 *
 * @return the {@code location} value of each returned hit, at most 10
 * @throws IOException declared by the signature; a failing search call is actually rethrown
 *                     wrapped in a {@link RuntimeException}
 */
List<String> getDistance2() throws IOException {
    SearchSourceBuilder collapsedSource = new SearchSourceBuilder();
    collapsedSource.collapse(new CollapseBuilder("location")); // de-duplicate on this field
    collapsedSource.from(0);                                   // paging offset
    collapsedSource.size(10);                                  // page size
    collapsedSource.fetchSource(new String[]{"location"}, null); // return only this field

    SearchRequest request = new SearchRequest(indexName);
    request.source(collapsedSource);

    final SearchResponse response;
    try {
        response = restHighLevelClient.search(request, RequestOptions.DEFAULT);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    List<String> locations = new ArrayList<>();
    for (SearchHit hit : response.getHits().getHits()) {
        Map<String, Object> source = hit.getSourceAsMap();
        locations.add(source.get("location").toString());
    }
    return locations;
}

deduplication after filtering

GET /indexName/_search
{
    
    
  "query": {
    
    
    "bool": {
    
    
      "must": [
        {
    
    
          "match": {
    
    
            "fileTypeEnum": {
    
    
              "query": "TEMP"
            }
          }
        },
        {
    
    
          "wildcard": {
    
    
            "location": "*13*"
          }
        }
      ]
    }
  },
  "aggs": {
    
    
    "distinct": {
    
    
      "terms": {
    
    
        "field": "location"
      }
    }
  }
}

    // Field that is both wildcard-filtered and aggregated on.
    String name = "location";
    // Target index of the search.
    SearchRequest searchRequest = new SearchRequest().indices(indexName);

    // Assemble the filter conditions; a clause is added only for non-empty DTO values.
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    // fieldName1
    if (StringUtils.isNotEmpty(DTO.getSubsystem())) {
        boolQueryBuilder.must(QueryBuilders.termQuery(fieldName1, DTO.getSubsystem()));
    }
    // NOTE(review): this clause also targets fieldName1 — possibly a copy-paste slip for a
    // second field; confirm the intended field name.
    if (StringUtils.isNotEmpty(DTO.getSubject())) {
        boolQueryBuilder.must(QueryBuilders.termQuery(fieldName1, DTO.getSubject()));
    }
    // Wildcard ("contains") match on location.
    if (StringUtils.isNotEmpty(DTO.getLocation())) {
        boolQueryBuilder.must(QueryBuilders.wildcardQuery(name, "*" + DTO.getLocation() + "*"));
    }

    // Terms aggregation over the filtered documents, limited to 100 buckets.
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
            .query(boolQueryBuilder)
            .aggregation(AggregationBuilders.terms("result").field(name).size(100));

    searchRequest.source(searchSourceBuilder);
    SearchResponse searchResponse;
    try {
        searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
    } catch (IOException e) {
        // Log with the stack trace and fail here with the cause attached, rather than
        // continuing and dereferencing a null response below.
        logger.error("查询ES数据异常：" + e.getMessage(), e);
        throw new RuntimeException(e);
    }

    Terms distinctValues = searchResponse.getAggregations().get("result");

    List<String> result = distinctValues.getBuckets().stream()
            .map(Terms.Bucket::getKeyAsString)
            .collect(Collectors.toList());
    return result;