Elasticsearch API (Java / Scala)

First, here is the pom.xml dependency setup everyone will need (adjust the versions to match your own environment):

<properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Spark 2.x requires Java 8; 1.7 will fail at runtime. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <spark.version>2.3.2</spark.version>
        <!-- Must match the _2.11 suffix of the Spark artifacts below;
             mixing scala-library 2.10.x with _2.11 artifacts is binary-incompatible. -->
        <scala.version>2.11.8</scala.version>
    </properties>

    <dependencies>
        <dependency><!-- test-only dependency -->
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>transport</artifactId>
            <version>6.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20180813</version>
        </dependency>
        <dependency>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch-hadoop</artifactId>
            <version>6.2.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
    </dependencies>

1. Creating the ES client entry point

  We provide a utility class that creates the ES client entry point by reading a configuration file.
# elasticsearch.conf

elastic.host=192.168.130.131
elastic.port=9300
elastic.cluster.name=zzy-application

#Constants

/**
 * Property keys used to look up Elasticsearch connection settings from the
 * configuration file (elasticsearch.conf) loaded by ElasticSearchUtil.
 */
public interface Constants {
    // Host name / IP of an ES node.
    String ELASTIC_HOST = "elastic.host";
    // Transport port of the ES node (9300 by default, not the 9200 HTTP port).
    String ELASTIC_PORT="elastic.port";
    // Cluster name; must match the remote cluster or connection fails.
    String ELASTIC_CLUSTER_NAME = "elastic.cluster.name";
}

#ElasticSearchUtil

import com.zy.es.constant.Constants;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.util.Properties;

/**
 * 一般情况下的工具类都是单例
 * 里面若干方法一般都是static
 * 如果在连接集群的时候,集群的名称对应不上:
 *  NoNodeAvailableException[None of the configured nodes are available:
 */
/**
 * Utility holding a single shared {@link TransportClient}, configured from
 * {@code elasticsearch.conf} on the classpath. Utility classes are typically
 * singletons with static methods.
 *
 * If the configured cluster name does not match the remote cluster you get:
 *   NoNodeAvailableException[None of the configured nodes are available]
 */
public class ElasticSearchUtil {
    private static TransportClient client;
    private static Properties ps;
    static {
        // try-with-resources: the original code leaked the InputStream.
        try (InputStream in = ElasticSearchUtil.class.getClassLoader()
                .getResourceAsStream("elasticsearch.conf")) {
            if (in == null) {
                // Fail fast instead of leaving 'client' null and hitting an NPE later.
                throw new IOException("elasticsearch.conf not found on classpath");
            }
            ps = new Properties();
            ps.load(in);
            String host = ps.getProperty(Constants.ELASTIC_HOST);
            int port = Integer.parseInt(ps.getProperty(Constants.ELASTIC_PORT));
            String clusterName = ps.getProperty(Constants.ELASTIC_CLUSTER_NAME);
            Settings settings = Settings.builder()
                    .put("cluster.name", clusterName)
                    .build();
            client = new PreBuiltTransportClient(settings);
            // addTransportAddresses(TransportAddress...) is varargs; add more
            // addresses here when connecting to a multi-node cluster.
            TransportAddress ta = new TransportAddress(
                    InetAddress.getByName(host),
                    port
            );
            client.addTransportAddresses(ta);
        } catch (IOException e) {
            // Surface configuration problems immediately rather than printing
            // a stack trace and continuing with a null client.
            throw new ExceptionInInitializerError(e);
        }
    }

    /** @return the shared transport client created by the static initializer. */
    public static TransportClient getTransportClient(){
        return client;
    }

    /** Closes the given client if non-null; safe to call with null. */
    public static void close(TransportClient client){
        if(client!=null){
            client.close();
        }
    }
}

2. Creating an index

  Below are four ways to index a document: from a JSON string, a Map, a JavaBean, and an XContentBuilder.

import java.util

import com.zy.es.pojo.Book
import com.zy.es.utils.ElasticSearchUtil
import org.elasticsearch.action.index.IndexResponse
import org.elasticsearch.cluster.metadata.MetaData.XContentContext
import org.elasticsearch.common.xcontent.{XContentBuilder, XContentType}
import org.elasticsearch.common.xcontent.json.JsonXContent
import org.json.JSONObject

object createIndex {
  // `type` is a reserved word in Scala, hence the backticks.
  // val instead of var: these are never reassigned.
  private val index = "library"
  private val `type` = "books"
  private val client = ElasticSearchUtil.getTransportClient()

  def main(args: Array[String]): Unit = {
    createIndexByJson()
    //createIndexByMap()
    //createIndexByBean()
    //createIndexByXContentBuilder()
    // Release the shared ES connection.
    ElasticSearchUtil.close(client)
  }

  /**
    * 1. Index a document from a JSON string.
    * On ES 5.x+ the single-argument setSource(String) overload throws:
    *   java.lang.IllegalArgumentException: The number of object passed must be even but was [1]
    * so XContentType.JSON MUST be passed as the second argument:
    *   setSource(json.toString(), XContentType.JSON)
    */
  def createIndexByJson() = {
    val json = new JSONObject
    json.put("name", "我爱你中国")
    json.put("author", "周迅")
    json.put("date", "2018-6-6")
    // The response describes the created document.
    val response: IndexResponse = client.prepareIndex(index, `type`, "9")
      .setSource(json.toString, XContentType.JSON).get()
    // Print the document version.
    println(response.getVersion)
  }

  /**
    * 2. Index a document from a java.util.Map.
    */
  def createIndexByMap(): Unit = {
    val sourceMap = new util.HashMap[String, String]()
    sourceMap.put("name", "朝花夕拾")
    sourceMap.put("author", "鲁迅")
    sourceMap.put("date", "2009-4-5")

    val response: IndexResponse = client.prepareIndex(index, `type`, "2").setSource(sourceMap)
      .get()
    // Print the document version.
    println(response.getVersion)
  }

  /**
    * 3. Index a document from a plain JavaBean, serialized through JSONObject.
    */
  def createIndexByBean() = {
    val book: Book = new Book("斗破苍穹", "天蚕土豆", "2012-2-6")
    val json = new JSONObject(book)
    val response: IndexResponse = client.prepareIndex(index, `type`, "3")
      .setSource(json.toString, XContentType.JSON).get()
    // Print the document version.
    println(response.getVersion)
  }

  /**
    * 4. Index a document built with XContentBuilder.
    */
  def createIndexByXContentBuilder() = {
    val builder: XContentBuilder = JsonXContent.contentBuilder()
    builder.startObject()
      .field("name", "西游记")
      .field("author", "吴承恩")
      .field("version", "1.0")
      .endObject()
    val response: IndexResponse = client.prepareIndex(index, `type`, "4").setSource(builder)
      .get()
    println(response.getVersion)
  }
}

3. Deleting data, updating data, and bulk operations

  The following shows how to delete data, update data, and perform bulk operations.

import java.util

import com.zy.es.utils.ElasticSearchUtil
import org.elasticsearch.action.bulk.BulkResponse
import org.elasticsearch.action.delete.DeleteResponse
import org.elasticsearch.action.update.{UpdateRequestBuilder, UpdateResponse}
import org.elasticsearch.common.xcontent.{XContentBuilder, XContentType}
import org.elasticsearch.common.xcontent.json.JsonXContent
import org.json.JSONObject

object ElasticsearchCRUD {
  // val instead of var: these are never reassigned; `type` needs backticks.
  private val index = "library"
  private val `type` = "books"
  private val client = ElasticSearchUtil.getTransportClient()

  def main(args: Array[String]): Unit = {
    // Delete a document
    testDelete()
    // Update a document
    //testUpdate()
    // Bulk operations
    //testBulk()

    // Release the shared ES connection.
    ElasticSearchUtil.close(client)
  }

  /** Deletes document "2" from library/books and prints its version. */
  def testDelete() = {
    val response: DeleteResponse = client.prepareDelete(index, `type`, "2").get()
    println("version:" + response.getVersion)
  }

  /** Partially updates document "4" (sets field "version") and prints the new version. */
  def testUpdate() = {
    val builder: XContentBuilder = JsonXContent.contentBuilder()
    builder.startObject()
      .field("version", "3.0")
      .endObject()
    val response: UpdateResponse = client.prepareUpdate(index, `type`, "4")
      .setDoc(builder).get()
    println("version:" + response.getVersion)
  }

  /** Indexes two documents in one bulk request and prints each item's version. */
  def testBulk() = {
    val map = new util.HashMap[String, String]()
    map.put("name", "无双")
    map.put("author", "周润发")
    map.put("version", "2")
    val json = new JSONObject
    json.put("name", "红楼梦")
    json.put("author", "曹雪芹")
    json.put("version", "1.0")
    val responses: BulkResponse = client.prepareBulk().add(client.prepareIndex(index, `type`, "7")
      .setSource(map))
      .add(client.prepareIndex(index, `type`, "8").setSource(json.toString(), XContentType.JSON))
      .get()
    for (response <- responses.getItems) {
      print(response.getVersion)
    }
  }
}

4. Full-text search, paging, and highlighting

import java.util

import com.zy.es.utils.ElasticSearchUtil
import org.elasticsearch.action.search.{SearchResponse, SearchType}
import org.elasticsearch.index.query.QueryBuilders
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder
import org.elasticsearch.search.{SearchHit, SearchHits}
import org.json.JSONObject

import scala.collection.JavaConversions

object testSearch {
  // val instead of var: these are never reassigned; `type` needs backticks.
  private val index = "library"
  private val `type` = "books"
  private val client = ElasticSearchUtil.getTransportClient()

  def main(args: Array[String]): Unit = {
    // Full-text search
    //fullTextSearch()
    // Paged search
    //pagingSearch()
    // Highlighted search
    highlightSearch()
    // Fix: unlike the other examples, the client was never closed here.
    ElasticSearchUtil.close(client)
  }

  /** Full-text match query on "author"; prints each hit as one JSON line. */
  def fullTextSearch() = {
    val json = new JSONObject()
    val response = client.prepareSearch(index)
      .setSearchType(SearchType.DEFAULT) // default search type
      .setQuery(QueryBuilders.matchQuery("author", "天蚕土豆")) // analyzed match query
      .get()

    val hits = response.getHits // overall result metadata
    println("totals:" + hits.getTotalHits) // number of matching documents
    println("maxSource" + hits.getMaxScore) // best relevance score
    // Walk the individual hits.
    val myhits = hits.getHits
    for (hit <- myhits) {
      val index = hit.getIndex
      val id = hit.getId
      val `type` = hit.getType
      val source = hit.getSourceAsString
      val score = hit.getScore
      json.put("_index", index)
      json.put("_id", id)
      json.put("_type", `type`)
      json.put("_score", score)
      json.put("_source", new JSONObject(source))
      println(json.toString())
    }
  }

  /**
    * Paged search: page `num` with `count` hits corresponds to
    * from = pageSize * (num - 1), size = count.
    */
  def pagingSearch(from: Int = 0, size: Int = 10) = {
    val response: SearchResponse = client.prepareSearch(index)
      .setSearchType(SearchType.QUERY_THEN_FETCH)
      .setQuery(QueryBuilders.matchQuery("name", "西游记"))
      .setFrom(from)
      .setSize(size)
      .get()
    val myhits: SearchHits = response.getHits
    val total = myhits.totalHits
    println("zzy为您查询出" + total + "记录:")
    val hits: Array[SearchHit] = myhits.getHits
    for (hit <- hits) {
      val map: util.Map[String, AnyRef] = hit.getSourceAsMap
      val author = map.get("author")
      val name = map.get("name")
      val version = map.get("version")
      print(
        s"""
           |author:${author}
           |name:${name}
           |version:${version}
         """.stripMargin)
    }
  }

  /** Highlighted search: wraps matched "author" terms in <font> tags. */
  def highlightSearch() = {
    val response = client.prepareSearch(index)
      .setSearchType(SearchType.DEFAULT)
      .setQuery(QueryBuilders.matchQuery("author", "周润发"))
      .highlighter(new HighlightBuilder()
        .field("author") // field to highlight
        .preTags("<font color='red' size='20px'>") // opening tag inserted around matches
        .postTags("</font>")) // closing tag
      .get()
    val myHits = response.getHits
    val total = myHits.totalHits
    println("zzy为您查询出" + total + "记录:")
    val hits: Array[SearchHit] = myHits.getHits
    for (hit <- hits) {
      // Highlighted text is only available through getHighlightFields,
      // not through the plain _source.
      val hlFields = hit.getHighlightFields
      // Key is the highlighted field name ("author"); value holds the fragments.
      // NOTE: JavaConversions is deprecated; prefer JavaConverters in newer code.
      for ((field, highlight) <- JavaConversions.mapAsScalaMap(hlFields)) {
        val text = highlight.getFragments.map(_.toString).mkString
        print(text)
      }
    }
  }
}

5. Chinese word segmentation

(1) Demonstrating the problem

First, add some sample data to the ES cluster:

#创建索引库
curl -H "Content-Type: application/json" -XPUT 'http://192.168.130.131:9200/chinese'
#添加数据
curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/1 -d'{"content":"美国留给伊拉克的是个烂摊子吗"}'
curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/2 -d'{"content":"公安部:各地校车将享最高路权"}'
curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/3 -d'{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}'
curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/4 -d'{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}'

# Then use different query to see results:

import com.zy.es.utils.ElasticSearchUtil
import org.elasticsearch.action.search.{SearchResponse, SearchType}
import org.elasticsearch.index.query.QueryBuilders

object ChineseParticipleSearch {
  // val instead of var: never reassigned; `type` needs backticks.
  private val index = "chinese"
  private val `type` = "fulltext"
  private val client = ElasticSearchUtil.getTransportClient()

  def main(args: Array[String]): Unit = {
    // Match query for "中国" (China) on the "content" field.
    val response: SearchResponse = client.prepareSearch(index)
      .setSearchType(SearchType.QUERY_THEN_FETCH)
      .setQuery(QueryBuilders.matchQuery("content", "中国"))
      .get()
    val myHits = response.getHits.getHits
    for (hit <- myHits) {
      println(hit.getSourceAsString)
    }
    // Fix: close the shared client so the JVM can exit cleanly.
    ElasticSearchUtil.close(client)
  }
}

Note: we use a match query here, searching for "中国" (China).
Look at the results:
(figure: query results — a document about the United States is returned)
Why does the document about the United States also match?
This is because the default analyzer splits "中国" into individual characters during indexing and search, so documents containing either character alone also match, as shown above.
To return only documents actually about China, a Chinese analyzer solves the problem.

(2) Configuring a Chinese analyzer for ES

  Common Chinese analyzer plugins include IK and Paoding. Here we use the IK analyzer.
① Download the release matching your ES version: https://github.com/medcl/elasticsearch-analysis-ik
② Build from source with Maven (in IK_HOME): mvn clean install -DskipTests
③ Copy the zip from target/releases to ES_HOME/plugins/analysis-ik and unzip it; then adjust the ES version in plugin-descriptor.properties and plugin-security.policy to match yours
④ Edit ES_HOME/config/elasticsearch.yml and add (not needed on ES 6.x and above): index.analysis.analyzer.default.type: ik
⑤ Restart the ES service, for example:
# ps aux | grep elasticsearch
# kill -9 <pid>
# ES_HOME/bin/elasticsearch -d

(3) Re-testing

Step 1: delete the documents indexed earlier
curl -XDELETE 'http://192.168.130.131:9200/chinese/fulltext/1'
curl -XDELETE 'http://192.168.130.131:9200/chinese/fulltext/2'
curl -XDELETE 'http://192.168.130.131:9200/chinese/fulltext/3'
curl -XDELETE 'http://192.168.130.131:9200/chinese/fulltext/4'
Step 2: set the IK analyzer in the mapping, then reload the data
# set the IK analyzer
curl -XPOST http://192.168.130.131:9200/chinese/fulltext/_mapping -H 'Content-Type: application/json' -d '
{
  "properties": {
    "content": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    }
  }
}'

    #添加数据
    curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/1 -d'{"content":"美国留给伊拉克的是个烂摊子吗"}'
    curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/2 -d'{"content":"公安部:各地校车将享最高路权"}'
    curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/3 -d'{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}'
    curl -H "Content-Type: application/json" -XPOST http://192.168.130.131:9200/chinese/fulltext/4 -d'{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}'

Step 3:
re-run the code above and look at the results:
(figure: results now contain only documents about China)

6. Elasticsearch on Spark

Integration requirements:
ES official documentation:
https://www.elastic.co/guide/en/elasticsearch/hadoop/current/install.html
Maven dependency: https://mvnrepository.com/artifact/org.elasticsearch/elasticsearch-hadoop/6.2.4

<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch-hadoop</artifactId>
    <version>6.2.4</version>
</dependency>
// To read ES data from Spark, import the implicit conversions (org.elasticsearch.spark._)
import java.util.Date

import com.zy.es.utils.ElasticSearchUtil
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.cluster.metadata.MetaData.XContentContext
import org.elasticsearch.common.xcontent.XContentType
import org.elasticsearch.spark._

/**
  * spark整合ES
  * 通过spark去读取es中的数据,同时将操作之后的结果落地到ES
  */

/**
  * Reads documents from ES through Spark and writes each one back to a
  * second index via the TransportClient.
  */
object EsOnSpark {

  private val client = ElasticSearchUtil.getTransportClient()

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("EsOnSpark")
      .setMaster("local[2]")
      .set("es.index.auto.create", "true") // auto-create the target index if absent
      .set("es.nodes", "192.168.130.131") // ES cluster nodes
      .set("es.port", "9200") // ES HTTP port

    val sc = new SparkContext(conf)
    // esJsonRDD yields (documentId, documentJson) pairs for "index/type".
    val esRDD: RDD[(String, String)] = sc.esJsonRDD("library/books")
    val targetIndex = "es-spark"
    val targetType = "book"
    esRDD.foreach { case (id, json) =>
      // Fix: reuse the source document id. The previous timestamp-based id
      // (new Date().getTime) could collide within the same millisecond and
      // silently overwrite documents.
      // NOTE(review): this relies on the object's client being re-initialized
      // per executor JVM; it only behaves predictably in local mode — for a
      // real cluster prefer foreachPartition or EsSpark.saveJsonToEs.
      client.prepareIndex(targetIndex, targetType, id)
        .setSource(json, XContentType.JSON).get()
      println(id + " " + json)
    }
    sc.stop()
  }
}

This post covered only some common API operations. ES's greatest strength is its query capability; a later post will go deeper into ElasticSearch's powerful query API.

Guess you like

Origin blog.51cto.com/14048416/2411780