Logstash设置ElasticSearch默认分词器

Logstash设置ElasticSearch默认分词器

ElasticSearch:7.2.0

Logstash:7.2.0

1.定义Logstash模板

2.Logstash配置文件

3.查看执行结果

4.测试

对人工智能感兴趣的同学,可以点击以下链接:

现在人工智能非常火爆,很多朋友都想学,但是一般的教程都是为博硕生准备的,太难看懂了。最近发现了一个非常适合小白入门的教程,不仅通俗易懂而且还很风趣幽默。所以忍不住分享一下给大家。点这里可以跳转到教程。

https://www.cbedai.net/u014646662

以mysql导入es数据为例:

1.定义Logstash模板

文件名:logstash.json(该模板适用于 7.x,不一定适用于 6.x;7.x 默认情况下不允许在映射中指定索引类型)


{
  "index_patterns": ["*"],
  "order": 0,
  "version": 1,
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "date_detection": true,
    "numeric_detection": true,
    "dynamic_templates": [
      {
        "string_fields": {
          "match": "*",
          "match_mapping_type": "string",
          "mapping": {
            "type": "text",
            "norms": false,
            "analyzer": "ik_max_word",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            }
          }
        }
      }
    ]
  }
}

 "index_patterns": ["*"],是匹配所有索引的意思(7.x 的模板使用 index_patterns 字段,旧版本中的 "template" 字段已被它取代),

如果只想匹配以test-开头的索引,可以写成 "index_patterns": ["test-*"]

 "analyzer": "ik_max_word",这里用的是ik分词器,如果想使用其他分词器,在这里修改即可

"match": "*",匹配字段名

 "date_detection": true,识别日期类型
 "numeric_detection": true,识别数字类型

2.Logstash配置文件

# Logstash pipeline configuration:
# jdbc (MySQL) input -> Logstash -> Elasticsearch output.

input {
  jdbc {
    # Path to the MySQL JDBC driver jar; it must exist before Logstash starts.
    jdbc_driver_library => "H:/软件/ES/mysql-connector-java.jar"
	# NOTE(review): Connector/J 8.x names this class com.mysql.cj.jdbc.Driver - confirm against the jar version in use.
	jdbc_driver_class => "com.mysql.jdbc.Driver"
	jdbc_connection_string => "jdbc:mysql://192.168.131.77:3306/test?useUnicode=true&characterEncoding=utf8"
	jdbc_user => "zabbix"
	# NOTE(review): the password is hard-coded; consider an environment variable or the Logstash keystore.
	jdbc_password => "ztx"
	# Fetch the result set page by page instead of in a single query.
	jdbc_paging_enabled => "true"
	#jdbc_page_size => "50000"
	#sql_log_level => warn
	# Keep column names as-is instead of converting upper case to lower case
    #lowercase_column_names => false
	#last_run_metadata_path => "D:/logstash/last_run.txt"
    # Save the state of the last run
    #record_last_run => true
    # Use a column value (rather than the last run time) as :sql_last_value
    #use_column_value => true
	# Whether to discard the previously saved run state
    #clean_run => false
    #tracking_column => id
	# Type of the tracking column; the default is numeric
    #tracking_column_type => "numeric"
	# Polling schedule in cron-like syntax: minute, hour, day of month, month, day of week; all "*" means run every minute
    #schedule => "* * * * *"
    #statement => "SELECT * from test  where id > :sql_last_value"
	# Full import: every run reads the whole table.
	statement => "SELECT * from test"
    }
}


# Send every event to Elasticsearch and install the custom index template.
output {
  elasticsearch {
    hosts => ["http://node01:9200","http://node02:9200","http://node03:9200"]
    # Index that receives the imported rows.
    index => "mysql"
	# Name under which the template is registered in Elasticsearch.
	template_name => "logstash"
	# Local path of the template file (logstash.json).
	template => "D:/logstash/template/logstash.json"
	# Replace a template of the same name if it already exists.
	template_overwrite => true
	# Let Logstash install and manage the template.
	manage_template => true
  }
}

a、要提前准备好 MySQL 的 JDBC 驱动(即 jdbc_driver_library 指向的 jar 包)

b、template_name:模板名称

c、template:模板位置,就是上面的logstash.json位置

d、template_overwrite:模板如果存在,则覆盖

e、manage_template:是否由 Logstash 管理模板(为 true 时,Logstash 启动后会自动把模板写入 ElasticSearch)

3.查看执行结果:

查看索引mapping

GET mysql/_mappings

执行结果:
{
  "mysql" : {
    "mappings" : {
      "dynamic_templates" : [
        {
          "string_fields" : {
            "match" : "*",
            "match_mapping_type" : "string",
            "mapping" : {
              "analyzer" : "ik_max_word",
              "fields" : {
                "keyword" : {
                  "type" : "keyword"
                }
              },
              "norms" : false,
              "type" : "text"
            }
          }
        }
      ],
      "properties" : {
        "@timestamp" : {
          "type" : "date"
        },
        "@version" : {
          "type" : "text",
          "norms" : false,
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "ik_max_word"
        },
        "id" : {
          "type" : "long"
        },
        "name" : {
          "type" : "text",
          "norms" : false,
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "ik_max_word"
        },
        "text" : {
          "type" : "text",
          "norms" : false,
          "fields" : {
            "keyword" : {
              "type" : "keyword"
            }
          },
          "analyzer" : "ik_max_word"
        }
      }
    }
  }
}

从执行结果可以看出,字符串字段默认使用了ik分词器

4.测试

a、查询测试

GET /mysql/_search
{
  "query": {
    "term":{
      "text":"推出"
    }
  }
}


执行结果:
{
  "took" : 3,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.6311548,
    "hits" : [
      {
        "_index" : "mysql",
        "_type" : "_doc",
        "_id" : "FpHywGsBywKESMt_PXbZ",
        "_score" : 1.6311548,
        "_source" : {
          "@timestamp" : "2019-07-05T07:03:47.733Z",
          "name" : "3",
          "text" : "Elasticsearch 推出了“输入即搜索”(search_as_you_type) 功能及更多改进,Kibana 可实现功能层面的访问控制,而 Beats 则新增了大量的数据整合功能。此外,Elastic SIEM 首次亮相,APM 开始提供 .NET 支持,而且 Maps 现在可嵌入到仪表板中",
          "id" : 2,
          "@version" : "1"
        }
      }
    ]
  }
}

b、分词测试

GET mysql/_analyze
{

  "field": "text",
  "text": "Elasticsearch 推出了“输入即搜索”(search_as_you_type) 功能及更多改进,Kibana 可实现功能层面的访问控制,而 Beats 则新增了大量的数据整合功能。此外,Elastic SIEM 首次亮相,APM 开始提供 .NET 支持,而且 Maps 现在可嵌入到仪表板中" 
}

执行结果:

{
  "tokens" : [
    {
      "token" : "elasticsearch",
      "start_offset" : 0,
      "end_offset" : 13,
      "type" : "ENGLISH",
      "position" : 0
    },
    {
      "token" : "推出",
      "start_offset" : 14,
      "end_offset" : 16,
      "type" : "CN_WORD",
      "position" : 1
    },
    {
      "token" : "出了",
      "start_offset" : 15,
      "end_offset" : 17,
      "type" : "CN_WORD",
      "position" : 2
    },
    {
      "token" : "输入",
      "start_offset" : 18,
      "end_offset" : 20,
      "type" : "CN_WORD",
      "position" : 3
    },
    {
      "token" : "即",
      "start_offset" : 20,
      "end_offset" : 21,
      "type" : "CN_CHAR",
      "position" : 4
    },
    {
      "token" : "搜索",
      "start_offset" : 21,
      "end_offset" : 23,
      "type" : "CN_WORD",
      "position" : 5
    },
    {
      "token" : "search_as_you_type",
      "start_offset" : 25,
      "end_offset" : 43,
      "type" : "LETTER",
      "position" : 6
    },
    {
      "token" : "search",
      "start_offset" : 25,
      "end_offset" : 31,
      "type" : "ENGLISH",
      "position" : 7
    },
    {
      "token" : "you",
      "start_offset" : 35,
      "end_offset" : 38,
      "type" : "ENGLISH",
      "position" : 8
    },
    {
      "token" : "type",
      "start_offset" : 39,
      "end_offset" : 43,
      "type" : "ENGLISH",
      "position" : 9
    },
    {
      "token" : "功能",
      "start_offset" : 45,
      "end_offset" : 47,
      "type" : "CN_WORD",
      "position" : 10
    },
    {
      "token" : "能及",
      "start_offset" : 46,
      "end_offset" : 48,
      "type" : "CN_WORD",
      "position" : 11
    },
    {
      "token" : "更多",
      "start_offset" : 48,
      "end_offset" : 50,
      "type" : "CN_WORD",
      "position" : 12
    },
    {
      "token" : "改进",
      "start_offset" : 50,
      "end_offset" : 52,
      "type" : "CN_WORD",
      "position" : 13
    },
    {
      "token" : "kibana",
      "start_offset" : 53,
      "end_offset" : 59,
      "type" : "ENGLISH",
      "position" : 14
    },
    {
      "token" : "可",
      "start_offset" : 60,
      "end_offset" : 61,
      "type" : "CN_CHAR",
      "position" : 15
    },
    {
      "token" : "实现",
      "start_offset" : 61,
      "end_offset" : 63,
      "type" : "CN_WORD",
      "position" : 16
    },
    {
      "token" : "功能",
      "start_offset" : 63,
      "end_offset" : 65,
      "type" : "CN_WORD",
      "position" : 17
    },
    {
      "token" : "层面",
      "start_offset" : 65,
      "end_offset" : 67,
      "type" : "CN_WORD",
      "position" : 18
    },
    {
      "token" : "面的",
      "start_offset" : 66,
      "end_offset" : 68,
      "type" : "CN_WORD",
      "position" : 19
    },
    {
      "token" : "访问",
      "start_offset" : 68,
      "end_offset" : 70,
      "type" : "CN_WORD",
      "position" : 20
    },
    {
      "token" : "控制",
      "start_offset" : 70,
      "end_offset" : 72,
      "type" : "CN_WORD",
      "position" : 21
    },
    {
      "token" : "而",
      "start_offset" : 73,
      "end_offset" : 74,
      "type" : "CN_CHAR",
      "position" : 22
    },
    {
      "token" : "beats",
      "start_offset" : 75,
      "end_offset" : 80,
      "type" : "ENGLISH",
      "position" : 23
    },
    {
      "token" : "则",
      "start_offset" : 81,
      "end_offset" : 82,
      "type" : "CN_CHAR",
      "position" : 24
    },
    {
      "token" : "新增",
      "start_offset" : 82,
      "end_offset" : 84,
      "type" : "CN_WORD",
      "position" : 25
    },
    {
      "token" : "了",
      "start_offset" : 84,
      "end_offset" : 85,
      "type" : "CN_CHAR",
      "position" : 26
    },
    {
      "token" : "大量",
      "start_offset" : 85,
      "end_offset" : 87,
      "type" : "CN_WORD",
      "position" : 27
    },
    {
      "token" : "的",
      "start_offset" : 87,
      "end_offset" : 88,
      "type" : "CN_CHAR",
      "position" : 28
    },
    {
      "token" : "数据",
      "start_offset" : 88,
      "end_offset" : 90,
      "type" : "CN_WORD",
      "position" : 29
    },
    {
      "token" : "整合",
      "start_offset" : 90,
      "end_offset" : 92,
      "type" : "CN_WORD",
      "position" : 30
    },
    {
      "token" : "功能",
      "start_offset" : 92,
      "end_offset" : 94,
      "type" : "CN_WORD",
      "position" : 31
    },
    {
      "token" : "此外",
      "start_offset" : 95,
      "end_offset" : 97,
      "type" : "CN_WORD",
      "position" : 32
    },
    {
      "token" : "elastic",
      "start_offset" : 98,
      "end_offset" : 105,
      "type" : "ENGLISH",
      "position" : 33
    },
    {
      "token" : "siem",
      "start_offset" : 106,
      "end_offset" : 110,
      "type" : "ENGLISH",
      "position" : 34
    },
    {
      "token" : "首次",
      "start_offset" : 111,
      "end_offset" : 113,
      "type" : "CN_WORD",
      "position" : 35
    },
    {
      "token" : "亮相",
      "start_offset" : 113,
      "end_offset" : 115,
      "type" : "CN_WORD",
      "position" : 36
    },
    {
      "token" : "apm",
      "start_offset" : 116,
      "end_offset" : 119,
      "type" : "ENGLISH",
      "position" : 37
    },
    {
      "token" : "开始",
      "start_offset" : 120,
      "end_offset" : 122,
      "type" : "CN_WORD",
      "position" : 38
    },
    {
      "token" : "提供",
      "start_offset" : 122,
      "end_offset" : 124,
      "type" : "CN_WORD",
      "position" : 39
    },
    {
      "token" : "net",
      "start_offset" : 126,
      "end_offset" : 129,
      "type" : "ENGLISH",
      "position" : 40
    },
    {
      "token" : "支持",
      "start_offset" : 130,
      "end_offset" : 132,
      "type" : "CN_WORD",
      "position" : 41
    },
    {
      "token" : "而且",
      "start_offset" : 133,
      "end_offset" : 135,
      "type" : "CN_WORD",
      "position" : 42
    },
    {
      "token" : "maps",
      "start_offset" : 136,
      "end_offset" : 140,
      "type" : "ENGLISH",
      "position" : 43
    },
    {
      "token" : "现在",
      "start_offset" : 141,
      "end_offset" : 143,
      "type" : "CN_WORD",
      "position" : 44
    },
    {
      "token" : "可",
      "start_offset" : 143,
      "end_offset" : 144,
      "type" : "CN_CHAR",
      "position" : 45
    },
    {
      "token" : "嵌入",
      "start_offset" : 144,
      "end_offset" : 146,
      "type" : "CN_WORD",
      "position" : 46
    },
    {
      "token" : "到",
      "start_offset" : 146,
      "end_offset" : 147,
      "type" : "CN_CHAR",
      "position" : 47
    },
    {
      "token" : "仪表板",
      "start_offset" : 147,
      "end_offset" : 150,
      "type" : "CN_WORD",
      "position" : 48
    },
    {
      "token" : "仪表",
      "start_offset" : 147,
      "end_offset" : 149,
      "type" : "CN_WORD",
      "position" : 49
    },
    {
      "token" : "板",
      "start_offset" : 149,
      "end_offset" : 150,
      "type" : "CN_CHAR",
      "position" : 50
    },
    {
      "token" : "中",
      "start_offset" : 150,
      "end_offset" : 151,
      "type" : "CN_CHAR",
      "position" : 51
    }
  ]
}

如果不想在logstash中配置,也可直接添加

打开Kibana,执行以下即可

POST _template/template_default
{
  "index_patterns": ["*"],
  "order": 0,
  "version": 1,
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "date_detection": true,
    "numeric_detection": true,
    "dynamic_templates": [
      {
        "string_fields": {
          "match": "*",
          "match_mapping_type": "string",
          "mapping": {
            "type": "text",
            "norms": false,
            "analyzer": "ik_max_word",
            "fields": {
              "keyword": {
                "type": "keyword"
              }
            }
          }
        }
      }
    ]
  }
}
发布了139 篇原创文章 · 获赞 273 · 访问量 666万+

猜你喜欢

转载自blog.csdn.net/u014646662/article/details/94736551