Using Logstash to migrate ES data

I won't go into the background here; everyone should already know it.

Logstash is better suited to one-off migration than to synchronization: when used for synchronization, it keeps transmitting, and once a pass finishes it automatically starts another one that re-imports and overwrites the data at the destination.

First, install the plugins:

logstash-plugin install logstash-input-elasticsearch

logstash-plugin install logstash-output-elasticsearch

If the server has no internet access, an offline install also works:

1. First, install the plugin online on a server without network restrictions:
bin/logstash-plugin install logstash-input-elasticsearch


2. Export an offline pack on that unrestricted server:
bin/logstash-plugin prepare-offline-pack --overwrite --output logstash-input-elasticsearch.zip logstash-input-elasticsearch


3. Copy the archive to the restricted server and run the install command:
bin/logstash-plugin install file:///usr/local/src/logstash-input-elasticsearch.zip
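
To confirm the plugins actually made it in (online or offline), list the installed plugins and filter for them:

bin/logstash-plugin list | grep elasticsearch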

Edit a logstash.conf file


input {
    elasticsearch {
        hosts => "1.1.1.1:9200"
        index => "*" # "*" migrates every index; this can also name one specific index
        docinfo => true # when true, pull each ES document's metadata (index, type, and id) into @metadata
        user => ""
        password => ""
        query => '{ "query": { "match": { "statuscode": 200 } }, "sort": [ "_doc" ] }' # any ES query DSL works here
        size => 5000
        scroll => "5m"
        schedule => "* * * * *" # trigger an extraction every minute; by default the worker shuts down after a single query completes
    }
}

filter {
    mutate {
        remove_field => ["@version"] # drop the version field Logstash adds on its own
    }
    mutate {
        add_field => { "typeCode" => "%{[@metadata][_type]}" } # copy the document type into a typeCode field
    }
    mutate {
        replace => { "env" => "huawei" }
    }
}

output {
    elasticsearch {
        hosts => ["http://2.2.2.2:9200"]
        user => "elastic"
        password => "your_password"
        index => "%{[@metadata][_index]}" # use the index name from the metadata, so source and destination indices match
        document_type => "%{[@metadata][_type]}" # use the type from the metadata, so index types match on both clusters
        document_id => "%{[@metadata][_id]}" # use the id from the metadata, so document ids match on both clusters
    }
}
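
Before pointing the output at the real destination cluster, it can be worth dry-running the pipeline with a stdout output so each event is printed to the console along with its @metadata fields. A minimal debug output for that (swap it in for the elasticsearch output above) looks like:

output {
    stdout { codec => rubydebug { metadata => true } } # print events including @metadata
}

Once the events and their @metadata values look right, switch back to the elasticsearch output.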

Here is a real example:

input {
    elasticsearch {
        hosts => "10.8.1.11:9200" # source ES
        index => "oss-test-2023-03-04" # the index being tested
        docinfo => true
        user => "elastic"
        password => "test"
        query => '{ "query": {"bool": { "must_not": { "exists": {"field": "env" } } } }}' # only pull documents where the env field does not exist
        size => 5000
        scroll => "5m"
        schedule => "* * * * *"
    }
}

filter {
    mutate {
        remove_field => ["@version","@timestamp"] # drop the fields Logstash adds automatically
        replace => { "env" => "test" } # tag the migrated data with an env field whose value is "test"
    }
}

output {
    elasticsearch {
        hosts => ["http://10.1.1.11:9200"]
        user => "manage"
        password => "test"
        action => "update"
        doc_as_upsert => true # with action => "update", these two lines change the default import mode; otherwise a scheduled sync keeps re-importing the same documents
        index => "%{[@metadata][_index]}" # fill in the destination from the source metadata
        document_type => "%{[@metadata][_type]}"
        document_id => "%{[@metadata][_id]}"
    }
}

Finally, just start Logstash:
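
Assuming the config above is saved as logstash.conf, a typical invocation is:

bin/logstash -f logstash.conf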


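Once Logstash has been running for a minute or two, a quick spot-check on the destination is to count how many documents now carry the env field the filter added, using the standard _count API (host, index name, and credentials as in the example above):

curl -u manage:test -H 'Content-Type: application/json' \
    'http://10.1.1.11:9200/oss-test-2023-03-04/_count' \
    -d '{ "query": { "exists": { "field": "env" } } }'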