Logstash JDBC full and incremental updates

When synchronizing relational data to Elasticsearch (ES), the first sync must load the full data set; after that, only the incremental changes need to be synchronized on a schedule.
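
For reference, all of the configs below assume a MySQL table shaped roughly like this (the original does not give the schema; the columns are inferred from the queries):

    CREATE TABLE datadictionary (
        id   BIGINT    NOT NULL PRIMARY KEY,  -- becomes the ES document _id; also drives id-based tracking
        time TIMESTAMP NOT NULL               -- drives timestamp-based tracking
        -- plus the remaining business columns
    );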

Full update:

input {
    jdbc {
        # MySQL connection string; dataassets is the database name
        jdbc_connection_string => "jdbc:mysql://192.168.1.185:3306/dataassets"
        # Credentials
        jdbc_user => "root"
        jdbc_password => "zhbr@2020"
        # Path to the JDBC driver JAR
        jdbc_driver_library => "/opt/bigdata/logstash-6.6.2/sql/mysql-connector-java-5.1.32-bin.jar"
        # Driver class name
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        # Page large result sets
        jdbc_paging_enabled => "true"
        jdbc_page_size => "50000"
        # SQL statement to execute
        statement => "SELECT * FROM datadictionary"
        # Schedule in cron syntax; fields (left to right) are minute, hour,
        # day of month, month, day of week. All * means run every minute.
        schedule => "* * * * *"
        type => "jdbc"
    }

    beats {
        # IP address to listen on
        host => "192.168.1.185"
        port => 5400
    }
}


# Shift both the auto-generated @timestamp and the MySQL time field forward by
# 8 hours: MySQL stores CST (UTC+8) while Elasticsearch ingests times as UTC,
# an 8-hour difference.
filter {
    # Copy @timestamp + 8h into a temp field, write it back, then drop the temp field
    ruby {
        code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
    }
    ruby {
        code => "event.set('@timestamp',event.get('timestamp'))"
    }
    mutate {
        remove_field => ["timestamp"]
    }

    # Same +8h shift for the MySQL time field
    ruby {
        code => "event.set('timestamps', event.get('time').time.localtime + 8*60*60)"
    }
    ruby {
        code => "event.set('time',event.get('timestamps'))"
    }
    mutate {
        remove_field => ["timestamps"]
    }
}

output {
    if [type] == "jdbc" {
        elasticsearch {
            # Elasticsearch address
            hosts => ["192.168.1.183:9200"]
            # Target _index; any name works
            index => "dataassets"
            # Target _type; any name works
            document_type => "dataassets_type"
            # Use the table's primary key column as the document _id
            document_id => "%{id}"
        }
    }
}
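
To start the pipeline, point Logstash at the config file (the filename below is illustrative):

    bin/logstash -f full-sync.conf

Because document_id is the table's primary key, re-running the full SELECT every minute overwrites existing documents instead of duplicating them, so the full sync is idempotent. Also worth knowing: the jdbc input has a jdbc_default_timezone option for declaring the timezone the database stores values in, which can make the hand-rolled +8h ruby filters above unnecessary.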

Incremental update:

Tracking by timestamp:
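
On each scheduled run the jdbc input substitutes the last saved tracking value for :sql_last_value, executes the statement, and then records the tracking column's value from the last row it processed. A run therefore effectively executes something like (the value shown is hypothetical):

    SELECT * FROM datadictionary WHERE time > '2020-07-01 03:00:00'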

input {
    jdbc {
        jdbc_connection_string => "jdbc:mysql://192.168.1.185:3306/dataassets"
        jdbc_user => "root"
        jdbc_password => "zhbr@2020"
        jdbc_driver_library => "/opt/bigdata/logstash-6.6.2/sql/mysql-connector-java-5.1.32-bin.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_paging_enabled => "true"
        jdbc_page_size => "50000"
        # Only fetch rows newer than the last saved tracking value
        statement => "SELECT * FROM datadictionary WHERE time > :sql_last_value"
        # Track a column's value rather than the time of the last run
        use_column_value => true
        tracking_column => "time"
        # Persist the last value between runs
        record_last_run => true
        tracking_column_type => "timestamp"
        last_run_metadata_path => "/opt/bigdata/logstash-6.6.2/config/station_parameter.txt"
        # Six-field cron (with a seconds field): run every 60 seconds
        schedule => "*/60 * * * * *"
        type => "jdbc"
    }

    beats {
        host => "192.168.1.183"
        port => 5400
    }
}

filter {
    # Same +8h timezone shift as in the full-sync config
    ruby {
        code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
    }
    ruby {
        code => "event.set('@timestamp',event.get('timestamp'))"
    }
    mutate {
        remove_field => ["timestamp"]
    }

    ruby {
        code => "event.set('timestamps', event.get('time').time.localtime + 8*60*60)"
    }
    ruby {
        code => "event.set('time',event.get('timestamps'))"
    }
    mutate {
        remove_field => ["timestamps"]
    }
}


output {
    if [type] == "jdbc" {
        elasticsearch {
            hosts => ["192.168.1.183:9200"]
            index => "abcd1"
            document_type => "abcd_type"
            document_id => "%{id}"
        }
    }
}
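
With record_last_run enabled, the saved value is persisted as YAML to last_run_metadata_path, so it survives Logstash restarts. After a run, station_parameter.txt should contain a single line roughly like this (the timestamp is illustrative):

    --- 2020-07-01 03:00:00.000000000 Z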

Tracking by the unique primary key id:
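
Numeric tracking works the same way. On the very first run, before the metadata file exists, sql_last_value defaults to 0 for numeric tracking (and to 1970-01-01 for timestamp tracking), so the first scheduled query effectively doubles as the initial full load:

    SELECT * FROM datadictionary WHERE id > 0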

input {
    jdbc {
        jdbc_connection_string => "jdbc:mysql://192.168.1.185:3306/dataassets"
        jdbc_user => "root"
        jdbc_password => "zhbr@2020"
        jdbc_driver_library => "/opt/bigdata/logstash-6.6.2/sql/mysql-connector-java-5.1.32-bin.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_paging_enabled => "true"
        jdbc_page_size => "50000"
        statement => "SELECT * FROM datadictionary WHERE id > :sql_last_value"
        use_column_value => true
        # Track the numeric primary key instead of a timestamp
        tracking_column => "id"
        record_last_run => true
        tracking_column_type => "numeric"
        last_run_metadata_path => "/opt/bigdata/logstash-6.6.2/config/station_parameter.txt"
        schedule => "*/60 * * * * *"
        type => "jdbc"
    }

    beats {
        host => "192.168.1.183"
        port => 5400
    }
}

filter {
    # Same +8h timezone shift as in the full-sync config
    ruby {
        code => "event.set('timestamp', event.get('@timestamp').time.localtime + 8*60*60)"
    }
    ruby {
        code => "event.set('@timestamp',event.get('timestamp'))"
    }
    mutate {
        remove_field => ["timestamp"]
    }

    ruby {
        code => "event.set('timestamps', event.get('time').time.localtime + 8*60*60)"
    }
    ruby {
        code => "event.set('time',event.get('timestamps'))"
    }
    mutate {
        remove_field => ["timestamps"]
    }
}


output {
    if [type] == "jdbc" {
        elasticsearch {
            hosts => ["192.168.1.183:9200"]
            index => "abcd1"
            document_type => "abcd_type"
            document_id => "%{id}"
        }
    }
}
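
Which column to track depends on what changes in the table: id-based tracking only picks up newly inserted rows, while timestamp-based tracking also catches updates, provided the time column is refreshed on every write. In MySQL that can be enforced at the schema level, e.g. (a sketch, assuming time is a TIMESTAMP column):

    ALTER TABLE datadictionary
        MODIFY time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP;

Neither approach propagates deletes: rows removed from MySQL remain in the index until handled separately.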

Source: blog.csdn.net/weixin_44455388/article/details/107084044