# MySQL real-time incremental sync to Kafka — Flume agent configuration

# Source declaration for agent "sync"

sync.sources=s-1

# Channel declaration

sync.channels=c-1

# Sink declarations. Two sinks are configured for failover: per the
# priorities below, k-2 (priority 10) is used first; if it fails,
# the processor falls back to k-1 (priority 5).

sync.sinks=k-1 k-2

# Failover requires the sinks to be grouped into a sink group.

sync.sinkgroups=g-1

sync.sinkgroups.g-1.sinks=k-1 k-2

# Sink processor type: failover (send via the highest-priority live sink)

sync.sinkgroups.g-1.processor.type= failover

# Priorities: a higher number means higher priority; every sink in the
# group must have a distinct priority.

sync.sinkgroups.g-1.processor.priority.k-1=5

sync.sinkgroups.g-1.processor.priority.k-2=10

# Maximum backoff (milliseconds) applied to a failed sink before it is
# retried. 10000 ms = 10 s; tune faster or slower for your environment.

sync.sinkgroups.g-1.processor.maxpenalty=10000

########## Channel definition

# Memory channel: adequate for modest volume, but events are lost if the
# agent restarts. Alternatives include JDBC, Kafka, or file channels.

sync.channels.c-1.type= memory

# Maximum number of events held in the channel queue.

sync.channels.c-1.capacity=100000

# Sizes the putList/takeList transaction queues: the sink takes up to
# batchSize events per transaction through this queue, so keep this
# value below capacity and above the sink's batch size.

# Official definition: The maximum number of events the channel will take from a source or give to a sink per transaction.

sync.channels.c-1.transactionCapacity=1000

sync.channels.c-1.byteCapacityBufferPercentage=20

### byteCapacity defaults to 80% of the JVM's maximum available memory,
### so it can be left unset.

# sync.channels.c-1.byteCapacity=800000

######### SQL source #################

# Channel used by source s-1; it must match the sinks' channel,
# otherwise no data will flow.

sync.sources.s-1.channels=c-1

######### For each one of the sources, the type is defined

sync.sources.s-1.type= org.keedio.flume.source.SQLSource

sync.sources.s-1.hibernate.connection.url=jdbc:mysql://192.168.1.10/testdb?useSSL=false

######### Hibernate database connection properties

sync.sources.s-1.hibernate.connection.user= test

# NOTE(review): plaintext credentials in the config file — restrict file
# permissions or externalize the secret.

sync.sources.s-1.hibernate.connection.password=123456

sync.sources.s-1.hibernate.connection.autocommit= true

sync.sources.s-1.hibernate.dialect= org.hibernate.dialect.MySQL5Dialect

sync.sources.s-1.hibernate.connection.driver_class= com.mysql.jdbc.Driver

# Delay between query executions, in milliseconds.

sync.sources.s-1.run.query.delay=10000

# Directory holding the status file that records sync progress.

sync.sources.s-1.status.file.path=/home/test/apache-flume-1.8.0-bin/status

# ${YYYYMM} is an environment variable: the test data lives in monthly
# tables (one table per month), so the status file is per-month too.
# See the startup instructions for how the variable is supplied.

sync.sources.s-1.status.file.name= test_${YYYYMM}.status

######## Custom query

# Initial value of the incremental index on the first run.

sync.sources.s-1.start.from= 0

# $@$ is replaced by the plugin with the last value read (persisted in
# the status file), giving incremental extraction ordered by id.

sync.sources.s-1.custom.query=select * from t_test_${YYYYMM} where id > $@$ order by id asc

sync.sources.s-1.batch.size=100

sync.sources.s-1.max.rows=100

sync.sources.s-1.hibernate.connection.provider_class= org.hibernate.connection.C3P0ConnectionProvider

# C3P0 connection pool bounds.

sync.sources.s-1.hibernate.c3p0.min_size=5

sync.sources.s-1.hibernate.c3p0.max_size=20

######### Sink 1 (failover standby — priority 5)

# Channel for sink k-1; it must match the source's channel, otherwise
# the sink has nothing to drain.

sync.sinks.k-1.channel=c-1

sync.sinks.k-1.type= org.apache.flume.sink.kafka.KafkaSink

sync.sinks.k-1.kafka.topic=sync-test

sync.sinks.k-1.kafka.bootstrap.servers=localhost:9092

# acks=1: wait for the partition leader's acknowledgment only.

sync.sinks.k-1.kafka.producer.acks=1

# Number of events sent to Kafka per batch.

sync.sinks.k-1.kafka.flumeBatchSize=100

######### Sink 2 (failover primary — priority 10)

# Channel for sink k-2; it must match the source's channel, otherwise
# the sink has nothing to drain.

sync.sinks.k-2.channel=c-1

sync.sinks.k-2.type= org.apache.flume.sink.kafka.KafkaSink

sync.sinks.k-2.kafka.topic=sync-test

sync.sinks.k-2.kafka.bootstrap.servers=localhost:9092

sync.sinks.k-2.kafka.producer.acks=1

sync.sinks.k-2.kafka.flumeBatchSize=100

# Source: adapted from www.cnblogs.com/shujutongbugongju/p/11021280.html