直接上代码(代码仍存在以下问题):
使用 EXACTLY_ONCE 模式时写入 Kafka 有问题,暂时没有解决;
此外还存在依赖冲突问题:
kafka-clients 与 flink-connector-kafka_2.11 的版本冲突。
package sjb;

import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * Flink job that consumes JSON records from the Kafka topic {@code ods_test2} and dynamically
 * routes each record to the Kafka topic named in the record's own {@code "topic"} field.
 *
 * <p>Sample input records:
 * <pre>
 * {"topic":"sink_topic1","age":11,"address":"北京","time":1511935328}
 * {"topic":"sink_topic2","age":44,"address":"上海","time":1511935328}
 * </pre>
 *
 * <p>Known issue (kept as AT_LEAST_ONCE for now): for
 * {@link FlinkKafkaProducer.Semantic#EXACTLY_ONCE} to work, the producer's
 * {@code transaction.timeout.ms} must not exceed the broker's
 * {@code transaction.max.timeout.ms} (broker default: 15 minutes), and downstream
 * consumers must read with {@code isolation.level=read_committed}. The kafka-clients
 * version must also match the one shipped with flink-connector-kafka_2.11 to avoid
 * the dependency conflict mentioned above.
 *
 * @author Created By lujisen
 * @version v1.0
 */
public class KafkaDeserializerSchemaTest {

    /** Fallback sink topic for records that carry no (or an empty) "topic" field. */
    private static final String DEFAULT_SINK_TOPIC = "sink_topic_default";

    public static void main(String[] args) throws Exception {
        /* Environment initialisation. */
        StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment();
        /* Checkpoint every 2s; Flink's default checkpointing mode is already EXACTLY_ONCE. */
        senv.enableCheckpointing(2000);

        /* "ods_test2" is auto-created by Kafka if it does not exist (one partition, id 0). */
        FlinkKafkaConsumer<ConsumerRecord<String, String>> myConsumer =
                new FlinkKafkaConsumer<ConsumerRecord<String, String>>(
                        "ods_test2", new MyKafkaDeserializationSchema(), getKafkaConfig());

        /* Optional: start from specific offsets instead of the earliest record, e.g.
         * consume topic3 / partition 0 from offset 0:
         */
        // Map<KafkaTopicPartition, Long> specificStartOffsets = new HashMap<>();
        // specificStartOffsets.put(new KafkaTopicPartition("topic3", 0), 0L);
        // myConsumer.setStartFromSpecificOffsets(specificStartOffsets);

        myConsumer.setStartFromEarliest();
        DataStream<ConsumerRecord<String, String>> source = senv.addSource(myConsumer);

        /* BUG FIX: records with a null value previously reached the sink serializer, where
         * JSONObject.parseObject(null) returned null and caused an NPE; with checkpointing
         * enabled the job then restarted endlessly. Drop such records up front. */
        DataStream<String> keyValue = source
                .filter(record -> record.value() != null)
                .map(new MapFunction<ConsumerRecord<String, String>, String>() {
                    @Override
                    public String map(ConsumerRecord<String, String> message) throws Exception {
                        // return "key" + message.key() + " value:" + message.value();
                        return message.value();
                    }
                });
        keyValue.print("=========");

        // FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<String>("test11", new SimpleStringSchema(), getProducer());
        /* BUG FIX: the default topic used to be the empty string "", which is invalid if it is
         * ever used; a named fallback topic is supplied instead. */
        FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<String>(
                DEFAULT_SINK_TOPIC,
                new KafkaSerializationSchema<String>() {
                    @Override
                    public ProducerRecord<byte[], byte[]> serialize(String str, @Nullable Long timestamp) {
                        JSONObject jsonObject = JSONObject.parseObject(str);
                        String topic = jsonObject.getString("topic");
                        if (topic == null || topic.isEmpty()) {
                            /* Route un-tagged records to the fallback topic instead of failing. */
                            topic = DEFAULT_SINK_TOPIC;
                        }
                        /* Pin UTF-8 explicitly; the no-arg getBytes() depends on the JVM's
                         * platform charset. */
                        return new ProducerRecord<byte[], byte[]>(topic, str.getBytes(StandardCharsets.UTF_8));
                    }
                },
                getProducer(),
                /* EXACTLY_ONCE requires broker transaction.max.timeout.ms >= the producer's
                 * transaction.timeout.ms (see class Javadoc); keep AT_LEAST_ONCE until the
                 * broker configuration is verified. */
                // FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
                FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);
        producer.setWriteTimestampToKafka(true);
        // producer.setLogFailuresOnly(false);
        keyValue.addSink(producer);

        /* Launch the job. */
        senv.execute();
    }

    /**
     * Builds the Kafka consumer configuration.
     *
     * @return consumer {@link Properties} (bootstrap servers, group id, String deserializers)
     */
    public static Properties getKafkaConfig() {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers",
                "dev-ct6-dc-worker01:9092,dev-ct6-dc-worker02:9092,dev-ct6-dc-worker03:9092");
        props.setProperty("group.id", "test1");
        props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // props.setProperty("auto.offset.reset", "latest");
        return props;
    }

    /**
     * Builds the Kafka producer configuration, using {@link ProducerConfig} constants
     * instead of raw string keys, and String values throughout (a non-String value in a
     * {@link Properties} is invisible to {@code getProperty}).
     *
     * @return producer {@link Properties}
     */
    public static Properties getProducer() {
        Properties props = new Properties();
        props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,
                "dev-ct6-dc-worker01:9092,dev-ct6-dc-worker02:9092,dev-ct6-dc-worker03:9092");
        props.setProperty(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy");
        props.setProperty(ProducerConfig.LINGER_MS_CONFIG, "1");
        /* 5 minutes: must stay <= the broker's transaction.max.timeout.ms (default 15 min),
         * otherwise EXACTLY_ONCE initialization fails. */
        props.setProperty(ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, String.valueOf(1000 * 60 * 5));
        props.setProperty(ProducerConfig.ACKS_CONFIG, "all");
        props.setProperty(ProducerConfig.RETRIES_CONFIG, "3");
        // props.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        // props.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
        return props;
    }
}