Flink: sinking to different Kafka topics based on a field in the data

Straight to the code; there are still open problems:

Writing fails when the producer runs in EXACTLY_ONCE mode; this is not resolved yet.

There is also a dependency conflict: the explicitly declared kafka-clients clashes with the kafka-clients version pulled in by flink-connector-kafka_2.11.
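
The two problems may well be related: the EXACTLY_ONCE (transactional) mode of the universal Kafka connector relies on kafka-clients internals, so a conflicting kafka-clients version on the classpath can break transactional writes. A common way out is to declare only the connector and let it bring in its own kafka-clients transitively. A minimal Maven sketch; the version here is an assumption based on the flink110 package name in the code below:

<!-- Sketch only: keep a single kafka-clients on the classpath, the one the connector ships with. -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka_2.11</artifactId>
    <version>1.10.0</version>
</dependency>
<!-- Remove any explicit org.apache.kafka:kafka-clients dependency, or pin it to the
     version that the connector itself depends on. -->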

package sjb;


import com.alibaba.fastjson.JSONObject;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

/**
 * @author: Created By lujisen
 * @company ChinaUnicom Software JiNan
 * @date: 2020-04-25 18:41
 * @version: v1.0
 * @description: com.hadoop.ljs.flink110.kafka
 */
public class KafkaDeserializerSchemaTest {
    public static void main(String[] args) throws Exception {
        /* Initialize the execution environment */
        StreamExecutionEnvironment senv = StreamExecutionEnvironment.getExecutionEnvironment();
        /* Enable checkpointing. The message key/value is not null-checked here; with checkpointing enabled, hitting an error (e.g. a null value) makes the job restart endlessly. */
        senv.enableCheckpointing(2000);
        /* If the topic does not exist, it is auto-created in Kafka with a single partition (partition 0). */
        FlinkKafkaConsumer<ConsumerRecord<String, String>> myConsumer = new FlinkKafkaConsumer<ConsumerRecord<String, String>>(
                "ods_test2",
                new MyKafkaDeserializationSchema(),
                getKafkaConfig());

        /* Optionally start from specific offsets: */
//        Map<KafkaTopicPartition, Long> specificStartOffsets = new HashMap<>();
        /* e.g. consume topic3, partition 0, from the first record */
//        specificStartOffsets.put(new KafkaTopicPartition("topic3", 0), 0L);
//        myConsumer.setStartFromSpecificOffsets(specificStartOffsets);

        /**
         *  Sample input data:
         *      {"topic":"sink_topic1","age":11,"address":"北京","time":1511935328}
         *      {"topic":"sink_topic1","age":22,"address":"北京22","time":1511935328}
         *      {"topic":"sink_topic2","age":44,"address":"上海","time":1511935328}
         *      {"topic":"sink_topic2","age":33,"address":"上海2","time":1511935328}
         */
        myConsumer.setStartFromEarliest();
        DataStream<ConsumerRecord<String, String>> source = senv.addSource(myConsumer);


        DataStream<String> keyValue = source.map(new MapFunction<ConsumerRecord<String, String>, String>() {
            @Override
            public String map(ConsumerRecord<String, String> message) throws Exception {
//                return "key" + message.key() + "  value:" + message.value();
                return message.value();
            }
        });
        keyValue.print("=========");
//        FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<String>("test11",new SimpleStringSchema(),getProducer());
        /* The default topic is left empty; the serialization schema routes each record
           to the topic named in its "topic" JSON field. */
        FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<String>(
                "",
                new KafkaSerializationSchema<String>() {
                    @Override
                    public ProducerRecord<byte[], byte[]> serialize(String str, @Nullable Long timestamp) {
                        JSONObject jsonObject = JSONObject.parseObject(str);
                        String topic = jsonObject.getString("topic");
                        return new ProducerRecord<byte[], byte[]>(topic, str.getBytes(StandardCharsets.UTF_8));
                    }
                },
                getProducer(),
//                FlinkKafkaProducer.Semantic.EXACTLY_ONCE); // TODO: writes with EXACTLY_ONCE still fail here.
                FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);
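        /*
         * General notes on EXACTLY_ONCE (not from the original post):
         * it needs checkpointing enabled (done above), and the producer's
         * transaction.timeout.ms (set in getProducer()) must not exceed the
         * broker's transaction.max.timeout.ms, which defaults to 15 minutes.
         * The transactional producer also touches kafka-clients internals, so a
         * conflicting kafka-clients version on the classpath can break it.
         */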
        producer.setWriteTimestampToKafka(true);
//        producer.setLogFailuresOnly(false);
        keyValue.addSink(producer);

        /* Launch the job */
        senv.execute();
    }

    public static Properties getKafkaConfig() {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "dev-ct6-dc-worker01:9092,dev-ct6-dc-worker02:9092,dev-ct6-dc-worker03:9092");
        props.setProperty("group.id", "test1");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
//        props.setProperty("auto.offset.reset", "latest");
        return props;
    }

    public static Properties getProducer() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "dev-ct6-dc-worker01:9092,dev-ct6-dc-worker02:9092,dev-ct6-dc-worker03:9092");
        props.setProperty("compression.type", "snappy");
        props.setProperty("linger.ms", "1");
        props.setProperty("transaction.timeout.ms", 1000 * 60 * 5 + "");
         props.put("acks", "all");
        props.put("retries", 3);
//        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
//        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");


        return props;
    }
}
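
The MyKafkaDeserializationSchema used by the consumer above is not included in the post. Below is a minimal sketch of what such a class could look like, assuming it only decodes the key and value as UTF-8 strings and forwards the full ConsumerRecord (topic, partition, offset, key, value) downstream:

package sjb;

import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import org.apache.kafka.clients.consumer.ConsumerRecord;

import java.nio.charset.StandardCharsets;

/* Hypothetical stand-in for the class used above, not the author's original code. */
public class MyKafkaDeserializationSchema implements KafkaDeserializationSchema<ConsumerRecord<String, String>> {

    @Override
    public boolean isEndOfStream(ConsumerRecord<String, String> nextElement) {
        /* The stream is unbounded, so it never ends. */
        return false;
    }

    @Override
    public ConsumerRecord<String, String> deserialize(ConsumerRecord<byte[], byte[]> record) {
        /* Key and value can be null (tombstones, keyless producers); guard before decoding. */
        String key = record.key() == null ? null : new String(record.key(), StandardCharsets.UTF_8);
        String value = record.value() == null ? null : new String(record.value(), StandardCharsets.UTF_8);
        return new ConsumerRecord<>(record.topic(), record.partition(), record.offset(), key, value);
    }

    @Override
    public TypeInformation<ConsumerRecord<String, String>> getProducedType() {
        return TypeInformation.of(new TypeHint<ConsumerRecord<String, String>>() {});
    }
}

If only the JSON value is needed downstream, returning a plain String from deserialize() is simpler and avoids falling back to Kryo serialization for ConsumerRecord.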

Reposted from blog.csdn.net/qq_31866793/article/details/107515578