Flink WordCount

package com.scn;

import java.util.Locale;
import java.util.Properties;
import java.util.regex.Pattern;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
import org.apache.flink.util.Collector;

public class FilnkCostKafka {
    public static void main(String[] args) throws Exception {
        /**
        * 引入Flink StreamExecutionEnvironment
        **/
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        /**
        * 设置监控数据流时间间隔(官方叫状态与检查点)
        **/
        env.enableCheckpointing(1000);
        /**
        * 配置kafka和zookeeper的ip和端口
        **/
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.1.20:9092");
        properties.setProperty("zookeeper.connect", "192.168.1.20:2181");
        properties.setProperty("group.id", "test");
        /**
        * 将kafka和zookeeper配置信息加载到Flink StreamExecutionEnvironment
        **/
        FlinkKafkaConsumer08<String> myConsumer = new FlinkKafkaConsumer08<String>("test", new SimpleStringSchema(),
                properties);
        /**
        * 将Kafka的数据转成flink的DataStream类型
        **/
        DataStream<String> stream = env.addSource(myConsumer);
        /**
        * 实施计算模型并输出结果
        **/
        DataStream<Tuple2<String, Integer>> counts = stream.flatMap(new LineSplitter()).keyBy(0).sum(1);

        counts.print();

        env.execute("WordCount from Kafka data");
    }

    public static final class LineSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
        private static final long serialVersionUID = 1L;

        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
            String[] tokens = value.toLowerCase().split("\\W+");
            for (String token : tokens) {
                if (token.length() > 0) {
                    out.collect(new Tuple2<String, Integer>(token, 1));
                }
            }
        }
    }

}

参考:https://www.cnblogs.com/jiashengmei/p/9025535.html

// Per-key average over 10-second time windows: the source records are split by
// MessageSplitter into (key, value) tuples, keyed by field 0, and each window
// emits a single (key, average of field 1) tuple.
DataStream<Tuple2<String, Long>> keyedStream = env
                .addSource(consumer)
                .flatMap(new MessageSplitter())
                .keyBy(0)
                .timeWindow(Time.seconds(10))

                .apply(new WindowFunction<Tuple2<String, Long>, Tuple2<String, Long>, Tuple, TimeWindow>() {
                    @Override
                    public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<String, Long>> input, Collector<Tuple2<String, Long>> out) throws Exception {
                        String key = null;
                        long sum = 0L;
                        int count = 0;
                        for (Tuple2<String, Long> record : input) {
                            if (count == 0) {
                                // All records in the window share the key; take it from the first one.
                                key = record.f0;
                            }
                            sum += record.f1;
                            count++;
                        }
                        if (count == 0) {
                            // Nothing to average; also avoids a division by zero.
                            return;
                        }
                        // Emit a fresh tuple rather than overwriting a field of an input
                        // record. The average uses long division, so it is truncated.
                        out.collect(new Tuple2<String, Long>(key, sum / count));
                    }
                });

参考:https://www.cnblogs.com/huxi2b/p/7219792.html

猜你喜欢

转载自blog.csdn.net/jz1993/article/details/81952026