Flink示例——Window、EventTime、WaterMark

Flink示例——Window、EventTime、WaterMark

版本信息

产品 版本
Flink 1.7.2
Java 1.8.0_231
Scala 2.11.12

Maven依赖

  • pom.xml 依赖部分
    <!-- All three artifacts share ${flink.version}; it is not defined in this snippet and is
         presumably declared in the pom's <properties> (the article lists Flink 1.7.2). -->
    <!-- Flink Java API -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Flink streaming Java API; the _2.11 suffix corresponds to the Scala 2.11 line listed in the article -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Flink client classes; presumably needed to execute the job locally (TODO confirm) -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>${flink.version}</version>
    </dependency>
    

自定义SourceFunction

  • 提供一个SourceFunction,方便后面测试
    /**
     * Test source that emits one record roughly every 100 ms until it is cancelled.
     *
     * <p>Each record is a {@code Tuple2<String, Long>}: {@code f0} is a key picked at random
     * from "a".."g", and {@code f1} is {@code System.currentTimeMillis()} at emission time.
     */
    public class CustomSourceFunction extends RichSourceFunction<Tuple2<String, Long>> {

        // Must be volatile: cancel() is invoked from a different thread than the one
        // executing run(), so without it the loop may never observe the update.
        private volatile boolean running = true;

        /**
         * Emits records in a loop until {@link #cancel()} clears the running flag.
         *
         * @param ctx context used to emit records downstream
         * @throws Exception if the sleeping thread is interrupted
         */
        @Override
        public void run(SourceContext<Tuple2<String, Long>> ctx) throws Exception {
            List<String> data = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
            Random random = new Random();
            while (running) {
                Thread.sleep(100);
                // Pick a random key
                String key = data.get(random.nextInt(data.size()));
                long value = System.currentTimeMillis();
                ctx.collect(Tuple2.of(key, value));
            }
        }

        /** Signals the emit loop in {@link #run} to stop after its current iteration. */
        @Override
        public void cancel() {
            running = false;
        }

    }
    

Window 示例

  • 时间窗口-滚动
    /**
     * Demo: applies a 5-second tumbling window over the whole stream produced by
     * {@code CustomSourceFunction} and prints, for every window, how many records
     * were seen per key.
     */
    public class WindowDemo {

        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

            // Custom data source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(new CustomSourceFunction());

            // Aggregation: the accumulator maps key -> number of records seen for that key
            AggregateFunction<Tuple2<String, Long>, HashMap<String, Long>, HashMap<String, Long>> countPerKey =
                    new AggregateFunction<Tuple2<String, Long>, HashMap<String, Long>, HashMap<String, Long>>() {
                        @Override
                        public HashMap<String, Long> createAccumulator() {
                            return new HashMap<>();
                        }

                        @Override
                        public HashMap<String, Long> add(Tuple2<String, Long> value, HashMap<String, Long> accumulator) {
                            // Increment the counter of this record's key (starting from 0)
                            accumulator.merge(value.f0, 1L, Long::sum);
                            return accumulator;
                        }

                        @Override
                        public HashMap<String, Long> getResult(HashMap<String, Long> accumulator) {
                            return accumulator;
                        }

                        @Override
                        public HashMap<String, Long> merge(HashMap<String, Long> a, HashMap<String, Long> b) {
                            // Fold the counters of a into b and return b
                            a.forEach((key, count) -> b.merge(key, count, Long::sum));
                            return b;
                        }
                    };

            // Tumbling window, then aggregate and print
            customDS.timeWindowAll(Time.seconds(5))
                    .aggregate(countPerKey)
                    .print();

            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
    
  • 时间窗口-滑动
    customDS.timeWindowAll(Time.seconds(5), Time.seconds(3))
    
  • 时间窗口-会话
    customDS.windowAll(EventTimeSessionWindows.withGap(Time.seconds(10)))
    
  • 计数窗口-滚动
    customDS.countWindowAll(1000)
    
  • 计数窗口-滑动
    customDS.countWindowAll(1000, 500)
    
  • windowAll与keyBy.*Window
    • windowAll 是对所有数据设置一个大的窗口,示例 customDS.timeWindowAll(Time.seconds(5))
    • keyBy.*Window 是按key分组后,分别对每个key设置一个小的窗口,示例 customDS.keyBy(0).timeWindow(Time.seconds(5))

EventTime、WaterMark

  • 代码 EventTimeWatermarkDemo
    /**
     * Demo: enables event time, assigns timestamps and watermarks from the {@code f1}
     * field of each record, and prints per-key record counts for 5-second tumbling windows.
     */
    public class EventTimeWatermarkDemo {

        /** Counts records per key; the accumulator maps key -> number of records seen. */
        private static final AggregateFunction<Tuple2<String, Long>, HashMap<String, Long>, HashMap<String, Long>> aggregateFunction =
                new AggregateFunction<Tuple2<String, Long>, HashMap<String, Long>, HashMap<String, Long>>() {
                    @Override
                    public HashMap<String, Long> createAccumulator() {
                        return new HashMap<>();
                    }

                    @Override
                    public HashMap<String, Long> add(Tuple2<String, Long> value, HashMap<String, Long> accumulator) {
                        // Increment the counter of this record's key (starting from 0)
                        accumulator.merge(value.f0, 1L, Long::sum);
                        return accumulator;
                    }

                    @Override
                    public HashMap<String, Long> getResult(HashMap<String, Long> accumulator) {
                        return accumulator;
                    }

                    @Override
                    public HashMap<String, Long> merge(HashMap<String, Long> a, HashMap<String, Long> b) {
                        // Fold the counters of a into b and return b
                        a.forEach((key, count) -> b.merge(key, count, Long::sum));
                        return b;
                    }
                };

        public static void main(String[] args) {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            // Enable event time
            env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
            // To change the automatic watermark interval (per the original author, the default
            // of 200 ms can be seen in the source of env.setStreamTimeCharacteristic(...)):
            // env.getConfig().setAutoWatermarkInterval(1000);

            // Custom data source
            DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(new CustomSourceFunction());

            // Per the original author's reading of the Flink source, BoundedOutOfOrdernessTimestampExtractor
            // is a convenience implementation of AssignerWithPeriodicWatermarks.
            // The constructor argument allows 3 seconds of out-of-orderness.
            BoundedOutOfOrdernessTimestampExtractor<Tuple2<String, Long>> assigner =
                    new BoundedOutOfOrdernessTimestampExtractor<Tuple2<String, Long>>(Time.seconds(3)) {
                        @Override
                        public long extractTimestamp(Tuple2<String, Long> element) {
                            // The event time is carried in the second tuple field
                            return element.f1;
                        }
                    };

            // Pipeline: timestamps/watermarks -> tumbling window -> aggregate -> print
            customDS.assignTimestampsAndWatermarks(assigner)
                    .timeWindowAll(Time.seconds(5))
                    .aggregate(aggregateFunction)
                    .print();

            try {
                env.execute();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

    }
    
  • AssignerWithPeriodicWatermarks 示例
    // Periodic watermark assigner: tracks the largest event time seen so far and
    // reports a watermark that trails it by a fixed delay.
    AssignerWithPeriodicWatermarks<Tuple2<String, Long>> assigner = new AssignerWithPeriodicWatermarks<Tuple2<String, Long>>() {

        // Watermark delay in milliseconds (3 seconds)
        private final long delayMs = 3000L;

        // Largest event time seen so far. Starts at MIN_VALUE + delay so that subtracting
        // the delay before any element has arrived does not underflow.
        private long maxEventTime = Long.MIN_VALUE + delayMs;

        @Nullable
        @Override
        public Watermark getCurrentWatermark() {
            // Per the original author: called every 200 ms by default; the interval is
            // changed through the execution environment's config.
            // The watermark lags the largest seen event time by the delay.
            return new Watermark(maxEventTime - delayMs);
        }

        @Override
        public long extractTimestamp(Tuple2<String, Long> element, long previousElementTimestamp) {
            // Called once per incoming element; the event time is the second tuple field
            long eventTime = element.f1;
            // Keep track of the largest event time
            maxEventTime = Math.max(maxEventTime, eventTime);
            return eventTime;
        }
    };
    
  • AssignerWithPunctuatedWatermarks 示例
    // Punctuated watermark assigner: only elements whose key is "a" trigger a new
    // watermark, which trails that element's timestamp by 3000 ms.
    AssignerWithPunctuatedWatermarks<Tuple2<String, Long>> assigner = new AssignerWithPunctuatedWatermarks<Tuple2<String, Long>>() {

        @Override
        public long extractTimestamp(Tuple2<String, Long> element, long previousElementTimestamp) {
            // The event time is the second tuple field
            return element.f1;
        }

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(Tuple2<String, Long> lastElement, long extractedTimestamp) {
            // Per the original author, this is called after extractTimestamp.
            // The decision depends on the element's content; null means no new watermark.
            return "a".equals(lastElement.f0)
                    ? new Watermark(extractedTimestamp - 3000)
                    : null;
        }
    };
    
发布了146 篇原创文章 · 获赞 54 · 访问量 17万+

猜你喜欢

转载自blog.csdn.net/alionsss/article/details/104255333