How to join three real-time streams simultaneously in Flink, including left join and right join

I have a few minutes before boarding and am currently in the departure lounge, flying from Harbin to Beijing. I will get back late at night and will be too busy tomorrow to update the article, so I am squeezing in some time now to tidy this up a bit.

How can Flink join three real-time streams at the same time? The overall idea is this:

Use the same time characteristic and the same window size for every stream, so that matching elements land in the same window and all three real-time streams are triggered together.

Since Flink does not support joining three real-time streams in a single operation, you first join two of the streams and then join that result with the third stream.

import java.util	
import SessionIdKeyedProcessFunction.MyTimeTimestampsAndWatermarks	
import org.apache.flink.streaming.api.TimeCharacteristic	
import org.apache.flink.streaming.api.functions.{AssignerWithPeriodicWatermarks, AssignerWithPunctuatedWatermarks}	
import org.apache.flink.streaming.api.scala._	
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment	
import org.apache.flink.streaming.api.watermark.Watermark	
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows	
import org.apache.flink.streaming.api.windowing.time.Time	
import org.apache.flink.util.Collector	
/**
 * Demo job that joins three socket word streams inside the same tumbling window.
 *
 * Flink has no three-way join operator, so the job joins the first two streams
 * and then joins that result with the third stream. For both joins to look at
 * the same window the job runs on event time: according to the Flink
 * documentation on consecutive windowed operations, elements emitted by an
 * event-time window carry that window's timestamp, so the first join's output
 * falls into the identical window of the second join. With processing-time
 * windows the first join's output is only produced when its window fires and
 * would therefore be assigned to a later window of the second join.
 */
object FlinkWindow {

  /**
   * Stamps every element with the wall-clock time at which it is seen and
   * emits periodic watermarks that trail the clock by `maxOutOfOrderness`.
   */
  class MyTimeTimestampsAndWatermarks extends AssignerWithPeriodicWatermarks[(String, Int)] with Serializable {
    // Distance the watermark keeps behind the clock.
    val maxOutOfOrderness = 3500L // 3.5 seconds
    // Largest timestamp handed out so far.
    var currentMaxTimestamp: Long = _

    /** Uses the current wall-clock time as the element's timestamp. */
    override def extractTimestamp(element: (String, Int), previousElementTimestamp: Long): Long = {
      val timestamp = System.currentTimeMillis()
      currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp)
      timestamp
    }

    /**
     * Returns the watermark: the later of "now" and the highest timestamp seen,
     * minus the out-of-orderness bound.
     *
     * Timestamps come from the wall clock, so the clock is a valid basis for the
     * watermark. Using it keeps the watermark advancing on an idle input, which
     * lets event-time windows fire even when one socket (or one parallel
     * instance of this assigner) receives no further elements.
     */
    override def getCurrentWatermark(): Watermark = {
      val reference = Math.max(System.currentTimeMillis(), currentMaxTimestamp)
      new Watermark(reference - maxOutOfOrderness)
    }
  }

  /**
   * Builds one input stream: reads lines from `localhost:port`, splits them on
   * non-word characters and emits `(word, 1)` with timestamps and watermarks.
   *
   * @param env  execution environment the stream is created in
   * @param port socket port to read text lines from
   * @return stream of `(word, 1)` pairs
   */
  private def wordStream(env: StreamExecutionEnvironment, port: Int): DataStream[(String, Int)] =
    env.socketTextStream("localhost", port)
      .flatMap(line => line.split("\\W+"))
      // split yields an empty token for blank lines or leading separators; an empty join key is noise.
      .filter(_.nonEmpty)
      .map(word => (word, 1))
      .assignTimestampsAndWatermarks(new MyTimeTimestampsAndWatermarks())

  /**
   * Wires up the three socket streams (ports 9001, 9002, 9003), prints each of
   * them, joins them pairwise on the word and prints both join results.
   */
  def main(args: Array[String]): Unit = {
    // Local import: the event-time assigner is only needed by this job definition.
    import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Both joins must use the same window size so their windows line up.
    val joinWindow = Time.seconds(6)

    val inputMap = wordStream(env, 9001)
    inputMap.print()
    val inputMap1 = wordStream(env, 9002)
    inputMap1.print()
    val inputMap2 = wordStream(env, 9003)
    inputMap2.print()

    // Step 1: join stream one with stream two on the word.
    val aa = inputMap.join(inputMap1)
      .where(_._1).equalTo(_._1)
      .window(TumblingEventTimeWindows.of(joinWindow))
      .apply { (t1: (String, Int), t2: (String, Int), out: Collector[(String, Int, Int)]) =>
        out.collect((t1._1, t1._2, t2._2))
      }
    aa.print()

    // Step 2: join the intermediate result with stream three in the same window.
    val cc = aa.join(inputMap2)
      .where(_._1).equalTo(_._1)
      .window(TumblingEventTimeWindows.of(joinWindow))
      .apply { (t1: (String, Int, Int), t2: (String, Int), out: Collector[(String, Int, Int, Int)]) =>
        out.collect((t1._1, t1._2, t1._3, t2._2))
      }
    cc.print()

    env.execute()
  }
}

Left join and right join: the official Flink website does not describe how to implement them, and the join operator cannot express them, so they have to be implemented with coGroup. The following example can be used as a reference and adapted as needed.

import util.source.StreamDataSource1;	
import util.source.StreamDataSource;	
import org.apache.flink.api.common.functions.CoGroupFunction;	
import org.apache.flink.api.java.functions.KeySelector;	
import org.apache.flink.api.java.tuple.Tuple3;	
import org.apache.flink.api.java.tuple.Tuple5;	
import org.apache.flink.streaming.api.TimeCharacteristic;	
import org.apache.flink.streaming.api.datastream.DataStream;	
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;	
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;	
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;	
import org.apache.flink.streaming.api.windowing.time.Time;	
import org.apache.flink.util.Collector;	
public class FlinkTumblingWindowsLeftJoinDemo {	
    public static void main(String[] args) throws Exception {	
        int windowSize = 10;	
        long delay = 5100L;	
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();	
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);	
        env.setParallelism(1);	
        // 设置数据源	
        DataStream<Tuple3<String, String, Long>> leftSource = env.addSource(new StreamDataSource()).name("Demo Source");	
        DataStream<Tuple3<String, String, Long>> rightSource = env.addSource(new StreamDataSource1()).name("Demo Source");	
        // 设置水位线	
        DataStream<Tuple3<String, String, Long>> leftStream = leftSource.assignTimestampsAndWatermarks(	
            new BoundedOutOfOrdernessTimestampExtractor<Tuple3<String, String, Long>>(Time.milliseconds(delay)) {	
                @Override	
                public long extractTimestamp(Tuple3<String, String, Long> element) {	
                    return element.f2;	
                }	
            }	
        );	
        DataStream<Tuple3<String, String, Long>> rigjhtStream = rightSource.assignTimestampsAndWatermarks(	
            new BoundedOutOfOrdernessTimestampExtractor<Tuple3<String, String, Long>>(Time.milliseconds(delay)) {	
                @Override	
                public long extractTimestamp(Tuple3<String, String, Long> element) {	
                    return element.f2;	
                }	
            }	
        );	
        // join 操作	
        leftStream.coGroup(rigjhtStream)	
            .where(new LeftSelectKey()).equalTo(new RightSelectKey())	
            .window(TumblingEventTimeWindows.of(Time.seconds(windowSize)))	
            .apply(new LeftJoin())	
            .print();	
        env.execute("TimeWindowDemo");	
    }	
    public static class LeftJoin implements CoGroupFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, Tuple5<String, String, String, Long, Long>> {	
        @Override	
        public void coGroup(Iterable<Tuple3<String, String, Long>> leftElements, Iterable<Tuple3<String, String, Long>> rightElements, Collector<Tuple5<String, String, String, Long, Long>> out) {	
            for (Tuple3<String, String, Long> leftElem : leftElements) {	
                boolean hadElements = false;	
                for (Tuple3<String, String, Long> rightElem : rightElements) {	
                    out.collect(new Tuple5<>(leftElem.f0, leftElem.f1, rightElem.f1, leftElem.f2, rightElem.f2));	
                    hadElements = true;	
                }	
                if (!hadElements) {	
                    out.collect(new Tuple5<>(leftElem.f0, leftElem.f1, "null", leftElem.f2, -1L));	
                }	
            }	
        }	
    }	
    public static class LeftSelectKey implements KeySelector<Tuple3<String, String, Long>, String> {	
        @Override	
        public String getKey(Tuple3<String, String, Long> w) {	
            return w.f0;	
        }	
    }	
    public static class RightSelectKey implements KeySelector<Tuple3<String, String, Long>, String> {	
        @Override	
        public String getKey(Tuple3<String, String, Long> w) {	
            return w.f0;	
        }	
    }

Want to see more practical technical content from industry experts? Follow the official account below and reply "Spark", "Flink", "machine learning" or "front end" to get a large collection of learning materials.

640?wx_fmt=jpeg

Published 40 original articles · won praise 3 · Views 9088

Guess you like

Origin blog.csdn.net/huzechen/article/details/102548858