I have a few minutes before boarding; I am currently in the lounge, waiting to travel from Harbin to Beijing. I will get back late tonight and will be too busy tomorrow to update the article, so I am using this spare time to tidy things up a bit.
How can Flink join three real-time streams together? The overall idea is this:
• Use the same time characteristic for every stream. • Use the same window size for every join, so that when a window ends, the data from all three real-time streams for that window is triggered together.
Since Flink does not support joining three real-time streams in a single operation, you first join two of the streams and then join that result with the third stream.
import java.util
import SessionIdKeyedProcessFunction.MyTimeTimestampsAndWatermarks
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.{AssignerWithPeriodicWatermarks, AssignerWithPunctuatedWatermarks}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
/**
 * Demo job that joins three socket word streams (ports 9001, 9002 and 9003)
 * in two chained windowed joins: stream 1 is joined with stream 2, and that
 * result is joined with stream 3.
 *
 * Both joins use event-time tumbling windows of the same size. A windowed
 * join stamps its results with the largest timestamp that still lies inside
 * the window, so the output of the first join is assigned to the same window
 * as the third stream's elements in the second join. With processing-time
 * windows the first join's output is only produced when its window fires and
 * therefore reaches the second join in a later window, which breaks the
 * three-way alignment.
 */
object FlinkWindow {

  // Imported locally so this object does not depend on the file-level import list.
  import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows

  /** Window size shared by both joins; it must be identical for the windows to line up. */
  private val JoinWindowSize: Time = Time.seconds(6)

  /**
   * Stamps every element with the wall-clock time at which it is seen and
   * emits periodic watermarks that trail the wall clock by
   * `maxOutOfOrderness` milliseconds.
   */
  class MyTimeTimestampsAndWatermarks extends AssignerWithPeriodicWatermarks[(String, Int)] with Serializable {

    /** How far the watermark trails the highest known timestamp, in milliseconds (3.5 seconds). */
    val maxOutOfOrderness: Long = 3500L

    /** Highest timestamp that has been, or could so far have been, assigned to an element. */
    var currentMaxTimestamp: Long = _

    /** Generates the element timestamp from the current wall-clock time. */
    override def extractTimestamp(element: (String, Int), previousElementTimestamp: Long): Long = {
      val timestamp = System.currentTimeMillis()
      currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp)
      timestamp
    }

    /**
     * Returns the highest known timestamp minus the out-of-orderness bound.
     *
     * Timestamps are taken from the wall clock, so the wall clock itself is
     * folded into the maximum. This keeps the watermark advancing while a
     * socket (or one parallel subtask) is idle, so windows still fire; the
     * `max` also keeps the watermark monotonic if the clock steps backwards.
     */
    override def getCurrentWatermark(): Watermark = {
      currentMaxTimestamp = Math.max(System.currentTimeMillis(), currentMaxTimestamp)
      new Watermark(currentMaxTimestamp - maxOutOfOrderness)
    }
  }

  /**
   * Builds the three word streams, prints each of them, chains the two
   * windowed joins and runs the job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Event time is required for the chained joins to see the same window (see object doc).
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val inputMap = wordStream(env, 9001)
    inputMap.print()
    val inputMap1 = wordStream(env, 9002)
    inputMap1.print()
    val inputMap2 = wordStream(env, 9003)
    inputMap2.print()

    // First join: (word, count1) with (word, count2) on the word.
    val aa = inputMap.join(inputMap1)
      .where(_._1).equalTo(_._1)
      .window(TumblingEventTimeWindows.of(JoinWindowSize))
      .apply { (t1: (String, Int), t2: (String, Int), out: Collector[(String, Int, Int)]) =>
        out.collect((t1._1, t1._2, t2._2))
      }
    aa.print()

    // Second join: result of the first join with the third stream, same key and window size.
    val cc = aa.join(inputMap2)
      .where(_._1).equalTo(_._1)
      .window(TumblingEventTimeWindows.of(JoinWindowSize))
      .apply { (t1: (String, Int, Int), t2: (String, Int), out: Collector[(String, Int, Int, Int)]) =>
        out.collect((t1._1, t1._2, t1._3, t2._2))
      }
    cc.print()

    env.execute()
  }

  /**
   * Reads text lines from `localhost:port`, splits them on runs of non-word
   * characters and emits one `(word, 1)` pair per word, with timestamps and
   * watermarks assigned by [[MyTimeTimestampsAndWatermarks]].
   *
   * Empty tokens are dropped: `split` yields one for an empty line or a line
   * that starts with a separator, and they would otherwise join with each
   * other under the empty key.
   */
  private def wordStream(env: StreamExecutionEnvironment, port: Int): DataStream[(String, Int)] =
    env.socketTextStream("localhost", port)
      .flatMap(line => line.split("\\W+").filter(_.nonEmpty))
      .map(word => (word, 1))
      .assignTimestampsAndWatermarks(new MyTimeTimestampsAndWatermarks())
}
Left join and right join: the official Flink documentation does not describe how to implement them, and the join operator cannot express them, so they have to be implemented with coGroup. You can use the following example as a reference and adapt it as needed.
import util.source.StreamDataSource1;
import util.source.StreamDataSource;
import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
public class FlinkTumblingWindowsLeftJoinDemo {
/**
 * Builds and runs the left-join demo: two sources are given event-time
 * timestamps and watermarks, co-grouped on their key in tumbling event-time
 * windows, and the {@link LeftJoin} output is printed.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if job execution fails
 */
public static void main(String[] args) throws Exception {
    final int windowSeconds = 10;
    final long maxOutOfOrdernessMillis = 5100L;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // Set up the data sources.
    DataStream<Tuple3<String, String, Long>> leftSource =
            env.addSource(new StreamDataSource()).name("Demo Source");
    DataStream<Tuple3<String, String, Long>> rightSource =
            env.addSource(new StreamDataSource1()).name("Demo Source");

    // Set up the watermarks.
    DataStream<Tuple3<String, String, Long>> leftStream =
            withEventTime(leftSource, maxOutOfOrdernessMillis);
    DataStream<Tuple3<String, String, Long>> rightStream =
            withEventTime(rightSource, maxOutOfOrdernessMillis);

    // Join operation, expressed as a windowed coGroup.
    leftStream
            .coGroup(rightStream)
            .where(new LeftSelectKey())
            .equalTo(new RightSelectKey())
            .window(TumblingEventTimeWindows.of(Time.seconds(windowSeconds)))
            .apply(new LeftJoin())
            .print();

    env.execute("TimeWindowDemo");
}

/**
 * Assigns timestamps taken from field {@code f2} of each element and
 * bounded-out-of-orderness watermarks to the given stream.
 *
 * @param source stream to decorate
 * @param maxOutOfOrdernessMillis out-of-orderness bound, in milliseconds
 * @return the stream with timestamps and watermarks assigned
 */
private static DataStream<Tuple3<String, String, Long>> withEventTime(
        DataStream<Tuple3<String, String, Long>> source, long maxOutOfOrdernessMillis) {
    return source.assignTimestampsAndWatermarks(
            new BoundedOutOfOrdernessTimestampExtractor<Tuple3<String, String, Long>>(
                    Time.milliseconds(maxOutOfOrdernessMillis)) {
                @Override
                public long extractTimestamp(Tuple3<String, String, Long> element) {
                    // NOTE(review): presumably f2 holds epoch milliseconds — confirm against the sources.
                    return element.f2;
                }
            });
}
/**
 * Left outer join expressed as a co-group function.
 *
 * <p>For every left element, one output row is emitted per right element of
 * the same group. A left element whose group has no right elements is
 * emitted once, with the string {@code "null"} and {@code -1L} in place of
 * the right-hand fields.
 *
 * <p>Output layout: (left f0, left f1, right f1, left f2, right f2).
 */
public static class LeftJoin implements CoGroupFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, Tuple5<String, String, String, Long, Long>> {
    @Override
    public void coGroup(Iterable<Tuple3<String, String, Long>> leftElements, Iterable<Tuple3<String, String, Long>> rightElements, Collector<Tuple5<String, String, String, Long, Long>> out) {
        for (Tuple3<String, String, Long> left : leftElements) {
            // A fresh iterator per left element.
            // NOTE(review): assumes rightElements can be traversed repeatedly — confirm.
            java.util.Iterator<Tuple3<String, String, Long>> rights = rightElements.iterator();

            if (!rights.hasNext()) {
                // No match on the right side: emit the left row with placeholder values.
                out.collect(new Tuple5<>(left.f0, left.f1, "null", left.f2, -1L));
                continue;
            }

            while (rights.hasNext()) {
                Tuple3<String, String, Long> right = rights.next();
                out.collect(new Tuple5<>(left.f0, left.f1, right.f1, left.f2, right.f2));
            }
        }
    }
}
/** Key selector for the left stream: uses field {@code f0} as the key. */
public static class LeftSelectKey implements KeySelector<Tuple3<String, String, Long>, String> {
    /**
     * @param element left-stream element
     * @return the element's {@code f0} field
     */
    @Override
    public String getKey(Tuple3<String, String, Long> element) {
        final String key = element.f0;
        return key;
    }
}
/** Key selector for the right stream: uses field {@code f0} as the key. */
public static class RightSelectKey implements KeySelector<Tuple3<String, String, Long>, String> {
    /**
     * @param element right-stream element
     * @return the element's {@code f0} field
     */
    @Override
    public String getKey(Tuple3<String, String, Long> element) {
        final String key = element.f0;
        return key;
    }
}
Want to see more practical content shared by technology experts? Follow the official account below and reply "Spark", "Flink", "machine learning", or "front end" to get a large collection of learning materials.