The four major window functions of Flink windows

The previous article covered Flink's four major window assigners; if you need it, see https://blog.csdn.net/qq_44962429/article/details/112912432. Corrections, discussion and sharing are welcome!

This article mainly introduces Flink's window functions.

window functions

Once the system determines that a window is ready, it calls a window function to run the aggregation over the window's contents. The common window functions are ReduceFunction, AggregateFunction, FoldFunction and ProcessWindowFunction, plus WindowFunction (the antique, legacy version).

(1)ReduceFunction

/**
 * Combines two (word, count) pairs into one by adding their counts.
 * The word of the first pair is kept as the word of the result.
 */
class SumReduceFunction extends ReduceFunction[(String, Int)] {

  override def reduce(v1: (String, Int), v2: (String, Int)): (String, Int) =
    v1.copy(_2 = v1._2 + v2._2)
}
// Word count over 5-second tumbling processing-time windows, aggregated
// incrementally with SumReduceFunction.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 9999)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  // Key by the word itself: positional keyBy(0) is deprecated in newer Flink
  // releases and produces an untyped Tuple key.
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
  .reduce(new SumReduceFunction) // equivalent lambda: .reduce((v1, v2) => (v1._1, v1._2 + v2._2))
  .print()
env.execute("window")

(2) AggregateFunction

/**
 * Incrementally sums (word, count) pairs.
 *
 * Input, accumulator and output are all (String, Int): the accumulator carries
 * the word seen so far together with the running total.
 */
class SumAggregateFunction
    extends AggregateFunction[(String, Int), (String, Int), (String, Int)] {

  /** Starts with an empty word and a zero total. */
  override def createAccumulator(): (String, Int) = ("", 0)

  /**
   * Merges two partial accumulators by adding their totals.
   *
   * A freshly created accumulator still has the empty word, so when `a` has no
   * word yet the word of `b` is used instead of being dropped.
   */
  override def merge(a: (String, Int), b: (String, Int)): (String, Int) = {
    val word = Option(a._1).filter(_.nonEmpty).getOrElse(b._1)
    (word, a._2 + b._2)
  }

  /** Adds one element to the accumulator, taking the word from the element. */
  override def add(value: (String, Int), accumulator: (String, Int)): (String, Int) =
    (value._1, accumulator._2 + value._2)

  /** The accumulator already has the output shape, so it is returned as is. */
  override def getResult(accumulator: (String, Int)): (String, Int) = accumulator
}
// Word count over 5-second tumbling processing-time windows, aggregated
// incrementally with SumAggregateFunction.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("CentOS", 9999)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  // Key by the word itself: positional keyBy(0) is deprecated in newer Flink
  // releases and produces an untyped Tuple key.
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
  .aggregate(new SumAggregateFunction)
  .print()
env.execute("window")

(3) FoldFunction

/**
 * Folds (word, count) elements into a (word, total) accumulator whose total is
 * a Long. The word of the result is taken from the incoming element.
 */
class SumFoldFunction extends FoldFunction[(String, Int), (String, Long)] {

  override def fold(accumulator: (String, Long), value: (String, Int)): (String, Long) = {
    val runningTotal = accumulator._2 + value._2
    (value._1, runningTotal)
  }
}
// Word count over 1-second tumbling processing-time windows, folded into a
// (word, Long) total starting from ("", 0L).
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 8877)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  // Key by the word itself: positional keyBy(0) is deprecated in newer Flink
  // releases and produces an untyped Tuple key.
  .keyBy(_._1)
  .window(TumblingProcessingTimeWindows.of(Time.seconds(1)))
  // Class-based alternative: .fold(("", 0L), new SumFoldFunction)
  .fold(("", 0L))((acc, v) => (v._1, acc._2 + v._2))
  .print()
env.execute("window")

(4) ProcessWindowFunction

// Word count over sliding processing-time windows (4 s long, evaluated every
// 2 s), computed from the full window contents in a ProcessWindowFunction.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1)
  .window(SlidingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .process(new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] {

    override def process(key: String,
                         context: Context,
                         elements: Iterable[(String, Int)],
                         out: Collector[(String, Int)]): Unit = {
      // Every element in the window carries this key, so emit the key with the
      // sum of the counts; sum is total, unlike reduce, which throws on empty input.
      val total = elements.iterator.map(_._2).sum
      out.collect((key, total))
    }
  })
  .print()
env.execute("window")

ProcessWindowFunction can obtain information about the window, such as its start time and end time, and it also gives access to two kinds of keyed state through globalState() and windowState():

  • globalState(): gives access to keyed state that is not scoped to a window, so it is shared by all windows of the same key
  • windowState(): gives access to keyed state that is also scoped to the current window
// Word count that keeps two running totals per key: one in per-window state and
// one in global (cross-window) state. The window total goes to the main output,
// the global total to the side output identified by globalTag.
val env = StreamExecutionEnvironment.getExecutionEnvironment

val globalTag = new OutputTag[(String, Int)]("globalTag")

val countsStream = env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1)
  // Tumbling windows take (size, offset): 4-second windows shifted by 2 seconds.
  // The second argument is not a slide interval.
  .window(TumblingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .process(new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] {

    // Descriptors are created on first use instead of being null-initialised
    // vars that are assigned later in open().
    private lazy val wvds =
      new ValueStateDescriptor[Int]("window-value", createTypeInformation[Int])
    private lazy val gvds =
      new ValueStateDescriptor[Int]("global-value", createTypeInformation[Int])

    override def process(key: String,
                         context: Context,
                         elements: Iterable[(String, Int)],
                         out: Collector[(String, Int)]): Unit = {
      val total = elements.map(_._2).sum
      val ws = context.windowState.getState(wvds)
      val gs = context.globalState.getState(gvds)
      // A state that has never been written reads as 0 here (null unboxed to Int).
      val windowTotal = ws.value() + total
      val globalTotal = gs.value() + total
      out.collect((key, windowTotal))
      context.output(globalTag, (key, globalTotal))
      ws.update(windowTotal)
      gs.update(globalTotal)
    }

    // Per-window state must be released when the window is purged; otherwise
    // every expired window leaves its "window-value" entry behind.
    override def clear(context: Context): Unit =
      context.windowState.getState(wvds).clear()
  })
countsStream.print("窗口统计")
countsStream.getSideOutput(globalTag).print("全局输出")
env.execute("window")

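Note: a ProcessWindowFunction used on its own has to keep every element of the window until the window fires and then iterate over all of them, which is inefficient. In production it is usually combined with an incremental function (ReduceFunction, AggregateFunction or FoldFunction), so that only the pre-aggregated result is handed to the ProcessWindowFunction.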

(5)ReduceFunction+ProcessWindowFunction

// Word count that pre-aggregates incrementally with SumReduceFunction and then
// hands the reduced value to a ProcessWindowFunction when the window fires.
//
// The original snippet also declared a "globalTag" side output and printed it,
// but nothing ever wrote to that tag, so that dead wiring has been dropped.
val env = StreamExecutionEnvironment.getExecutionEnvironment

val countsStream = env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1)
  // Tumbling windows take (size, offset): 4-second windows shifted by 2 seconds.
  .window(TumblingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .reduce(
    new SumReduceFunction,
    new ProcessWindowFunction[(String, Int), (String, Int), String, TimeWindow] {

      override def process(key: String,
                           context: Context,
                           elements: Iterable[(String, Int)],
                           out: Collector[(String, Int)]): Unit = {
        // With a pre-aggregator, elements holds only the already reduced value.
        val total = elements.map(_._2).sum
        out.collect((key, total))
      }
    })
countsStream.print("窗口统计")
env.execute("window")

(5) FoldFunction+ProcessWindowFunction

// Word count that pre-aggregates incrementally with SumFoldFunction (starting
// from ("", 0L)) and then hands the folded value to a ProcessWindowFunction.
val env = StreamExecutionEnvironment.getExecutionEnvironment
val countsStream = env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1)
  // Tumbling windows take (size, offset): 4-second windows shifted by 2 seconds.
  .window(TumblingProcessingTimeWindows.of(Time.seconds(4), Time.seconds(2)))
  .fold(
    ("", 0L),
    new SumFoldFunction,
    new ProcessWindowFunction[(String, Long), (String, Long), String, TimeWindow] {

      override def process(key: String,
                           context: Context,
                           elements: Iterable[(String, Long)],
                           out: Collector[(String, Long)]): Unit = {
        // With a pre-aggregator, elements holds only the already folded value.
        val total = elements.map(_._2).sum
        out.collect((key, total))
      }
    })
  .print()
env.execute("window")

(6) WindowFunction (not commonly used)

Generally use ProcessWindowFunction instead

// Legacy WindowFunction that emits, for each key and window, the key together
// with the sum of the counts collected in that window.
val sumPerKey = new WindowFunction[(String, Int), (String, Int), String, TimeWindow] {

  override def apply(key: String,
                     window: TimeWindow,
                     input: Iterable[(String, Int)],
                     out: Collector[(String, Int)]): Unit = {
    val counts = input.map(_._2)
    out.collect((key, counts.sum))
  }
}

env.socketTextStream("centos", 7788)
  .flatMap(_.split("\\s+"))
  .map((_, 1))
  .keyBy(_._1) // keyBy() by position cannot be used here
  .window(TumblingProcessingTimeWindows.of(Time.seconds(1)))
  .apply(sumPerKey)
  .print()
env.execute("window")

Guess you like

Origin blog.csdn.net/qq_44962429/article/details/112912875