Flink Watermark 自定义生成机制

周期性生成Watermark

import Source.WaterSensor
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

/**
 * One reading parsed from a comma-separated input line.
 *
 * @param id sensor identifier (first field of the line)
 * @param ts event timestamp (second field); the watermark assigner scales it
 *           by 1000 before use, so it is presumably in seconds -- confirm
 *           against the data source
 * @param vc measured value (third field)
 */
case class WaterSensor(
  id: String,
  ts: Long,
  vc: Double
)
/**
 * Demo job: reads comma-separated sensor lines from a socket, assigns
 * event-time timestamps and watermarks with [[MyAssigner]], and prints the
 * minimum `vc` per sensor id over a sliding event-time window.
 */
object WindowDemo {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Run on event time, i.e. the time carried by the records themselves.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Raw text lines from the socket source.
    val rawLines: DataStream[String] = env.socketTextStream("192.168.**.**", 7777)

    // Parse each "id,ts,vc" line into a WaterSensor, then attach the custom
    // watermark assigner.
    val dataStream: DataStream[WaterSensor] = rawLines
      .map { line =>
        val fields: Array[String] = line.split(",")
        WaterSensor(fields(0), fields(1).toLong, fields(2).toDouble)
      }
      .assignTimestampsAndWatermarks(new MyAssigner)

    // Minimum vc per id over a 10 s window that slides every 3 s.
    val minDataStream: DataStream[(String, Double)] = dataStream
      .map(reading => (reading.id, reading.vc))
      .keyBy(_._1)
      .timeWindow(Time.seconds(10), Time.seconds(3))
      .reduce((left, right) => (left._1, left._2.min(right._2)))

    dataStream.print("csh")
    minDataStream.print("minData")

    env.execute("windowDemo")
  }

}

/**
 * Periodic watermark assigner for [[WaterSensor]] readings.
 *
 * Extends `AssignerWithPeriodicWatermarks` and overrides
 * `getCurrentWatermark` and `extractTimestamp`. The watermark trails the
 * largest event timestamp seen so far by `bound` milliseconds.
 *
 * Fixes relative to the original version:
 *  - `extractTimestamp` now returns `ts * 1000`, the same scale used for
 *    `maxTs` (it previously returned `ts * 100`, so element timestamps and
 *    watermarks were on different scales).
 *  - `getCurrentWatermark` no longer underflows while no element has been
 *    seen (`Long.MinValue - bound` used to wrap around to a huge positive
 *    watermark, which would make every subsequent record late).
 */
class MyAssigner extends AssignerWithPeriodicWatermarks[WaterSensor] {

  /** Allowed out-of-orderness (how long window closing is delayed), in milliseconds. */
  var bound = 3000

  /** Largest event timestamp observed so far, in milliseconds. */
  var maxTs = Long.MinValue

  /**
   * Returns the current watermark; the original author notes Flink calls
   * this periodically, every 200 ms by default.
   *
   * @return `maxTs - bound`, clamped to `Long.MinValue` until `maxTs` is
   *         large enough for the subtraction not to underflow
   */
  override def getCurrentWatermark: Watermark = {
    val watermarkTs =
      if (maxTs < Long.MinValue + bound) Long.MinValue // nothing seen yet: avoid wrap-around
      else maxTs - bound
    new Watermark(watermarkTs)
  }

  /**
   * Extracts the event timestamp of a reading and updates `maxTs`; invoked
   * once per incoming element.
   *
   * @param t the incoming reading
   * @param l timestamp previously attached to the element (unused)
   * @return the reading's `ts` scaled by 1000 (presumably seconds to milliseconds)
   */
  override def extractTimestamp(t: WaterSensor, l: Long): Long = {
    val eventTimeMs = t.ts * 1000
    maxTs = Math.max(eventTimeMs, maxTs)
    eventTimeMs
  }
}

间断式生成Watermark

import Source.WaterSensor
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow

/**
 * One reading parsed from a comma-separated input line.
 *
 * @param id sensor identifier (first field of the line)
 * @param ts event timestamp (second field); the watermark assigner scales it
 *           by 1000 before use, so it is presumably in seconds -- confirm
 *           against the data source
 * @param vc measured value (third field)
 */
case class WaterSensor(
  id: String,
  ts: Long,
  vc: Double
)
/**
 * Demo job for punctuated watermarks: reads comma-separated sensor lines
 * from a socket, assigns event-time timestamps and watermarks with
 * [[MyAssigner2]], and prints the minimum `vc` per sensor id over a sliding
 * event-time window.
 *
 * Fix relative to the original version: the stream is now wired to
 * `MyAssigner2` (the punctuated assigner this example is about) instead of
 * `MyAssigner`, which is not defined in this example.
 */
object WindowDemo {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Run on event time, i.e. the time carried by the records themselves.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Raw text lines from the socket source.
    val stream: DataStream[String] = env.socketTextStream("192.168.**.**", 7777)

    // Parse each "id,ts,vc" line into a WaterSensor, then attach the custom
    // punctuated watermark assigner.
    val dataStream: DataStream[WaterSensor] = stream
      .map { line =>
        val fields: Array[String] = line.split(",")
        WaterSensor(fields(0), fields(1).toLong, fields(2).toDouble)
      }
      .assignTimestampsAndWatermarks(new MyAssigner2)

    // Minimum vc per id over a 10 s window that slides every 3 s.
    val minDataStream: DataStream[(String, Double)] = dataStream
      .map(data => (data.id, data.vc))
      .keyBy(_._1)
      .timeWindow(Time.seconds(10), Time.seconds(3))
      .reduce((x, y) => (x._1, x._2.min(y._2)))

    dataStream.print("csh")
    minDataStream.print("minData")

    env.execute("windowDemo")
  }

}

/**
 * Punctuated watermark assigner for [[WaterSensor]] readings.
 *
 * Extends `AssignerWithPunctuatedWatermarks` and implements
 * `checkAndGetNextWatermark` and `extractTimestamp`. A watermark is emitted
 * only for readings whose `id` is an integer divisible by 3; it trails the
 * largest event timestamp seen so far by `bound` milliseconds.
 *
 * Fixes relative to the original version:
 *  - `checkAndGetNextWatermark` had an `if` without an `else`, which does not
 *    type-check against the declared `Watermark` result; it now returns
 *    `null` when no watermark should be emitted, as the Flink interface
 *    expects.
 *  - A non-numeric `id` no longer throws `NumberFormatException`; such
 *    readings simply do not trigger a watermark.
 *  - `maxTs - bound` is guarded against underflow.
 */
class MyAssigner2 extends AssignerWithPunctuatedWatermarks[WaterSensor] {

  /** Allowed out-of-orderness (how long window closing is delayed), in milliseconds. */
  var bound = 3000

  /** Largest event timestamp observed so far, in milliseconds. */
  var maxTs = Long.MinValue

  /**
   * Decides, once per element, whether a new watermark should be emitted.
   *
   * @param t the incoming reading
   * @param l the timestamp extracted for this reading (unused; `maxTs` is used instead)
   * @return a watermark at `maxTs - bound` when `t.id` parses as an integer
   *         divisible by 3, otherwise `null` (meaning "no watermark")
   */
  override def checkAndGetNextWatermark(t: WaterSensor, l: Long): Watermark = {
    val idDivisibleByThree = scala.util.Try(t.id.toInt).toOption.exists(_ % 3 == 0)
    if (idDivisibleByThree) {
      val watermarkTs =
        if (maxTs < Long.MinValue + bound) Long.MinValue // avoid wrap-around
        else maxTs - bound
      new Watermark(watermarkTs)
    } else {
      // The Flink API uses null to signal that no watermark is generated.
      null
    }
  }

  /**
   * Extracts the event timestamp of a reading and updates `maxTs`; invoked
   * once per incoming element.
   *
   * @param t the incoming reading
   * @param l timestamp previously attached to the element (unused)
   * @return the reading's `ts` scaled by 1000 (presumably seconds to milliseconds)
   */
  override def extractTimestamp(t: WaterSensor, l: Long): Long = {
    val eventTimeMs = t.ts * 1000
    maxTs = Math.max(eventTimeMs, maxTs)
    eventTimeMs
  }
}

猜你喜欢

转载自blog.csdn.net/weixin_38468167/article/details/112094928