flink双流连接及处理

两个流连接到一起后,实际上仍各自保持原有的数据结构。为了便于统一处理,可以通过自定义函数(如 CoMapFunction)把两个流的输出格式统一起来,这样合并后就相当于一个流,方便后续处理。
(注:原文此处有一张 ConnectedStreams 处理流程示意图,转载时图片缺失。)

import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala._

object ConnectStreamExample {

  // Demonstrates connecting two streams of different element types:
  // DataStream -> ConnectedStreams -> DataStream
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Parallelism 1 so the printed output is deterministic for the demo.
    env.setParallelism(1)

    val stream1: DataStream[(Int, String)] = env.fromElements(
      (1, "a"),
      (2, "b")
    )
    val stream2: DataStream[(Int, Int)] = env.fromElements(
      (1, 1),
      (1, 2)
    )

    // Conceptually similar to: select * from A inner join B on A.id = B.id;
    // NOTE: connect() does NOT join by itself -- keying only routes elements
    // with the same key to the same parallel instance; any join logic would
    // have to be implemented inside the co-function.
    // conn and conn1 are two equivalent ways of keying a connected stream.
    val conn: ConnectedStreams[(Int, String), (Int, Int)] = stream1
      .keyBy(_._1)
      .connect(stream2.keyBy(_._1)) // keys are co-partitioned; values need not match

    // Same keying applied after connect. Key-selector functions are used here
    // instead of the positional keyBy(0, 0), which is deprecated since Flink 1.11.
    val conn1: ConnectedStreams[(Int, String), (Int, Int)] = stream1
      .connect(stream2)
      .keyBy(_._1, _._1)

    // Map both input types onto a common output type (String) so downstream
    // operators see a single homogeneous stream.
    val outStream: DataStream[String] = conn.map(new MyCoMapFunction)

    outStream.print()

    env.execute()
  }

  /**
   * Unifies the two input types of a ConnectedStreams into a single String
   * output. `map1` handles elements from the first stream, `map2` from the
   * second; Flink invokes them per element as data arrives on either input.
   */
  class MyCoMapFunction extends CoMapFunction[(Int, String), (Int, Int), String] {

    override def map1(value: (Int, String)): String = value._2 + " from map1"

    override def map2(value: (Int, Int)): String = value._2.toString + " from map2"
  }
}

猜你喜欢

转载自blog.csdn.net/JavaBigData/article/details/115472449
今日推荐