Flink学習:Source(データソース)演算子のAPI操作

import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import scala.util.Random
//传感器数据
case class SensorReading( id: String, timestamps: Long, temperature: Double)

object SourceTest {

  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    //数据源一:自定义集合
    val stream1 = env.fromCollection(List(
      SensorReading("1", 1111111111, 21.11),
      SensorReading("2", 1111111112, 21.22),
      SensorReading("3", 1111111113, 21.33),
      SensorReading("4", 1111111114, 21.44)
    ))
    stream1.print("stream1")

    //数据源二:文件 txt
    val stream2 = env.readTextFile("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
        .map{ line=>
          SensorReading(line.split(",")(0),line.split(",")(1).toLong,line.split(",")(2).toDouble)
        }
    stream2.print("stream2")

    //数据源三:kafka
    //测试: 控制台生产数据 ./bin/kafka-console-producer.sh --broker-list cdh-master:9092 --topic sensor
    val property = new Properties()
    property.setProperty("bootstrap.servers", "cdh-master:9092")
    property.put("group.id", "consumer-group")
    property.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    property.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    property.setProperty("auto.offset.reset", "latest")

    val stream3 = env.addSource(new FlinkKafkaConsumer("sensor", new SimpleStringSchema(), property))

    stream3.print("stream3")

    //数据源四:自定义(测试多用)
    val stream4 = env.addSource(new SensorSource())

    stream4.print("stream4")

    env.execute("source test")
  }
}

class SensorSource extends SourceFunction[SensorReading]{

  //定义一个flag 表示数据源是否运行
  var running: Boolean = true

  //正常生成数据
  override def run(ctx: SourceFunction.SourceContext[SensorReading]): Unit = {
    //初始化一个随机数生成器
    val random = new Random()

    //初始化定义一个 传感器对象
    var curTemp = 1.to(10).map{ i=>
      ("sensor_" + i, 60 + random.nextGaussian()*20)
    }

    //死循环生成数据
    while (running){
      //在前一次的值上更新
      curTemp = curTemp.map(
        t=> (t._1, t._2 + random.nextGaussian())
      )

      val curTime = System.currentTimeMillis()

      val data = curTemp.map(t=>
        //发出生成的数据
        ctx.collect(SensorReading(t._1, curTime/1000, t._2))
      )
    }
    Thread.sleep(500)
  }

  //停止生成数据
  override def cancel(): Unit = running = false
}

おすすめ

転載: blog.csdn.net/q18729096963/article/details/107588167