import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import scala.util.Random
// Sensor reading: id, timestamp (seconds), temperature
case class SensorReading(id: String, timestamp: Long, temperature: Double)
object SourceTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Source 1: a hard-coded collection
    val stream1 = env.fromCollection(List(
      SensorReading("1", 1111111111, 21.11),
      SensorReading("2", 1111111112, 21.22),
      SensorReading("3", 1111111113, 21.33),
      SensorReading("4", 1111111114, 21.44)
    ))
    stream1.print("stream1")

    // Source 2: a text file
    val stream2 = env.readTextFile("/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt")
      .map { line =>
        val fields = line.split(",")
        SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
      }
    stream2.print("stream2")
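    // (Assumed input format: 1.txt holds one comma-separated record per line,
    //  e.g. "sensor_1,1547718199,35.8" for id, timestamp, temperature.)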
    // Source 3: Kafka
    // Test by producing records from the console:
    // ./bin/kafka-console-producer.sh --broker-list cdh-master:9092 --topic sensor
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "cdh-master:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("auto.offset.reset", "latest")
    val stream3 = env.addSource(new FlinkKafkaConsumer[String]("sensor", new SimpleStringSchema(), properties))
    stream3.print("stream3")
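    // Note: SimpleStringSchema makes stream3 a DataStream[String]; to get
    // SensorReading elements, map each line the same way as stream2 above.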
    // Source 4: a custom SourceFunction (handy for testing)
    val stream4 = env.addSource(new SensorSource())
    stream4.print("stream4")

    env.execute("source test")
  }
}
class SensorSource extends SourceFunction[SensorReading] {
  // Flag indicating whether the source is still running; cancel() flips it from another thread
  @volatile var running: Boolean = true

  // Generate readings continuously until the source is cancelled
  override def run(ctx: SourceFunction.SourceContext[SensorReading]): Unit = {
    // Random number generator
    val random = new Random()
    // Initialize ten sensors with a baseline temperature
    var curTemp = (1 to 10).map { i =>
      ("sensor_" + i, 60 + random.nextGaussian() * 20)
    }
    // Emit readings in a loop, updating each sensor relative to its previous value
    while (running) {
      curTemp = curTemp.map(
        t => (t._1, t._2 + random.nextGaussian())
      )
      val curTime = System.currentTimeMillis()
      curTemp.foreach(t =>
        // Emit the generated reading
        ctx.collect(SensorReading(t._1, curTime / 1000, t._2))
      )
      // Throttle the loop: one batch of readings every 500 ms
      Thread.sleep(500)
    }
  }

  // Stop generating data
  override def cancel(): Unit = running = false
}
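
For reference, the example above needs the Flink Scala API and the Kafka connector on the classpath. A minimal build.sbt sketch, assuming Flink 1.10.x on Scala 2.12 (the versions are assumptions; match them to your cluster):

// build.sbt -- dependency sketch for the example above
scalaVersion := "2.12.11"

val flinkVersion = "1.10.1"

libraryDependencies ++= Seq(
  "org.apache.flink" %% "flink-scala"           % flinkVersion,
  "org.apache.flink" %% "flink-streaming-scala" % flinkVersion,
  "org.apache.flink" %% "flink-connector-kafka" % flinkVersion
)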