1. Reading data from a collection
package com.flink.sounce

import org.apache.flink.streaming.api.scala._

/**
 * @Author :star
 * @Date :2020/7/4 12:56
 * @Version :1.0
 */
object ReadList {
  // Read data from a collection
  def main(args: Array[String]): Unit = {
    // Create the Flink execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Build a DataStream from an in-memory collection
    val dataList: DataStream[SensorReading] = env.fromCollection(List(
      SensorReading("sensor_1", 1547718199, 35.80018327300259),
      SensorReading("sensor_6", 1547718201, 15.402984393403084),
      SensorReading("sensor_7", 1547718202, 6.720945201171228),
      SensorReading("sensor_10", 1547718205, 38.101067604893444)
    ))
    dataList.print()
    env.execute()
  }
}

case class SensorReading(id: String, timestamp: Long, temperature: Double)
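If you only have a handful of test records, fromElements is a slightly shorter variant of the same idea; a minimal sketch (it takes the elements as varargs instead of a List):

val dataStream: DataStream[SensorReading] = env.fromElements(
  SensorReading("sensor_1", 1547718199, 35.8)
)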
2. Reading data from a file
package com.flink.sounce

import org.apache.flink.streaming.api.scala._

/**
 * @Author :star
 * @Date :2020/7/4 12:56
 * @Version :1.0
 */
object ReadFile {
  // Read data from a file
  def main(args: Array[String]): Unit = {
    // Create the Flink execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Read the file as a stream of lines
    val data = env.readTextFile("YOUR_FILE_PATH")
    data.print()
    env.execute()
  }
}
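readTextFile produces a DataStream[String] with one element per line. If each line holds a record such as sensor_1,1547718199,35.8, it can be mapped into the SensorReading case class from section 1; a minimal sketch, assuming comma-separated id, timestamp, and temperature fields:

val sensorStream: DataStream[SensorReading] = data.map(line => {
  // Split the CSV line and build a SensorReading from its three fields
  val fields = line.split(",")
  SensorReading(fields(0).trim, fields(1).trim.toLong, fields(2).trim.toDouble)
})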
3. Reading data from Kafka
First, add the Kafka connector dependency:
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.11 -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
    <version>1.7.2</version>
</dependency>
The code is as follows:
package com.flink.sounce

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

/**
 * @Author :star
 * @Date :2020/7/4 15:53
 * @Version :1.0
 */
object KafkaSource {
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Kafka consumer configuration
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hdp-1:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("auto.offset.reset", "latest")
    // Consume the "flink" topic as a stream of strings
    val stream = env.addSource(new FlinkKafkaConsumer011[String]("flink", new SimpleStringSchema(), properties))
    stream.print()
    // Execute the job
    env.execute()
  }
}
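By default the consumer starts from the committed group offsets, falling back to auto.offset.reset when none exist. FlinkKafkaConsumer011 also lets you pick the start position explicitly; a minimal sketch:

val consumer = new FlinkKafkaConsumer011[String]("flink", new SimpleStringSchema(), properties)
// Re-read the topic from the beginning, ignoring any committed offsets
consumer.setStartFromEarliest()
val stream = env.addSource(consumer)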
4. Custom source
When none of the built-in sources meets our needs, we can define a custom source by implementing SourceFunction. The example below simulates a stream of sensor temperature readings.
package com.flink.sounce

import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala._

import scala.util.Random

/**
 * @Author :star
 * @Date :2020/7/4 16:14
 * @Version :1.0
 */
object MySource {
  // Use the custom source
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Add the custom source
    val stream = env.addSource(new MyStreamSource())
    stream.print()
    // Execute the job
    env.execute()
  }
}

class MyStreamSource extends SourceFunction[SensorReading] {
  // Flag indicating whether the source should keep running;
  // @volatile so the change made by cancel() (called from another thread) is visible in run()
  @volatile var running: Boolean = true

  override def cancel(): Unit = {
    // Stop the emit loop in run()
    running = false
  }

  override def run(sourceContext: SourceFunction.SourceContext[SensorReading]): Unit = {
    // Random number generator for the simulated readings
    val rand = new Random()
    // Initialize ten sensor IDs with a starting temperature
    var curTemp = 1.to(10).map(
      i => ("sensor_" + i, 40 + rand.nextGaussian() * 20)
    )
    // Emit a batch of readings every second until the source is cancelled
    while (running) {
      // Apply a small random walk to each temperature
      curTemp = curTemp.map(
        t => (t._1, t._2 + rand.nextGaussian())
      )
      // Current timestamp in milliseconds
      val curTime = System.currentTimeMillis()
      // Send each reading downstream
      curTemp.foreach(
        t => sourceContext.collect(SensorReading(t._1, curTime, t._2))
      )
      Thread.sleep(1000)
    }
  }
}
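Once registered with addSource, the custom source behaves like any other DataStream, so the usual transformations apply directly. A minimal usage sketch (the 37.0 threshold is an arbitrary value for illustration):

val highTemp = stream.filter(_.temperature > 37.0)
highTemp.print()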