Flink (4) Streaming API - Source

1. Reading data from a collection

package com.flink.sounce

import org.apache.flink.streaming.api.scala._

/**
  * @Author :star
  * @Date :2020/7/4 12:56
  * @Version :1.0
  */
object ReadList {
  // Read data from a collection
  def main(args: Array[String]): Unit = {
    // Create the Flink execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Read the data from the collection
    val dataList: DataStream[SensorReading] = env.fromCollection(List(
      SensorReading("sensor_1", 1547718199, 35.80018327300259),
      SensorReading("sensor_6", 1547718201, 15.402984393403084),
      SensorReading("sensor_7", 1547718202, 6.720945201171228),
      SensorReading("sensor_10", 1547718205, 38.101067604893444)
    )
    )
    dataList.print()
    env.execute()

  }
}
case class SensorReading(id: String, timestamp: Long, temperature: Double)
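
Besides fromCollection, a test stream can also be built directly from a few values with fromElements. The sketch below is a minimal example assuming the same SensorReading case class; the object name ReadElements is just for illustration.

package com.flink.sounce

import org.apache.flink.streaming.api.scala._

object ReadElements {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // fromElements builds a stream straight from varargs, without wrapping the values in a List
    val dataStream: DataStream[SensorReading] = env.fromElements(
      SensorReading("sensor_1", 1547718199, 35.8),
      SensorReading("sensor_6", 1547718201, 15.4)
    )
    dataStream.print()
    env.execute()
  }
}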

2. Reading data from a file

package com.flink.sounce

import org.apache.flink.streaming.api.scala._

/**
  * @Author :star
  * @Date :2020/7/4 12:56
  * @Version :1.0
  */
object ReadFile {
  // Read data from a file
  def main(args: Array[String]): Unit = {
    // Create the Flink execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Read the data from the file
    val data = env.readTextFile("YOUR_FILE_PATH")
    data.print()
    env.execute()

  }
}
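
readTextFile returns a DataStream[String], so in practice each line is usually parsed into a domain object before further processing. Below is a minimal sketch, assuming each line of the file is comma-separated like "sensor_1,1547718199,35.8"; the object name and file path are placeholders.

package com.flink.sounce

import org.apache.flink.streaming.api.scala._

object ReadFileToSensorReading {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val lines = env.readTextFile("YOUR_FILE_PATH")
    // Parse each comma-separated line into a SensorReading
    val sensorStream: DataStream[SensorReading] = lines.map(line => {
      val fields = line.split(",")
      SensorReading(fields(0).trim, fields(1).trim.toLong, fields(2).trim.toDouble)
    })
    sensorStream.print()
    env.execute()
  }
}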

3. Reading data from Kafka

First, add the Kafka connector dependency:

<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.11 -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
    <version>1.7.2</version>
</dependency>

The code is as follows:

package com.flink.sounce

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

/**
  * @Author :star
  * @Date :2020/7/4 15:53
  * @Version :1.0
  */
object KafkaSource {

  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Kafka consumer configuration
    val properties = new Properties()
    properties.setProperty("bootstrap.servers","hdp-1:9092")
    properties.setProperty("group.id", "consumer-group")
    properties.setProperty("key.deserializer","org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("value.deserializer","org.apache.kafka.common.serialization.StringDeserializer")
    properties.setProperty("auto.offset.reset", "latest")
    // Read data from the Kafka topic
    val stream = env.addSource(new FlinkKafkaConsumer011[String]("flink",new SimpleStringSchema(),properties))
    stream.print()
    // Execute the job
    env.execute()
  }
}
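
By default the consumer starts from the committed group offsets; if you want Flink to track the offsets as part of its own checkpoints, enable checkpointing on the environment. The following is a minimal sketch reusing the "flink" topic and "hdp-1:9092" broker from above; the object name, checkpoint interval, and the setStartFromLatest call are illustrative choices, not part of the original example.

package com.flink.sounce

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011

object KafkaSourceWithCheckpointing {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Enable checkpointing so the consumer's offsets are stored in Flink checkpoints
    // (the 5000 ms interval is just an example value)
    env.enableCheckpointing(5000)

    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hdp-1:9092")
    properties.setProperty("group.id", "consumer-group")

    val consumer = new FlinkKafkaConsumer011[String]("flink", new SimpleStringSchema(), properties)
    // Optionally start from the latest offsets instead of the committed group offsets
    consumer.setStartFromLatest()

    env.addSource(consumer).print()
    env.execute()
  }
}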

4. Custom source

When none of the built-in sources meets our needs, we can define our own data source.

package com.flink.sounce

import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala._

import scala.util.Random

/**
  * @Author :star
  * @Date :2020/7/4 16:14
  * @Version :1.0
  */
object MySource {
  // Custom source
  def main(args: Array[String]): Unit = {
    // Create the execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Add the custom source
    val stream = env.addSource(new MyStreamSource())
    // Print the generated readings
    stream.print()
    // Execute the job
    env.execute()
  }
}

class MyStreamSource extends SourceFunction[SensorReading]{
  // Flag indicating whether the source is still running
  var running: Boolean = true

  override def cancel(): Unit = {
    // The source is no longer running; flip the running flag
    running = false
  }

  override def run(sourceContext: SourceFunction.SourceContext[SensorReading]): Unit = {
    // Initialize a random number generator
    val rand = new Random()
    // Initialize the sensor ids and their base temperatures
    val curTmp = 1.to(10).map(
      i => ("sensor_" + i, 40 + rand.nextGaussian() * 20)
    )
    // Emit readings with a small random fluctuation while the source is running
    while (running) {
      val data = curTmp.map(
        d => (d._1, d._2 + rand.nextGaussian())
      )
      // Current timestamp in milliseconds
      val curTime = System.currentTimeMillis()
      // Emit one reading per sensor
      data.foreach(
        d => sourceContext.collect(SensorReading(d._1, curTime, d._2))
      )
      Thread.sleep(1000)
    }
  }
}
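
A plain SourceFunction like the one above is non-parallel: it always runs with parallelism 1. If you need one generator per parallel subtask, you can extend RichParallelSourceFunction instead. The sketch below is a minimal example assuming the same SensorReading case class; the class and object names and the parallelism value are illustrative.

package com.flink.sounce

import org.apache.flink.streaming.api.functions.source.{RichParallelSourceFunction, SourceFunction}
import org.apache.flink.streaming.api.scala._

import scala.util.Random

class MyParallelSource extends RichParallelSourceFunction[SensorReading] {
  @volatile private var running = true

  override def run(ctx: SourceFunction.SourceContext[SensorReading]): Unit = {
    val rand = new Random()
    // Each subtask has its own index, so the generated sensor ids do not collide
    val subtask = getRuntimeContext.getIndexOfThisSubtask
    while (running) {
      ctx.collect(SensorReading("sensor_" + subtask, System.currentTimeMillis(), 40 + rand.nextGaussian() * 20))
      Thread.sleep(1000)
    }
  }

  override def cancel(): Unit = running = false
}

object MyParallelSourceApp {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Run the parallel source with parallelism 2 (example value)
    env.addSource(new MyParallelSource()).setParallelism(2).print()
    env.execute()
  }
}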


Reposted from blog.csdn.net/weixin_43233971/article/details/107448788