Streaming自定义数据源

Spark Streaming 自定义数据源:通过自定义 Receiver 从 socket 接收数据。

myReceiver

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

/**
 * Custom Spark Streaming receiver that reads UTF-8 text lines from a TCP
 * socket and hands each line to Spark through `store`.
 *
 * @param host host name of the socket server to connect to
 * @param port TCP port of the socket server
 */
class myReceiver(host: String, port: Int) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_SER_2) {
  import scala.util.control.NonFatal

  /**
   * Connects to `host:port` and stores every received line until the stream
   * ends or the receiver is stopped.
   *
   * The socket is always closed, even when reading fails. If the connection
   * ends or an error occurs while the receiver is still active, a restart is
   * requested so that the receiver tries to connect again.
   */
  def receiveDatas(): Unit = {
    try {
      val socket = new Socket(host, port)
      try {
        // Decode the incoming bytes as UTF-8 text, one line at a time.
        val reader = new BufferedReader(new InputStreamReader(socket.getInputStream, StandardCharsets.UTF_8))
        // In Scala an assignment evaluates to Unit, so the Java idiom
        // `(line = reader.readLine()) != null` is always true; read explicitly.
        var line = reader.readLine()
        while (!isStopped() && line != null) {
          // Pass the line on to Spark.
          store(line)
          line = reader.readLine()
        }
      } finally {
        // Closing the socket also closes its input stream (and the reader on top of it).
        socket.close()
      }
      // End of stream reached while still active: try to connect again.
      if (!isStopped()) restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        if (!isStopped()) restart(s"Error connecting to $host:$port", e)
      case NonFatal(e) =>
        if (!isStopped()) restart("Error receiving data", e)
    }
  }

  /**
   * Called when the receiver starts. It must not block, so the actual
   * reading is done on a separate thread.
   */
  override def onStart(): Unit = {
    new Thread("Socket Receiver") {
      override def run(): Unit = {
        // Receive the data coming from the socket.
        receiveDatas()
      }
    }.start()
  }

  /**
   * Called when the receiver is stopped. Nothing to do here: the reading
   * thread checks `isStopped()` and terminates by itself.
   */
  override def onStop(): Unit = ()
}

sparkStreaming

import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Stateful word count over the lines delivered by the custom socket receiver.
 */
object sparkStreaming03 {

  /**
   * State update function for `updateStateByKey`.
   *
   * @param inputStream  the counts seen for a key in the current batch
   * @param resultStream the running total for that key, if any
   * @return the new running total (previous total, or 0, plus the batch counts)
   */
  def updateFunc(inputStream: Seq[Int], resultStream: Option[Int]): Option[Int] =
    Some(inputStream.foldLeft(resultStream.getOrElse(0))(_ + _))

  /**
   * Builds the streaming job: 3-second batches, lines split on single spaces,
   * running count per word printed for every batch.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[4]").setAppName("sourceFile")
    val ssc = new StreamingContext(new SparkContext(conf), Seconds(3))
    // Checkpoint directory used by the stateful stream below.
    ssc.checkpoint("./check_point")

    val lines: ReceiverInputDStream[String] = ssc.receiverStream(new myReceiver("node01", 9999))
    val words = lines.flatMap(text => text.split(" "))
    val ones = words.map(word => (word, 1))
    val totals = ones.updateStateByKey(updateFunc)
    totals.print()

    ssc.start()
    ssc.awaitTermination()
  }

}

猜你喜欢

转载自blog.csdn.net/weixin_44429965/article/details/107417469