Spark Streaming WordCount with a custom data source

Implement a custom receiver that connects to a given port and reads the content sent to it.

package org.feng.stream

import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets

import org.apache.spark.internal.Logging
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark._
import org.apache.spark.streaming._

/**
  * Created by Feng on 2019/12/2 15:52
  * CurrentProject's name is spark
  *
  * Word count over a custom data source: lines are read from a socket by
  * [[CustomReceiver]], split into words, and counted once per batch.
  */
object MyDefine {

  /**
    * Starts the streaming job and blocks until it is terminated.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // NOTE(review): per the Spark Streaming guide a receiver occupies one local
    // thread, so "local[2]" leaves one thread for processing the batches.
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("MyDefine")

    // One batch every 3 seconds.
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))

    val lines = streamingContext.receiverStream(new CustomReceiver("localhost", 12345))

    // split(" ") yields "" for blank lines and for consecutive or leading spaces;
    // drop those tokens so the empty string is not counted as a word.
    val words = lines.flatMap(_.split(" ")).filter(_.nonEmpty)

    words.map(word => (word, 1)).reduceByKey(_ + _).print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}

/**
  * Receiver that connects to `host:port` over TCP and stores every UTF-8 text
  * line it reads as one stream record.
  *
  * @param host host name to connect to
  * @param port TCP port to connect to
  */
class CustomReceiver(host:String, port:Int) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging{

  /** Starts a background thread that runs [[receive]], so this method returns immediately. */
  override def onStart(): Unit = {
    new Thread("CustomRecei"){
      override def run(): Unit ={
        receive()
      }
    }.start()
  }

  /**
    * Intentionally empty: the loop in [[receive]] checks `isStopped()` after
    * every line and releases its own socket when it exits.
    */
  override def onStop(): Unit = {
  }

  /**
    * Connects to `host:port`, stores each line read until the receiver is
    * stopped or the peer closes the connection, then asks for a restart so
    * that a new connection is attempted. Any failure also triggers a restart.
    */
  private def receive(): Unit = {
    try{
      val socket:Socket = new Socket(host, port)
      try {
        val reader = new BufferedReader(new InputStreamReader(socket.getInputStream, StandardCharsets.UTF_8))
        try {
          var userInput:String = reader.readLine()

          // readLine() returns null once the peer has closed the connection.
          while(!isStopped() && userInput != null){
            store(userInput)
            userInput = reader.readLine()
          }
        } finally {
          reader.close()
        }
      } finally {
        // Always release the socket, including when readLine()/store() throws;
        // otherwise each failed attempt would leak a connection before restart.
        socket.close()
      }

      restart("Trying to connect again")
    } catch {
      case e:java.net.ConnectException => restart("Error connecting to " + host + ":" + port, e)
      case t:Throwable => restart("Error receiving data", t)
    }
  }
}

Note

The streaming program reads from a local port, and netcat is used to send data to that port. To run it locally on Windows, install the Windows build of netcat (https://eternallybored.org/misc/netcat/) by downloading version 1.1.2 from that page. After unpacking the archive, copy everything inside it into the System32 folder so that `nc` can be run from cmd. Start it with the command `nc -l -p [port]`; in this example the port is 12345.

Original link: https://blog.csdn.net/FBB360JAVA/article/details/103410629

Published 108 original articles · won 117 Like · views 30000 +

Guess you like

Origin blog.csdn.net/FBB360JAVA/article/details/104280771