// myReceiver
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket
import java.nio.charset.StandardCharsets
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver
/**
 * Custom Spark Streaming receiver that reads UTF-8 text lines from a TCP
 * socket and hands every line to Spark through `store`.
 *
 * Received data is kept with `StorageLevel.MEMORY_AND_DISK_SER_2`.
 *
 * @param host host name or address of the socket server to connect to
 * @param port TCP port of the socket server
 */
class myReceiver(host: String, port: Int) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_SER_2) {

  /**
   * Connects to `host:port` and stores each received line until the stream
   * ends or the receiver is stopped.
   *
   * The socket is always closed on exit. If the connection ends or fails
   * while the receiver has not been stopped, a restart is requested so the
   * failure is reported to Spark instead of silently killing the thread.
   */
  def receiveDatas(): Unit = {
    import scala.util.control.NonFatal

    try {
      val socket = new Socket(host, port)
      try {
        val reader = new BufferedReader(
          new InputStreamReader(socket.getInputStream, StandardCharsets.UTF_8))
        // readLine() returns null at end of stream. The previous
        // `(line = reader.readLine()) != null` compared Unit with null, which
        // is always true, so the loop never terminated on EOF.
        Iterator
          .continually(reader.readLine())
          .takeWhile(line => line != null && !isStopped())
          .foreach(line => store(line))
      } finally {
        // Closing the socket also closes its input stream (and therefore the
        // reader built on top of it), even when reading failed.
        socket.close()
      }
      if (!isStopped()) {
        restart(s"Connection to $host:$port ended, trying to connect again")
      }
    } catch {
      case NonFatal(e) =>
        if (!isStopped()) {
          restart(s"Error receiving data from $host:$port", e)
        }
    }
  }

  /**
   * Starts the receiving loop on its own thread so that this method returns
   * immediately instead of blocking on the socket.
   */
  override def onStart(): Unit = {
    new Thread("myReceiver-socket-reader") {
      override def run(): Unit = receiveDatas()
    }.start()
  }

  /**
   * Nothing to clean up here: the reading thread checks `isStopped()` after
   * every line and closes its own socket when it exits.
   *
   * This used to be `???`, which throws `NotImplementedError` when invoked.
   */
  override def onStop(): Unit = ()
}
// sparkStreaming
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Streaming word count that keeps a running total per word across batches,
 * reading its input lines through `myReceiver`.
 */
object sparkStreaming03 {

  /**
   * State update function for `updateStateByKey`.
   *
   * @param inputStream  counts observed for a key in the current batch
   * @param resultStream total accumulated for that key so far, if any
   * @return the previous total (0 when absent) plus the sum of the new counts
   */
  def updateFunc(inputStream: Seq[Int], resultStream: Option[Int]): Option[Int] =
    Some(inputStream.foldLeft(resultStream.getOrElse(0))(_ + _))

  /**
   * Builds a local streaming context with 3-second batches, wires the socket
   * receiver into a stateful word count, prints each batch result and blocks
   * until the streaming context terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[4]").setAppName("sourceFile")
    val ssc = new StreamingContext(new SparkContext(conf), Seconds(3))
    // A checkpoint directory is set before the stateful updateStateByKey below.
    ssc.checkpoint("./check_point")

    val lines: ReceiverInputDStream[String] = ssc.receiverStream(new myReceiver("node01", 9999))
    val words = lines.flatMap(text => text.split(" "))
    val pairs = words.map(word => (word, 1))
    val runningCounts = pairs.updateStateByKey(updateFunc)
    runningCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}