一.Socket数据源 WordCount（socketTextStream）
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
object WordCount1 {
  /**
   * Streaming word count: reads lines from a TCP socket (hadoop102:9999)
   * and prints per-word counts for each 3-second batch.
   */
  def main(args: Array[String]): Unit = {
    // local[2]: one thread for the socket receiver, one for batch processing.
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("WordCount1")
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))

    val lines = streamingContext.socketTextStream("hadoop102", 9999)
    val wordCounts = lines
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
    wordCounts.print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
二.自定义数据源（自定义 Receiver 从 Socket 读取数据）
import java.io.{BufferedReader, InputStreamReader}
import java.net.Socket

import scala.util.control.NonFatal

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.receiver.Receiver
object MyReceiverDemo {
  /**
   * Streaming word count driven by the custom [[MyReceiver]] source
   * (hadoop102:9999), printing counts every 3-second batch.
   */
  def main(args: Array[String]): Unit = {
    // local[2]: one thread is consumed by the receiver, one does processing.
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("WordCount2")
    val streamingContext = new StreamingContext(sparkConf, Seconds(3))

    val lines = streamingContext.receiverStream(new MyReceiver("hadoop102", 9999))
    val wordCounts = lines
      .flatMap(line => line.split(" "))
      .map(word => (word, 1))
      .reduceByKey(_ + _)
    wordCounts.print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
/**
 * Custom Spark Streaming receiver that reads UTF-8 lines from a TCP socket
 * and stores each line into Spark's memory (MEMORY_ONLY).
 *
 * Fixes vs. the original:
 *  - onStart previously shadowed `socket`/`reader` with locals, so the member
 *    fields stayed null and onStop closed nothing (resource leak).
 *  - the bare `case e =>` caught every Throwable (incl. fatal errors); now
 *    only NonFatal exceptions are handled.
 *  - onStop closes the reader (which wraps the socket stream) before the socket.
 */
class MyReceiver(host: String, port: Int) extends Receiver[String](StorageLevel.MEMORY_ONLY) {
  // Written by the receiver thread in onStart, read by onStop — volatile for
  // cross-thread visibility.
  @volatile var socket: Socket = _
  @volatile var reader: BufferedReader = _

  override def onStart(): Unit = {
    // onStart must return quickly, so the blocking read loop runs on its own thread.
    runInThread {
      try {
        socket = new Socket(host, port) // assign the FIELDS so onStop can close them
        reader = new BufferedReader(new InputStreamReader(socket.getInputStream, "utf-8"))
        var line = reader.readLine()
        while (line != null && socket.isConnected) {
          store(line) // hand the line to Spark
          line = reader.readLine()
        }
      } catch {
        // Recover only from non-fatal errors; let OutOfMemoryError etc. propagate.
        case NonFatal(e) => e.printStackTrace()
      } finally {
        // Ask Spark to restart the receiver when the connection ends or fails.
        restart("重启接收器")
      }
    }
  }

  /** Runs `op` on a freshly started background thread. */
  def runInThread(op: => Unit): Unit = {
    new Thread() {
      override def run(): Unit = op
    }.start()
  }

  override def onStop(): Unit = {
    // Close the reader first: it wraps the socket's input stream.
    if (reader != null) reader.close()
    if (socket != null) socket.close()
  }
}
nc -lk 9999