Spark Streaming 应用

build.sbt

// sbt build definition for the streaming example project.
name := "scala-spark-streaming-app"

version := "1.0"

scalaVersion := "2.10.4"

// `%%` appends the Scala binary version to the artifact name.
libraryDependencies += "org.apache.spark" %% "spark-mllib" % "1.1.0"

libraryDependencies += "org.apache.spark" %% "spark-streaming" % "1.1.0"

消息生成端

/**
 * Message producer: listens on TCP port 9999 and, for every client that
 * connects, writes a random number of "user,product,price" lines once per
 * second until the connection fails.
 */
object StreamingProducer {

  import java.io.PrintWriter
  import java.net.ServerSocket
  import scala.util.Random

  /**
   * Starts the server and blocks forever accepting clients.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    val random = new Random()

    // Exclusive upper bound on the number of events generated per second.
    val MaxEvents = 6

    // Read the candidate user names: the first line of /names.csv, comma-separated.
    val namesResource = Option(this.getClass.getResourceAsStream("/names.csv"))
      .getOrElse(sys.error("resource /names.csv not found on the classpath"))
    val names =
      try {
        scala.io.Source.fromInputStream(namesResource)
          .getLines()
          .toList
          .headOption
          .getOrElse(sys.error("resource /names.csv is empty"))
          // String.split takes a regex, so the separator must be "," and not "."
          .split(",")
          .toSeq
      } finally namesResource.close()

    // The products that can be purchased, with their prices.
    val products = Seq(
      "iPhone Cover" -> 9.99,
      "Headphones" -> 5.49,
      "Samsung Galaxy Cover" -> 8.95,
      "iPad Cover" -> 7.49
    )

    // Generate `n` random (user, product, price) purchase events.
    def generateProductEvents(n: Int) =
      (1 to n).map { _ =>
        val (product, price) = products(random.nextInt(products.size))
        val user = names(random.nextInt(names.size))
        (user, product, price)
      }

    // Network producer: one thread per connected client.
    val listener = new ServerSocket(9999)
    println("Listening on port:9999")

    while (true) {
      val socket = listener.accept()
      new Thread() {
        override def run(): Unit = {
          println("got client connected from :" + socket.getInetAddress)
          try {
            val out = new PrintWriter(socket.getOutputStream(), true)
            // PrintWriter never throws IOException; checkError() reports a
            // failed write, which is how a disconnected client is detected.
            while (!out.checkError()) {
              Thread.sleep(1000)
              val num = random.nextInt(MaxEvents)
              val productEvents = generateProductEvents(num)
              productEvents.foreach { event =>
                out.write(event.productIterator.mkString(","))
                out.write("\n")
              }
              out.flush()
              println(s"created $num events...")
            }
          } finally {
            // Release the connection once the client is gone or the thread fails.
            socket.close()
          }
        }
      }.start()
    }
  }
}

//创建简单的流处理程序

/**
 * Minimal streaming application: connects to the text socket on
 * localhost:9999 and prints what arrives in each batch.
 *
 * NOTE(review): `StreamingContext` and `Seconds` live in
 * `org.apache.spark.streaming`; no import for them is visible in this
 * file, so confirm one exists at file level.
 */
object SimpleStreamingApp {

  /**
   * Builds the streaming context, wires up the socket stream and blocks
   * until the streaming computation terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Local master, 10-second batch interval.
    val ssc = new StreamingContext("local[2]", "Fisrst Streaming App", Seconds(10))

    // Each line received on the socket becomes one record of the stream.
    val stream = ssc.socketTextStream("localhost", 9999)

    stream.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

//复杂的streaming app 应用 计算DStream 中每一批的指标并打印结果

/**
 * Streaming analytics application: for every batch of the socket stream it
 * computes and prints the number of purchases, the number of unique users,
 * the total revenue and the most popular product.
 *
 * NOTE(review): `StreamingContext` and `Seconds` live in
 * `org.apache.spark.streaming`, and in Spark 1.1.0 `reduceByKey` / `sum()`
 * on RDDs rely on the implicits in `org.apache.spark.SparkContext._`; no
 * such imports are visible in this file, so confirm they exist at file level.
 */
object StreamingAnalysticsApp {

  import java.text.SimpleDateFormat
  import java.util.Date

  /**
   * Builds the streaming context, registers the per-batch analytics and
   * blocks until the streaming computation terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    val ssc = new StreamingContext("local[2]", "First Streaming App", Seconds(10))

    val stream = ssc.socketTextStream("localhost", 9999)

    // Parse each "user,product,price" line into a tuple of strings.
    val events = stream.map { record =>
      val event = record.split(",")
      (event(0), event(1), event(2))
    }

    // foreachRDD applies an arbitrary function to the RDD of every batch.
    events.foreachRDD { (rdd, time) =>
      val numPurchases = rdd.count()
      val uniqueUsers = rdd.map { case (user, _, _) => user }.distinct().count()
      val totalRevenue = rdd.map { case (_, _, price) => price.toDouble }.sum()

      // Purchase count per product, most purchased first.
      val productsByPopularity = rdd
        .map { case (user, product, price) => (product, 1) }
        .reduceByKey(_ + _)
        .collect()
        .sortBy(-_._2)

      val formatter = new SimpleDateFormat()
      val dateStr = formatter.format(new Date(time.milliseconds))

      println(s"==Batch start time:$dateStr ==")
      println("Total purchases: " + numPurchases)
      println("Unique users: " + uniqueUsers)
      println("Total revenue: " + totalRevenue)

      // A batch can be empty, in which case there is no most popular product.
      productsByPopularity.headOption match {
        case Some((product, purchases)) =>
          println("Most popular product: %s with %d purchases".format(product, purchases))
        case None =>
          println("Most popular product: none (empty batch)")
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}

//有状态的流计算

/**
 * Stateful streaming application: keeps, per user, a running total of the
 * number of purchases and the revenue across all batches, and prints it.
 *
 * NOTE(review): `StreamingContext` and `Seconds` live in
 * `org.apache.spark.streaming`; only the `StreamingContext._` members are
 * imported here, so confirm the types are imported at file level.
 */
object StreamingStateApp {

  import org.apache.spark.streaming.StreamingContext._

  /**
   * State update function for `updateStateByKey`.
   *
   * @param prices       the (product, price) pairs seen for one user in the current batch
   * @param currentTotal the (purchase count, revenue) accumulated so far for that user,
   *                     or `None` if there is no previous state
   * @return the new accumulated (purchase count, revenue), always defined
   */
  def updateState(prices: Seq[(String,Double)], currentTotal:Option[(Int,Double)]) = {
    val currentRevenue = prices.map(_._2).sum
    val currentNumberPurchases = prices.size
    // No previous state: start from zero purchases and zero revenue.
    val state = currentTotal.getOrElse((0, 0.0))
    Some((currentNumberPurchases + state._1, currentRevenue + state._2))
  }

  /**
   * Builds the streaming context, wires up the stateful per-user totals and
   * blocks until the streaming computation terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    val ssc = new StreamingContext("local[2]", "First Streaming App", Seconds(10))

    // Stateful operations require a checkpoint directory.
    ssc.checkpoint("/tmp/sparkstreaming/")

    val stream = ssc.socketTextStream("localhost", 9999)

    // Parse each "user,product,price" line into a typed event.
    val events = stream.map { record =>
      val event = record.split(",")
      (event(0), event(1), event(2).toDouble)
    }

    // Key the events by user so the state is tracked per user.
    val users = events.map { case (user, product, price) => (user, (product, price)) }

    val revenuePerUser = users.updateStateByKey(updateState)

    revenuePerUser.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

猜你喜欢

转载自blog.csdn.net/weixin_40988315/article/details/81479961