Spark Streaming流式处理数据并存储到数据库

流式数据处理(stream processing)

  要处理的数据就像流水一样,源源不断的产生数据,需要实时进行处理
  对SparkCore的高级API的封装,将流式的数据切分为小的批次batch(按照时间间隔)的数据,然后使用SparkCore进行处理
  DStream:离散化流(Discretized Stream),本质是按时间间隔划分的一系列RDD,可类比为一个不断追加元素的List集合
  StreamingContext:上下文对象,从实时流式数据源中接收数据
  底层还是SparkContext

代码实现:案例WordCount
/**
 * Spark Streaming WordCount example: reads text lines from a TCP socket,
 * counts words within each 1-second micro-batch, prints the per-batch result
 * with its batch timestamp, and persists the counts into a MySQL table.
 */
object StreamingWordPrint{
  def main(args: Array[String]): Unit = {
    // Local imports so this snippet stays self-contained; the rest of the
    // file's imports are not visible in this view.
    import java.sql.PreparedStatement
    import scala.util.control.NonFatal

    // local[2]: at least 2 threads are required — one for the socket receiver,
    // one for actually processing the batches.
    val conf = new SparkConf().setMaster("local[2]").setAppName("B_StreamingWordPrint")
    // Batch interval of 1 second: the continuous stream is cut into 1s micro-batches.
    val ssc = new StreamingContext(conf, Seconds(1))

    // Reduce log noise so batch output is readable.
    ssc.sparkContext.setLogLevel("WARN")

    // Receive one DStream of text lines from the TCP source.
    val lines = ssc.socketTextStream("bigdata-hpsk01.ares.com", 9999)

    // Classic WordCount pipeline over each micro-batch.
    val words = lines.flatMap(_.split(" "))
    val pairs = words.map(word => (word, 1))
    val wordCounts = pairs.reduceByKey(_ + _)

    // Output action 1: print each non-empty batch together with its timestamp.
    wordCounts.foreachRDD((rdd, time) => {
      println("-----------------------------------")
      val batchTime = time.milliseconds
      // SimpleDateFormat is not thread-safe; created fresh per batch on the
      // driver, so this is safe here.
      val sdf = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
      val batchDataTime = sdf.format(new Date(batchTime))
      println(s"Time:${batchDataTime}")
      println("-----------------------------------")
      if (!rdd.isEmpty()) {
        println("=============================")
        // coalesce(1) so the whole batch is printed from a single partition, in order.
        rdd.coalesce(1).foreachPartition(_.foreach(println))
      }
    })

    // Output action 2: persist each non-empty batch to MySQL.
    // One connection per partition — JDBC objects are not serializable and
    // must be created inside foreachPartition (on the executor).
    wordCounts.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        rdd.coalesce(1).foreachPartition(iter => {
          // 1. Obtain a connection (driver class load + credentials).
          Class.forName("com.mysql.jdbc.Driver")
          val url = "jdbc:mysql://bigdata-hpsk01.ares.com/test"
          val userName = "root"
          val password = "123456"

          var conn: Connection = null
          var pst: PreparedStatement = null

          try {
            conn = DriverManager.getConnection(url, userName, password)
            // Parameterized statement: avoids SQL injection and re-parsing per row.
            pst = conn.prepareStatement("INSERT INTO tb_result_Streaming(k,v) VALUES(?,?)")
            iter.foreach {
              case (k, v) =>
                println(s"k = ${k},v = ${v}")
                pst.setString(1, k)
                pst.setInt(2, v)
                // Batch the inserts instead of one network round-trip per row.
                pst.addBatch()
            }
            pst.executeBatch()
          } catch {
            // NonFatal: log and continue on recoverable errors, but let fatal
            // ones (OutOfMemoryError, InterruptedException, ...) propagate.
            case NonFatal(e) => e.printStackTrace()
          } finally {
            // BUG FIX: the original leaked the PreparedStatement — only the
            // connection was closed. Close both, statement first.
            if (pst != null) pst.close()
            if (conn != null) conn.close()
          }
        })
      }
    })
    wordCounts.print()
    ssc.start()             // Start the computation
    ssc.awaitTermination()  // Wait for the computation to terminate
  }
}
发布了79 篇原创文章 · 获赞 89 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/qq_39141486/article/details/99077384
今日推荐