Spark Streaming: Saving Data to MySQL

import java.sql.{Connection, DriverManager, PreparedStatement}

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * @ClassName: DStreamOutput
  * @Description:
  * @Author: kele
  * @Date: 2021/2/19 9:40
  **/
object DStreamOutput {

  def main(args: Array[String]): Unit = {

    // 1. Create the StreamingContext with a 5-second batch interval
    val ssc = new StreamingContext(new SparkConf().setAppName("output").setMaster("local[4]"),Seconds(5))
    ssc.sparkContext.setLogLevel("error")

    // 2. Read data from a socket source
    val ds = ssc.socketTextStream("hadoop102",9999)

    val ds2 = ds.flatMap(_.split(" "))
      .map((_,1))
      .reduceByKey(_+_)

    // 3. Save the data
    // Built-in output API (writes each batch to text files)
    //ds2.saveAsTextFiles("output/streamoutput")

    ds2.print()
    // Store the data in MySQL
    ds2.foreachRDD(rdd => {

      // This processes the whole batch (RDD) at once, not individual elements
      rdd.foreachPartition(x => {
        // The connection setup must not live outside foreachPartition:
        // this code runs on every executor, so the connection cannot be created
        // at the Driver level (a JDBC connection is not serializable)
        var connect: Connection = null
        var statement: PreparedStatement = null

        try {
          // Configure the connection
          connect = DriverManager.getConnection("jdbc:mysql://hadoop102:3306/test", "root", "root123")
          // Use a PreparedStatement: 1. parameters can be bound, 2. it prevents SQL injection,
          // 3. the compiled statement is cached, so repeated execution is fast
          statement = connect.prepareStatement("insert into wordcount values(?,?)")

          // Iterate over the records in this partition and save each one to MySQL
          x.foreach(y => {
            statement.setString(1, y._1)
            statement.setInt(2, y._2)

            // Execute the insert
            statement.executeUpdate()
          })
        } catch {
          case e: Exception =>
            // A rollback could be performed here
            e.printStackTrace()
        } finally {
          // Release resources: close the statement before the connection
          if (statement != null)
            statement.close()
          if (connect != null)
            connect.close()
        }
      })
    })

    // Start the streaming computation
    ssc.start()

    // Block until termination
    ssc.awaitTermination()
  }
}
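
As a variant, the per-record executeUpdate above can be replaced with JDBC batching, so each partition sends its inserts to MySQL in a single round trip. The following is a minimal sketch under the same assumptions as the example (same URL, credentials, and a pre-existing wordcount table with a word column and a count column); it is one possible refinement, not the only way to write this.

    ds2.foreachRDD(rdd => {
      rdd.foreachPartition(partition => {
        var connect: Connection = null
        var statement: PreparedStatement = null
        try {
          connect = DriverManager.getConnection("jdbc:mysql://hadoop102:3306/test", "root", "root123")
          statement = connect.prepareStatement("insert into wordcount values(?,?)")
          partition.foreach { case (word, count) =>
            statement.setString(1, word)
            statement.setInt(2, count)
            statement.addBatch()      // queue the insert instead of executing it immediately
          }
          statement.executeBatch()    // send all queued inserts in one batch
        } finally {
          if (statement != null) statement.close()
          if (connect != null) connect.close()
        }
      })
    })

To try either version, feed the socket source with netcat on hadoop102 (for example, nc -lk 9999) while the application is running.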

Reposted from blog.csdn.net/qq_38705144/article/details/113860765