package test
import java.sql.DriverManager
import com.typesafe.config.ConfigFactory
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming word count that upserts per-batch results into MySQL.
 *
 * Reads whitespace-separated words from a socket source ("hadoop01":1234),
 * counts them per micro-batch, and for each word either inserts a new row
 * into the `wordcount` table or adds the batch count to the stored total.
 *
 * Requires an application.conf on the classpath (resources directory)
 * providing db.url, db.user and db.password.
 */
object Test04 {
  def main(args: Array[String]): Unit = {
    // Silence noisy Spark/Hadoop INFO logging.
    Logger.getLogger("org").setLevel(Level.WARN)
    // Loads application.conf from the resources directory by default.
    val config = ConfigFactory.load()
    val conf = new SparkConf().setMaster("local[*]").setAppName("Test04")
    // Micro-batch interval: one batch every 3 seconds.
    // (Original comment said 2 seconds but the code uses Seconds(3).)
    val ssc = new StreamingContext(conf, Seconds(3))
    // Receive newline-delimited text from the socket.
    val words: ReceiverInputDStream[String] = ssc.socketTextStream("hadoop01", 1234)
    words.foreachRDD(rdd => {
      // Word counts for the current batch only.
      val current_batch_result: RDD[(String, Int)] = rdd.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
      // Upsert each partition's results. One JDBC connection per partition:
      // connections are not serializable, so they must be created on the executor.
      current_batch_result.foreachPartition(partition => {
        val url = config.getString("db.url")
        val user = config.getString("db.user")
        val password = config.getString("db.password")
        val conn = DriverManager.getConnection(url, user, password)
        try {
          // Prepare the statements once per partition instead of once per record.
          val selectStmt = conn.prepareStatement("select * from wordcount where word=?")
          val updateStmt = conn.prepareStatement("update wordcount set total=? where word=?")
          // BUG FIX: the original SQL read "inset into", which is a syntax
          // error at runtime, so new words were never inserted.
          val insertStmt = conn.prepareStatement("insert into wordcount values(?,?)")
          try {
            partition.foreach { case (word, count) =>
              // Check whether the word already has a row; if so update,
              // otherwise insert. (Assumes `word` is unique in the table —
              // the original while-loop would update each matching row once.)
              selectStmt.setString(1, word)
              val rs = selectStmt.executeQuery()
              val existingTotal = if (rs.next()) Some(rs.getInt("total")) else None
              rs.close()
              existingTotal match {
                case Some(total) =>
                  // Row exists: add this batch's count to the stored total.
                  updateStmt.setInt(1, total + count)
                  updateStmt.setString(2, word)
                  updateStmt.executeUpdate()
                case None =>
                  // First time this word is seen: insert a fresh row.
                  insertStmt.setString(1, word)
                  insertStmt.setInt(2, count)
                  insertStmt.executeUpdate()
              }
            }
          } finally {
            selectStmt.close()
            updateStmt.close()
            insertStmt.close()
          }
        } finally {
          // BUG FIX: the original called conn.close() inside the per-record
          // loop, closing the connection after the first record and making
          // every subsequent record in the partition fail. Also guarantees
          // the connection is released if any JDBC call throws.
          if (conn != null) conn.close()
        }
      })
    })
    ssc.start()
    ssc.awaitTermination()
  }
}
需要配置文件
application.conf
db.url="jdbc:mysql://localhost:3306/bigdata?characterEncoding=UTF-8&serverTimezone=Asia/Shanghai"
db.user="root"
db.password="123456"
db.table="wordcount"