版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_37050372/article/details/83152777
package com.test.sparkStreaming
import java.sql.{DriverManager, PreparedStatement}
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming word count that accumulates per-word totals in MySQL.
 *
 * Lines are read from a socket, split on single spaces and counted per
 * 2-second batch; every batch's counts are then merged into the `wordcount`
 * table (columns referenced by name: `words`, `total`).
 *
 * Database settings are read from the Typesafe config keys `db.url`,
 * `db.user` and `db.password`.
 */
object MyNetWorkWordCountMysqlState {

  /**
   * Builds the streaming job and blocks until it terminates.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.OFF)

    // Loads the configuration found under resources.
    // Default lookup order: application.conf -> application.json -> application.properties
    val config: Config = ConfigFactory.load()
    // Resolve the JDBC settings once on the driver: a missing key fails at
    // start-up, and only three strings (not the Config object) are captured
    // by the closures shipped to the workers.
    val url = config.getString("db.url")
    val user = config.getString("db.user")
    val password = config.getString("db.password")

    // Create the StreamingContext.
    val conf = new SparkConf().setAppName("MyNetWorkWordCountMysqlState").setMaster("local[2]")
    // Batch interval: data is collected every 2 seconds.
    val ssc: StreamingContext = new StreamingContext(conf, Seconds(2))
    // Create the discretized stream from the socket source.
    val lines = ssc.socketTextStream("marshal", 5678)

    // Persist the result of every batch.
    // foreachRDD runs on the driver; foreachPartition/foreach run on the workers.
    lines.foreachRDD { rdd =>
      // Word counts of the current batch. Empty tokens (blank lines, repeated
      // spaces) are dropped so that "" is never stored as a word.
      val current_result: RDD[(String, Int)] =
        rdd.flatMap(_.split(" ")).filter(_.nonEmpty).map((_, 1)).reduceByKey(_ + _)

      current_result.foreachPartition { partition =>
        // Do not open a connection for a partition that has nothing to write.
        if (partition.hasNext) {
          savePartition(url, user, password, partition)
        }
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }

  /**
   * Merges one partition of (word, count) pairs into the `wordcount` table
   * using a single connection and three statements prepared once.
   *
   * For each word: every existing row has `total` increased by the batch
   * count; if no row exists a new one is inserted.
   *
   * NOTE(review): the lookup and the following update/insert are not atomic.
   * If the same word can be written concurrently from elsewhere, a unique key
   * on `words` plus a single upsert statement would be safer - confirm the
   * table definition before changing the SQL.
   *
   * @param url      JDBC URL
   * @param user     database user
   * @param password database password
   * @param rows     (word, count) pairs of the current batch partition
   */
  private def savePartition(
      url: String,
      user: String,
      password: String,
      rows: Iterator[(String, Int)]): Unit =
    withResource(DriverManager.getConnection(url, user, password)) { conn =>
      withResource(conn.prepareStatement("select * from wordcount where words=?")) { select =>
        withResource(conn.prepareStatement("update wordcount set total = ? where words = ?")) { update =>
          // presumably the table's column order is (words, total) - verify against the schema
          withResource(conn.prepareStatement("insert into wordcount values(?,?)")) { insert =>
            rows.foreach { case (word, count) =>
              // Check whether the word was stored before: update if so, insert otherwise.
              select.setString(1, word)
              val alreadyStored = withResource(select.executeQuery()) { rs =>
                var found = false
                while (rs.next()) {
                  found = true
                  // The word already exists, so add this batch's count to it.
                  println("已经存在")
                  val merged = rs.getInt("total") + count
                  update.setInt(1, merged)
                  update.setString(2, word)
                  update.executeUpdate()
                }
                found
              }
              if (!alreadyStored) {
                println("单词不存在,需要插入")
                // Insert a new row for the word.
                insert.setString(1, word)
                insert.setInt(2, count)
                insert.executeUpdate()
              }
            }
          }
        }
      }
    }

  /**
   * Runs `body` with `resource` and closes the resource afterwards, whether
   * `body` returns normally or throws.
   */
  private def withResource[R <: AutoCloseable, A](resource: R)(body: R => A): A =
    try body(resource)
    finally resource.close()
}
运行结果: