一、Spark RDD 和 Spark Streaming 的区别
二、Spark Streaming 从 Kafka 获取消息并最终保存到数据库中
package com.stream.com
import java.sql.{Connection, DriverManager, PreparedStatement}

import scala.util.control.NonFatal

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
/**
 * Spark Streaming word count over a Kafka topic, with the running totals written to MySQL.
 *
 * Every 5-second batch takes the record values from topic "1705b", splits them on single
 * spaces, folds the per-batch counts into a per-word running total via `updateStateByKey`,
 * prints the state and inserts each (word, total) pair into table `xss`.
 *
 * The entry point is an explicit `main` rather than `extends App`: the Spark documentation
 * warns that subclasses of `scala.App` may not work correctly.
 *
 * @author 徐磊
 * @email [email protected]
 * @date 2020/02/04 11:43 AM
 */
object SparkStream {

  // JDBC settings used by every partition writer (values unchanged from the original code).
  private val JdbcDriver = "com.mysql.jdbc.Driver"
  private val JdbcUrl =
    "jdbc:mysql://192.168.224.132:3306/kafka?useUnicode=true&characterEncoding=utf8"
  private val JdbcUser = "root"
  private val JdbcPassword = "root"
  private val InsertSql = "insert into xss values(?,?)"

  /**
   * State-update function for `updateStateByKey`.
   *
   * For each (word, counts seen in the current batch, previous total) triple it yields
   * (word, sum of the batch counts + previous total), treating a missing previous total as 0.
   */
  val lj: Iterator[(String, Seq[Int], Option[Int])] => Iterator[(String, Int)] =
    _.map { case (word, batchCounts, previousTotal) =>
      (word, batchCounts.sum + previousTotal.getOrElse(0))
    }

  /**
   * Inserts every (word, total) pair of one partition into MySQL.
   *
   * The connection and statement are opened and closed inside this method so that they live
   * entirely in the JVM that runs the partition; JDBC handles cannot be shared with the
   * driver through captured variables. Empty partitions do not open a connection.
   */
  private def savePartition(rows: Iterator[(String, Int)]): Unit =
    if (rows.hasNext) {
      // Load the driver class in the JVM that actually opens the connection.
      Class.forName(JdbcDriver)
      val conn = DriverManager.getConnection(JdbcUrl, JdbcUser, JdbcPassword)
      try {
        val ps = conn.prepareStatement(InsertSql)
        try {
          rows.foreach { case (word, total) =>
            ps.setString(1, word)
            ps.setInt(2, total)
            ps.executeUpdate()
          }
        } finally {
          ps.close()
        }
      } finally {
        conn.close()
      }
    }

  /** Builds the streaming pipeline, starts it and blocks until it terminates. */
  def main(args: Array[String]): Unit = {
    // Original author's note: allocate at least 2 local threads, otherwise the job fails
    // because one thread receives messages while another does the computation.
    // NOTE(review): that constraint is about receiver-based sources such as the socket
    // stream commented out below — confirm whether it still matters for the direct stream.
    val conf = new SparkConf()
      .setAppName("stream")
      .setMaster("local[2]")
    val sc = new SparkContext(conf)
    // updateStateByKey needs a checkpoint directory for its state.
    sc.setCheckpointDir("d://17777")

    // Batch interval: a new micro-batch is produced every 5 seconds.
    val ssc = new StreamingContext(sc, Seconds(5))

    // A socket source needs a host and port; the Kafka source does not, hence commented out.
    // val ds = ssc.socketTextStream("192.168.224.132",4444)
    // NOTE(review): auto-commit is disabled and nothing in this file commits offsets —
    // confirm that restarting from "latest" is the intended behaviour.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "node132:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "g1",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array("1705b")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )
    val value = stream.map(_.value())

    // Cumulative word count: per-batch (word, 1) pairs folded into the running state by lj.
    val res = value
      .flatMap(_.split(" "))
      .map((_, 1))
      .updateStateByKey(lj, new HashPartitioner(sc.defaultParallelism), true)
    res.print()

    // Persist the state to MySQL after each batch.
    // NOTE(review): the state stream presumably carries every known word in each batch, so a
    // plain insert would add a new row per word per batch — confirm against the schema of
    // `xss` whether an upsert is wanted instead.
    res.foreachRDD { rdd =>
      try {
        rdd.foreachPartition(rows => savePartition(rows))
      } catch {
        // Best effort, as in the original: report the failed batch and keep streaming.
        // Fatal JVM errors are deliberately not caught.
        case NonFatal(e) => e.printStackTrace()
      }
    }

    ssc.start() // start receiving and processing
    ssc.awaitTermination() // block until the streaming context is stopped
  }
}