1. Using foreachRDD to read a socket stream in real time and write it into MySQL:
--------------------------------
import java.sql.DriverManager
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
* Created by Administrator on 2018/3/8.
*/
object SparkStreamingForeachRDDScala {
  def createNewConnection() = {
    Class.forName("com.mysql.jdbc.Driver")
    val conn = DriverManager.getConnection("jdbc:mysql://192.168.231.1:3306/big9", "root", "root")
    conn
  }
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("wordCount")
    conf.setMaster("local[4]")
    // batch interval of 2 seconds
    val ssc = new StreamingContext(conf, Seconds(2))
    ssc.checkpoint("file:///d:/java/chk")
    // create the socket text stream
    val ds1 = ssc.socketTextStream("s101", 8888)
    val ds2 = ds1.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    ds2.foreachRDD(rdd => {
      rdd.foreachPartition(it => {
        // executed on the executor, once per partition
        val conn = createNewConnection()
        val ppst = conn.prepareStatement("insert into wc(word,cnt) values(?,?)")
        conn.setAutoCommit(false)
        for (e <- it) {
          ppst.setString(1, e._1)
          ppst.setInt(2, e._2)
          ppst.executeUpdate()
        }
        conn.commit()
        // close the statement before the connection
        ppst.close()
        conn.close()
      })
    })
    // start the stream
    ssc.start()
    ssc.awaitTermination()
  }
}
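
Opening a fresh connection per partition works, but every batch still pays the connection setup cost. The design pattern recommended in the Spark Streaming programming guide is to reuse connections across batches through a static pool. Below is a minimal sketch of that idea; the object name MysqlConnectionPool and the queue-based pooling are illustrative assumptions, not part of the original example:

import java.sql.{Connection, DriverManager}
import java.util.concurrent.ConcurrentLinkedQueue

object MysqlConnectionPool {
  private val pool = new ConcurrentLinkedQueue[Connection]()

  // hand out a cached connection if one is available, otherwise open a new one
  def borrow(): Connection = {
    val c = pool.poll()
    if (c != null && !c.isClosed) c
    else {
      Class.forName("com.mysql.jdbc.Driver")
      DriverManager.getConnection("jdbc:mysql://192.168.231.1:3306/big9", "root", "root")
    }
  }

  // return the connection for reuse instead of closing it
  def give(c: Connection): Unit = pool.offer(c)
}

Inside foreachPartition, createNewConnection() and conn.close() would then become MysqlConnectionPool.borrow() and MysqlConnectionPool.give(conn).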
2. Combining Spark Streaming with Spark SQL
--------------------------------
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
* Created by Administrator on 2018/3/8.
*/
object SparkStreamingWordCountSparkSQLScala {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("wordCount")
    conf.setMaster("local[2]")
    // batch interval of 2 seconds
    val ssc = new StreamingContext(conf, Seconds(2))
    ssc.checkpoint("file:///d:/java/chk")
    // create the socket text stream
    val lines = ssc.socketTextStream("s101", 8888)
    // flatten into a stream of words
    val words = lines.flatMap(_.split(" "))
    words.foreachRDD(rdd => {
      // getOrCreate reuses a singleton SparkSession across batches
      val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
      import spark.implicits._
      val df1 = rdd.toDF("word")
      df1.createOrReplaceTempView("_temp")
      spark.sql("select word, count(*) from _temp group by word").show()
    })
    // start the stream
    ssc.start()
    ssc.awaitTermination()
  }
}
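
The SQL query above can be expressed equivalently with the DataFrame API, without embedding a SQL string. A sketch of the same foreachRDD body, assuming the words DStream from the example above:

words.foreachRDD(rdd => {
  val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
  import spark.implicits._
  // same aggregation as the SQL query, written with the DataFrame API
  rdd.toDF("word").groupBy("word").count().show()
})

To feed either version, start a listener on the source host, e.g. nc -lk 8888 on s101, and type words into it.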
3. Creating a Kafka consumer using the direct approach
--------------------------------
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
/**
* Created by Administrator on 2018/3/8.
*/
object SparkStreamingKafkaScala {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("kafka")
    conf.setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(2))
    // Kafka parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "s102:9092,s103:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "g1",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    val topics = Array("topic1")
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,                              // location strategy
      Subscribe[String, String](topics, kafkaParams) // consumer strategy
    )
    val ds2 = stream.map(record => (record.key, record.value))
    ds2.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
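
With enable.auto.commit set to false, this job never commits offsets back to Kafka, so a restart falls back to auto.offset.reset. The kafka-0-10 integration exposes manual commit through HasOffsetRanges and CanCommitOffsets (both covered by the kafka010 wildcard import above). A minimal sketch, replacing the ds2 steps above:

stream.foreachRDD(rdd => {
  // grab the offset ranges before any transformation loses the Kafka partition mapping
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  rdd.map(record => (record.key, record.value)).collect().foreach(println)
  // commit asynchronously once the batch has been processed
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
})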
4. Combined test of consumer and location strategies
--------------------------------
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010._
/**
* Created by Administrator on 2018/3/8.
*/
object SparkStreamingKafkaScala {
  def sendInfo(msg: String, objStr: String) = {
    // host ip
    val ip = java.net.InetAddress.getLocalHost.getHostAddress
    // process id
    val rr = java.lang.management.ManagementFactory.getRuntimeMXBean()
    val pid = rr.getName().split("@")(0)
    // thread name
    val tname = Thread.currentThread().getName
    // send ip : pid : thread : message : object id over a socket
    val sock = new java.net.Socket("s101", 8888)
    val out = sock.getOutputStream
    val m = ip + "\t:" + pid + "\t:" + tname + "\t:" + msg + "\t:" + objStr + "\r\n"
    out.write(m.getBytes)
    out.flush()
    out.close()
  }
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("kafka")
    // conf.setMaster("spark://s101:7077")
    conf.setMaster("local[8]")
    val ssc = new StreamingContext(conf, Seconds(5))
    // Kafka parameters
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "s102:9092,s103:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "g1",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )
    // pin every partition of topic t1 to host s102 (PreferFixed location strategy)
    val map = scala.collection.mutable.Map[TopicPartition, String]()
    map.put(new TopicPartition("t1", 0), "s102")
    map.put(new TopicPartition("t1", 1), "s102")
    map.put(new TopicPartition("t1", 2), "s102")
    map.put(new TopicPartition("t1", 3), "s102")
    val locStra = LocationStrategies.PreferFixed(map)
    // alternative: spread partitions evenly across executors
    val consit = LocationStrategies.PreferConsistent
    val topics = Array("t1")
    // topic partitions to consume
    val tps = scala.collection.mutable.ArrayBuffer[TopicPartition]()
    tps += new TopicPartition("t1", 0)
    // tps += new TopicPartition("t2", 1)
    // tps += new TopicPartition("t3", 2)
    // starting offsets per partition
    val offsets = scala.collection.mutable.Map[TopicPartition, Long]()
    offsets.put(new TopicPartition("t1", 0), 3)
    // offsets.put(new TopicPartition("t2", 1), 3)
    // offsets.put(new TopicPartition("t3", 2), 0)
    val conss = ConsumerStrategies.Assign[String, String](tps, kafkaParams, offsets)
    // create the Kafka direct stream with the fixed location strategy and the Assign consumer strategy
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      locStra,
      conss
    )
    val ds2 = stream.map(record => {
      val t = Thread.currentThread().getName
      val key = record.key()
      val value = record.value()
      val offset = record.offset()
      val par = record.partition()
      val topic = record.topic()
      val tt = ("k:" + key, "v:" + value, "o:" + offset, "p:" + par, "t:" + topic, "T:" + t)
      // sendInfo(tt.toString(), this.toString)
      tt
    })
    ds2.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
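
The example wires PreferFixed together with Assign, but the pieces swap independently: PreferConsistent (bound to consit above) spreads partitions evenly across executors, and instead of Assign with explicit offsets, the subscription-based consumer strategies let Kafka manage partition assignment. A sketch of the two alternatives, assuming the same kafkaParams as above:

// subscribe to a fixed topic list
val bySubscribe = ConsumerStrategies.Subscribe[String, String](Array("t1"), kafkaParams)
// subscribe to every topic whose name matches a pattern
val byPattern = ConsumerStrategies.SubscribePattern[String, String](
  java.util.regex.Pattern.compile("t\\d+"), kafkaParams)

To watch where records are processed, uncomment the sendInfo call in the map above and run a listener such as nc -lk 8888 on s101; each line then reports the host, pid, and thread that handled the record.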