import kafka.serializer.StringDecoder
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object HighKafkaSource2 {
  def main(args: Array[String]): Unit = {
    // A new StreamingContext is created on every run, so saving offsets has
    // no effect: at startup no stored offset can be found, and every run
    // therefore consumes from the latest offset (the default, configurable
    // via auto.offset.reset).
    val conf: SparkConf = new SparkConf().setAppName("high1").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(3))
    // ssc.checkpoint("./A1")
    val params = Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop103:9092,hadoop104:9092,hadoop105:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "big1015",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer"
      // ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest"
    )
    val inputDs: InputDStream[(String, String)] = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc,
      params,
      Set[String]("highkafka"))
    inputDs.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
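The root cause noted in the comments is that a brand-new context is built on every run, so nothing remembers the offsets. A minimal sketch of one way to address this (not part of the original code): build the context inside a creating function and let StreamingContext.getOrCreate restore it, offsets included, from the checkpoint directory on restart. The directory "./A1" is taken from the commented-out line above; everything else mirrors the code, using the same imports.

object HighKafkaCheckpointed {
  def main(args: Array[String]): Unit = {
    val checkpointDir = "./A1"
    // All stream setup must happen inside this function, so that it can be
    // replayed from the checkpoint on recovery.
    def createContext(): StreamingContext = {
      val conf = new SparkConf().setAppName("high1").setMaster("local[*]")
      val ssc = new StreamingContext(conf, Seconds(3))
      ssc.checkpoint(checkpointDir) // offsets are persisted as part of the checkpoint
      val params = Map[String, String](
        ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop103:9092,hadoop104:9092,hadoop105:9092",
        ConsumerConfig.GROUP_ID_CONFIG -> "big1015",
        ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
        ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer"
      )
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        ssc, params, Set("highkafka")).print()
      ssc
    }
    // Fresh start: calls createContext(). Restart: rebuilds the context
    // (and the stored offsets) from "./A1" instead.
    val ssc = StreamingContext.getOrCreate(checkpointDir, createContext _)
    ssc.start()
    ssc.awaitTermination()
  }
}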
If you want to read from the earliest offset instead, switch to a new consumer group and set
ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest"; reading then starts from the beginning, because the default is still to read from the latest offset.
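As a concrete illustration of that note, here is the params map rewritten to replay the topic from the beginning. The group id "big1015_new" is a made-up placeholder (any group with no committed offsets works), and "smallest" is the 0.8-era spelling of this setting (newer Kafka clients use "earliest").

    val paramsFromStart = Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "hadoop103:9092,hadoop104:9092,hadoop105:9092",
      ConsumerConfig.GROUP_ID_CONFIG -> "big1015_new", // new group => no stored offsets to resume from
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest" // fall back to the earliest available offset
    )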