版权声明: https://blog.csdn.net/xiongbingcool/article/details/80857907
出现以下错误,通常都是由 pom 中引入的 Spark 相关依赖与集群 Spark 版本不一致导致的:
java.lang.NoClassDefFoundError: org/apache/spark/Logging
java.lang.AbstractMethodError at org.apache.spark.internal.Logging$class.initializeLogIfNecessary(Logging.scala:99)
下面给出经过测试、可正常运行的 pom 依赖及示例代码。由于集群环境使用的 Spark 版本为 2.2.1,pom 中的依赖也应指定相同版本,以防在服务器上运行时出现版本冲突。
pom.xml中添加依赖:
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>2.2.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming-kafka-0-10 -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
<version>2.2.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/redis.clients/jedis -->
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.9.0</version>
</dependency>
spark任务示例:
import com.alibaba.fastjson.JSONObject;
import com.dongao.beacon.ds.spark.redis.RedisClient;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.Jedis;
import java.util.*;
/**
* 简单的kafka消息计数
*
* @author [email protected]
* @version 1.0
* @create 2018/6/29 10:59
**/
/**
 * Spark Streaming job that counts Kafka messages per minute and stores the
 * running totals in Redis.
 *
 * <p>Each Kafka record is expected to contain a JSON array; the array is split
 * into individual maps, the per-batch element count is accumulated in a Redis
 * hash keyed by day (field = HH:mm of the record timestamp), and the flattened
 * maps are emitted downstream.
 */
public class TotalCounter {

    private static final Logger logger = LoggerFactory.getLogger(TotalCounter.class);

    static final String KAFKA_GROUP = "streaming-group";
    static final String KAFKA_TOPICS = "aaa";
    static final String KAFKA_BOOTSTRAP_SERVERS = "tomcat105:9092";

    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("spark-streaming-kafka-total-counter")
                .setMaster("local[1]")
                // On shutdown (e.g. kill), finish processing the in-flight batch
                // before stopping, so no data is lost by a hard stop.
                .set("spark.streaming.stopGracefullyOnShutdown", "true")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .registerKryoClasses(
                        new Class[]{RedisClient.class}
                );

        // Poll Kafka every 20 seconds.
        JavaStreamingContext streamingContext = new JavaStreamingContext(conf, Durations.seconds(20));

        Collection<String> topicsSet = new HashSet<>(Arrays.asList(KAFKA_TOPICS.split(",")));

        // Kafka consumer configuration.
        // NOTE: only consumer settings belong here — the producer-side
        // "key.serializer" and the legacy 0.8 "metadata.broker.list" have been
        // removed; the 0.10 client uses "bootstrap.servers" exclusively.
        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", KAFKA_BOOTSTRAP_SERVERS);
        // A group distinct from the ETL job's group, so offsets are tracked independently.
        kafkaParams.put("group.id", KAFKA_GROUP);
        // Start from the latest offset when no committed offset exists.
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        kafkaParams.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        // Explicit starting offsets per partition; empty means "use the
        // committed offsets / auto.offset.reset policy".
        Map<TopicPartition, Long> offsets = new HashMap<>();

        // Create the direct stream. Key/value types are String to match the
        // deserializers configured above.
        JavaInputDStream<ConsumerRecord<String, String>> messages = KafkaUtils.createDirectStream(
                streamingContext,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(topicsSet, kafkaParams, offsets)
        );

        // Each message holds a JSON array; flatten it into individual maps and
        // record the per-minute count in Redis.
        messages
                .flatMap(row -> {
                    long timestamp = row.timestamp();
                    List<HashMap> maps = new ArrayList<>();
                    try {
                        maps = JSONObject.parseArray(row.value(), HashMap.class);
                    } catch (Exception e) {
                        logger.error("【重要】消息转Json异常", e);
                    }
                    DateTime curr = new DateTime(timestamp);
                    // try-with-resources guarantees the connection is returned
                    // even if the Redis command throws.
                    try (Jedis jedis = RedisClient.getInstance().getClient()) {
                        // Per-minute count of records the SDK pushed to Kafka:
                        // key = day, field = HH:mm, value incremented by batch size.
                        jedis.hincrBy("beacon:streaming:kafka:total:counter:" + curr.toString("yyyyMMdd"),
                                curr.toString("HH:mm"), maps.size());
                    }
                    return maps.iterator();
                }).print(0);

        streamingContext.start();
        streamingContext.awaitTermination();
    }
}