Receiver-mode code

Two classes follow: a producer that writes simulated user-log lines into Kafka, and a Spark Streaming job that consumes the topic in receiver mode and counts words.

package com.bjsxt.sparkstreaming;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.Random;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.StringEncoder;

/**
 * Produces data into Kafka.
 * @author root
 *
 */
public class SparkStreamingDataManuallyProducerForKafka extends Thread {

    static String[] channelNames = new String[]{
            "Spark", "Scala", "Kafka", "Flink", "Hadoop", "Storm",
            "Hive", "Impala", "HBase", "ML"
    };

    static String[] actionNames = new String[]{"View", "Register"};

    private String topic; // Kafka topic the data is sent to
    private Producer<Integer, String> producerForKafka;

    private static String dateToday;
    private static Random random;

    public SparkStreamingDataManuallyProducerForKafka(String topic) {
        dateToday = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
        this.topic = topic;
        random = new Random();
        Properties properties = new Properties();
        // Kafka broker list
        properties.put("metadata.broker.list", "node01:9092,node02:9092,node03:9092");
        // serializer.class sets the encoder for message values (and, by default, for keys)
        properties.put("serializer.class", StringEncoder.class.getName());
        producerForKafka = new Producer<Integer, String>(new ProducerConfig(properties));
    }


    @Override
    public void run() {
        int counter = 0;
        while (true) {
            counter++;
            String userLog = userlogs();
            // System.out.println("product:" + userLog + " ");
            producerForKafka.send(new KeyedMessage<Integer, String>(topic, userLog));
            // pause for 2 seconds after every two messages
            if (0 == counter % 2) {
                counter = 0;
                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    public static void main(String[] args) {
        new SparkStreamingDataManuallyProducerForKafka("t0325").start();
    }


    // generate one random log line
    private static String userlogs() {

        StringBuffer userLogBuffer = new StringBuffer("");
        int[] unregisteredUsers = new int[]{1, 2, 3, 4, 5, 6, 7, 8};
        long timestamp = new Date().getTime();
        Long userID = 0L;
        long pageID = 0L;

        // randomly generated user ID; with probability 1/8 the user is unregistered (null ID)
        if (unregisteredUsers[random.nextInt(8)] == 1) {
            userID = null;
        } else {
            userID = (long) random.nextInt(2000);
        }

        // randomly generated page ID
        pageID = random.nextInt(2000);

        // random channel
        String channel = channelNames[random.nextInt(10)];

        // random action
        String action = actionNames[random.nextInt(2)];

        userLogBuffer.append(dateToday)
                .append("\t")
                .append(timestamp)
                .append("\t")
                .append(userID)
                .append("\t")
                .append(pageID)
                .append("\t")
                .append(channel)
                .append("\t")
                .append(action);
        // .append("\n");

        System.out.println(userLogBuffer.toString());
        return userLogBuffer.toString();
    }

}
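
The producer above uses the legacy Scala producer API (kafka.javaapi.producer.Producer), which was removed in later Kafka releases. As a rough sketch only (the class name and the sample log line are made up for illustration), the same keyless send could be written against the newer org.apache.kafka.clients.producer API like this:

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class NewApiProducerSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        // bootstrap.servers replaces metadata.broker.list; same brokers assumed as above
        props.put("bootstrap.servers", "node01:9092,node02:9092,node03:9092");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
        // send one record without a key, just as the legacy code does
        producer.send(new ProducerRecord<String, String>("t0325",
                "2018-07-22\t1532241234567\t42\t7\tSpark\tView"));
        producer.close();
    }
}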

<----------------------------------------------------------------------------------------------------------------------------------------------->

package com.bjsxt.sparkstreaming;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;

import scala.Tuple2;
/**
 * In receiver mode the read parallelism is determined by spark.streaming.blockInterval.
 * @author root
 *
 */
public class SparkStreamingOnKafkaReceiver {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SparkStreamingOnKafkaReceiver")
                .setMaster("local[2]");
        // enable the write-ahead log (WAL)
        conf.set("spark.streaming.receiver.writeAheadLog.enable", "true");

        JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(5));
        jsc.checkpoint("./receivedata");
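        // The WAL is written under the checkpoint directory, so a checkpoint path is
        // required whenever the write-ahead log is enabled.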

Map<String, Integer> topicConsumerConcurrency = new HashMap<String, Integer>();
/**
* 设置读取的topic和接受数据的线程数
*/
topicConsumerConcurrency.put("t0325", 1);
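        // Note: this thread count only adds consumer threads inside the single receiver;
        // it does not increase the number of partitions of the resulting RDDs.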

        /**
         * First argument: the StreamingContext
         * Second argument: the ZooKeeper quorum (offsets and other metadata are read from
         *                  ZooKeeper when consuming from Kafka)
         * Third argument: the consumer group
         * Fourth argument: the topics to consume and the number of threads reading each
         *                  topic's partitions
         *
         * Note:
         * the five-argument overload of KafkaUtils.createStream also sets the receiver's
         * storage level
         */
        /* JavaPairReceiverInputDStream<String, String> lines = KafkaUtils.createStream(
                jsc,
                "node02:2181,node03:2181,node04:2181",
                "MyFirstConsumerGroup",
                topicConsumerConcurrency,
                StorageLevel.MEMORY_AND_DISK());
        */
        JavaPairReceiverInputDStream<String, String> lines = KafkaUtils.createStream(
                jsc,
                "node02:2181,node03:2181,node04:2181",
                "MyFirstConsumerGroup",
                topicConsumerConcurrency);

JavaDStream<String> words = lines.flatMap(new FlatMapFunction<Tuple2<String,String>, String>() {

/**
*
*/
private static final long serialVersionUID = 1L;

public Iterable<String> call(Tuple2<String,String> tuple) throws Exception {
return Arrays.asList(tuple._2.split("\t"));
}
});


JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {

/**
*
*/
private static final long serialVersionUID = 1L;

public Tuple2<String, Integer> call(String word) throws Exception {
return new Tuple2<String, Integer>(word, 1);
}
});


JavaPairDStream<String, Integer> wordsCount = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
//对相同的Key,进行Value的累计(包括Local和Reducer级别同时Reduce)

/**
*
*/
private static final long serialVersionUID = 1L;

public Integer call(Integer v1, Integer v2) throws Exception {
return v1 + v2;
}
});


        wordsCount.print(100);

        jsc.start();
        jsc.awaitTermination();
        jsc.close();
    }

}
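
To try the pair end to end (assuming the topic t0325 already exists on the brokers): start SparkStreamingDataManuallyProducerForKafka's main to feed tab-separated log lines into Kafka, then start SparkStreamingOnKafkaReceiver; for every 5-second batch the job splits each message on "\t" and prints up to 100 word counts.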

Reposted from www.cnblogs.com/zy-sai/p/9349763.html