准备
导入kafka-clients-1.1.0.jar
producer
demo
package com.leetech.kafka;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;
public class Producer{
    private static final String topic = "kafkaTopic";

    /**
     * Demo producer: sends one "key:i / value:j" string message to {@code kafkaTopic}
     * every second, forever, printing each payload to stdout.
     *
     * @param args unused
     * @throws Exception propagates {@link InterruptedException} from {@code Thread.sleep}
     *                   and any construction failure of the Kafka client
     */
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.0.8.174:9096,10.0.8.231:9096");
        props.put("acks", "all");              // wait for the full ISR to acknowledge each send
        props.put("retries", "0");
        props.put("batch.size", 16384);        // per-partition batch buffer, in bytes
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);  // total memory available to the producer, in bytes
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Producer instance — parameterized (the original used a raw type, which
        // produces unchecked warnings and loses compile-time key/value type safety).
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
        // Send business messages.
        // FIX: i and j were assigned without declarations ("i=1;"), which does not
        // compile in Java. Declared as ints; values are intentionally left constant
        // to preserve the original behavior (every message is "key:1" / "value:1").
        int i = 1;
        int j = 1;
        while (true) {
            Thread.sleep(1000);
            producer.send(new ProducerRecord<String, String>(topic, "key:" + i, "value:" + j));
            System.out.println("key:" + i + " " + "value:" + j);
        }
    }
}
说明
- batch.size:每个分区的缓存数据大小。
- buffer.memory:producer可用的缓冲区的大小。
- send方法
public Future<RecordMetadata> send(ProducerRecord<K,V> record,Callback callback)
//异步发送一条消息,当保存到producer的buffer area后,立即返回并调用callback。
//发送成功的返回值是个RecordMetadata对象,包含offset、分区号、时间戳。
//通过get方法阻塞:
ProducerRecord<byte[],byte[]> record = new ProducerRecord<byte[],byte[]>("my-topic", key, value);
producer.send(record).get();
//无阻塞,发送后调用回调函数:
ProducerRecord<byte[],byte[]> record = new ProducerRecord<byte[],byte[]>("the-topic", key, value);
producer.send(record,
new Callback() {
public void onCompletion(RecordMetadata metadata, Exception e) {
if(e != null)
e.printStackTrace();
System.out.println("The offset of the record we just sent is: " + metadata.offset());
}
});
consumer
demo
常规消费
package com.leetech.kafka;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Properties;
public class Consumer{
    private static final Logger logger = LoggerFactory.getLogger(Consumer.class);
    private static final String topic = "flumetest";

    /**
     * Builds the consumer configuration: auto-commit every second, string
     * key/value deserialization, and "earliest" reset so a brand-new group
     * starts from the beginning of the topic.
     */
    private static Properties consumerConfig() {
        Properties config = new Properties();
        config.put("bootstrap.servers", "10.0.8.174:9096,10.0.8.231:9096,10.0.8.34:9096");
        config.put("group.id", "tbl_stream");
        config.put("enable.auto.commit", "true");
        config.put("auto.commit.interval.ms", "1000");
        config.put("auto.offset.reset", "earliest");
        config.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        config.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        return config;
    }

    /**
     * Demo consumer: subscribes to {@code flumetest} and polls forever,
     * printing offset/key/value for every record received.
     */
    public static void main(String[] args) {
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(consumerConfig());
        consumer.subscribe(Arrays.asList(topic));
        for (;;) {
            ConsumerRecords<String, String> batch = consumer.poll(500);
            for (ConsumerRecord<String, String> msg : batch) {
                System.out.printf("offset = %d, key = %s, value = %s%n", msg.offset(), msg.key(), msg.value());
            }
        }
    }
}
手动设置offset消费
package com.leetech.kafka;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.Set;
public class Consumer_Ruide_Reverse{
    // FIX: the logger was obtained via Consumer.class (copy-paste from the other
    // demo), which mislabels every log line with the wrong class name.
    private static final Logger logger = LoggerFactory.getLogger(Consumer_Ruide_Reverse.class);
    private static final String topic = "tbl_stream";

    /**
     * Demo consumer that bypasses group-managed subscription: it manually
     * assigns partitions 0..5 of {@code tbl_stream}, seeks to the beginning of
     * each, and polls forever, printing partition/offset/key/value per record.
     *
     * NOTE(review): the partition count (6) is hard-coded — it assumes the topic
     * has exactly partitions 0 through 5; verify against the actual topic config.
     *
     * @param args unused
     * @throws InterruptedException declared for ad-hoc sleeps during debugging
     */
    public static void main(String[] args) throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers","192.168.1.48:9096,192.168.1.49:9096,192.168.1.50:9096");
        props.put("group.id", "test_like2");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
        Collection<TopicPartition> topicPartition = new ArrayList<TopicPartition>();
        for(int i = 0; i<=5; i++) {
            topicPartition.add(new TopicPartition(topic, i));
        }
        // assign() takes the partitions directly, without joining a consumer group's
        // rebalance protocol — required for the explicit seek calls below.
        consumer.assign(topicPartition);
        // Consume from the beginning of every assigned partition:
        consumer.seekToBeginning(topicPartition);
        // Or consume from a specific offset per partition:
        // for(int i=0;i<=5;i++) {
        //     consumer.seek(new TopicPartition(topic, i), 4800000);
        // }
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(500);
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("partition = %d , offset = %d, key = %s, value = %s%n",record.partition(),record.offset(), record.key(), record.value());
            }
        }
    }
}
说明
poll方法:
consumer调用poll方法后,会自动加入consumer group,通过持续调用poll保持可用;
consumer定期发送心跳到server、且必须在max.poll.interval.ms内调用poll才能保持alive;
so 如果在poll方法中处理消息,则要小心时间是否会超过max.poll.interval.ms;
如果可以预测消息处理的时间,就在poll中处理消息时,配置下poll吧:
max.poll.interval.ms(调用poll的最大间隔)
max.poll.records(每次调用poll返回的消息数量)
如果不可预测消息处理时间,建议poll方法只返回record,处理record放在其他线程;
(暂停自动提交offset、pause分区、不poll新消息,否则可能导致内存溢出)
手动设置分区和offset:
//设置分区:
Collection<TopicPartition> topicPartition = new ArrayList<TopicPartition>();
//设置消费者的分区号为1和2,可以增加1个或多个分区
TopicPartition partition1 = new TopicPartition(topic, 1);
TopicPartition partition2 = new TopicPartition(topic, 2);
topicPartition.add(partition1);
topicPartition.add(partition2);
consumer.assign(topicPartition);
//设置消费者从这些分区的最初位置消费
consumer.seekToBeginning(topicPartition);
//设置消费者从某个分区的某个offset消费
consumer.seek(partition1 , 4800000);
consumer.seek(partition2 , 5000000);
手动提交offset
积累够一定数量则执行计算并手动提交,防止计算时突然failed,但自动提交了offset,导致消息丢失。
props.put("enable.auto.commit", "false");
final int minBatchSize = 200;
List<ConsumerRecord<String, String>> buffer = new ArrayList<>();
while (true) {
ConsumerRecords<String, String> records = consumer.poll(100);
for (ConsumerRecord<String, String> record : records) {
buffer.add(record);
}
if (buffer.size() >= minBatchSize) {
insertIntoDb(buffer);
consumer.commitSync();
buffer.clear();
}
}
按分区精确的手动提交offset
try {
while(running) {
ConsumerRecords<String, String> records = consumer.poll(Long.MAX_VALUE);
for (TopicPartition partition : records.partitions()) {
List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
for (ConsumerRecord<String, String> record : partitionRecords) {
System.out.println(record.offset() + ": " + record.value());
}
long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
}
}
} finally {
consumer.close();
}