从指定offset消费kafka数据
java从指定offset消费kafka数据
代码
import com.google.common.collect.Maps;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
/**
 * Consumes Kafka messages starting from a specific offset of a given
 * topic partition and logs each record — useful for replaying or
 * inspecting individual (e.g. poison-pill) messages.
 *
 * Create by haohongtao
 * Date 2021/4/20 3:02 PM
 */
@Slf4j
public class KafkaConsumerTest {

    private static final KafkaConsumer<String, String> consumer;

    // Initialize the consumer; adjust these properties for your environment.
    static {
        Map<String, Object> props = Maps.newHashMap();
        // Cluster bootstrap address
        props.put("bootstrap.servers", "test-kafka..com:9092");
        // Use a fresh, unique group id on every run: within one group an
        // offset is only consumed once, so reusing a group id would skip
        // already-consumed records.
        props.put("group.id", "test-" + System.currentTimeMillis());
        // Manual commit only — this tool must never advance the group offset.
        props.put("enable.auto.commit", "false");
        // Generous session timeout, convenient while debugging
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // Fetch one record per poll — handy when examining a single bad message
        props.put("max.poll.records", 1);
        consumer = new KafkaConsumer<>(props);
    }

    /**
     * Seeks to {@code offset} on {@code topicName}/{@code partition} and
     * polls one batch, logging every record received.
     *
     * @param topicName topic to read from
     * @param partition partition index within the topic
     * @param offset    offset to seek to before polling
     */
    public static void consume(String topicName, Integer partition, Integer offset) {
        TopicPartition topicPartition = new TopicPartition(topicName, partition);
        // Manual assignment: no subscription, no consumer-group rebalancing
        consumer.assign(Collections.singletonList(topicPartition));
        // Position the consumer at the requested offset without committing anything
        consumer.seek(topicPartition, offset);
        // consumer.seekToBeginning(Collections.singletonList(topicPartition));
        for (int i = 0; i < 1; i++) {
            // Keep the poll timeout generous: with only 1000 ms the poll can
            // return empty even though the broker holds data.
            // poll(Duration) replaces the deprecated poll(long) overload.
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(30000));
            log.info("records length = {}", records.count());
            for (ConsumerRecord<String, String> record : records) {
                log.info("topic = {}, partition = {}, offset = {}, key = {}, value = {}\n",
                        record.topic(), record.partition(), record.offset(),
                        record.key(), record.value());
            }
        }
    }

    public static void main(String[] args) {
        String topicName = "my_topic_test";
        Integer partition = 4;
        Integer offset = 14103;
        try {
            consume(topicName, partition, offset);
        } finally {
            // KafkaConsumer is AutoCloseable; close it to release network
            // connections and leave the group cleanly.
            consumer.close();
        }
    }
}
问题
消费不到数据
可能原因 1:poll 的 timeout 设置过小。timeout 为 1000ms 时可能拿不到数据(poll 返回空),把它调大(例如 30000ms)即可:
//poll消息timeout可以设置大一点,由此设置1000,拿不到数据,debug查看cluster发现没有信息
ConsumerRecords<String, String> records = consumer.poll(30000);
spark版本
比较简单,在properties里设置即可
后续补充代码
flink版本
同 spark,在 properties 里设置即可,后续补充代码。