Kafka Programming (Java)

Preparation

Import kafka-clients-1.1.0.jar (the org.apache.kafka:kafka-clients:1.1.0 artifact) into the project.

producer

demo

package com.leetech.kafka;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

public class Producer {
    private static final String topic = "kafkaTopic";
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.0.8.174:9096,10.0.8.231:9096");
        props.put("acks", "all");
        props.put("retries", "0");
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Create the producer instance
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
        // Send business messages with incrementing keys and values
        int i = 1;
        int j = 1;
        while (true) {
            Thread.sleep(1000);
            producer.send(new ProducerRecord<String, String>(topic, "key:" + i, "value:" + j));
            System.out.println("key:" + i + " " + "value:" + j);
            i++;
            j++;
        }
    }
}

Notes

  • batch.size: the number of bytes buffered per partition before a batch is sent.
  • buffer.memory: the total buffer memory available to the producer; send() blocks once it is full.
  • The send method
public Future<RecordMetadata> send(ProducerRecord<K,V> record, Callback callback)
// Sends a record asynchronously: returns as soon as the record is stored in the
// producer's buffer, and the callback is invoked when the send completes.
// On success the result is a RecordMetadata object containing the offset, partition number, and timestamp.

// Block on the result with get():
ProducerRecord<byte[],byte[]> record = new ProducerRecord<byte[],byte[]>("my-topic", key, value);
producer.send(record).get();

// Non-blocking: send, then handle the result in a callback:
ProducerRecord<byte[],byte[]> record = new ProducerRecord<byte[],byte[]>("the-topic", key, value);
producer.send(record,
               new Callback() {
                   public void onCompletion(RecordMetadata metadata, Exception e) {
                       if (e != null)
                           e.printStackTrace();
                       else
                           System.out.println("The offset of the record we just sent is: " + metadata.offset());
                   }
               });
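
To read back the RecordMetadata fields mentioned above, you can block on the Future returned by send; a minimal sketch:

// Blocks until the send is acknowledged, then reads the metadata fields.
RecordMetadata metadata = producer.send(record).get();
System.out.printf("partition=%d, offset=%d, timestamp=%d%n",
        metadata.partition(), metadata.offset(), metadata.timestamp());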

consumer

demo

Regular consumption

package com.leetech.kafka;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Properties;

public class Consumer {
    private static final Logger logger = LoggerFactory.getLogger(Consumer.class);
    private static final String topic = "flumetest";

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "10.0.8.174:9096,10.0.8.231:9096,10.0.8.34:9096");
        props.put("group.id", "tbl_stream");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
        consumer.subscribe(Arrays.asList(topic));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(500);
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("offset = %d, key = %s, value = %s%n", record.offset(), record.key(), record.value());
            }
        }
    }
}

Consuming from a manually set offset


package com.leetech.kafka;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.Set;

public class Consumer_Ruide_Reverse {
    private static final Logger logger = LoggerFactory.getLogger(Consumer_Ruide_Reverse.class);
    private static final String topic = "tbl_stream";

    public static void main(String[] args) throws InterruptedException {
        Properties props = new Properties();
        props.put("bootstrap.servers","192.168.1.48:9096,192.168.1.49:9096,192.168.1.50:9096");
        props.put("group.id", "test_like2");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
        Collection<TopicPartition> topicPartition = new ArrayList<TopicPartition>();
        // Assign all partitions manually (this assumes the topic has 6 partitions, 0-5)
        for (int i = 0; i <= 5; i++) {
            topicPartition.add(new TopicPartition(topic, i));
        }
        consumer.assign(topicPartition);

// Consume from the beginning:
        consumer.seekToBeginning(topicPartition);

// Or consume from a specific offset:
//        for(int i=0;i<=5;i++) {
//            consumer.seek(new TopicPartition(topic, i), 4800000);
//        }

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(500);
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("partition = %d , offset = %d, key = %s, value = %s%n",record.partition(),record.offset(), record.key(), record.value());
            }
        }
    }
}

Notes

The poll method:

After a consumer first calls poll, it automatically joins its consumer group, and it stays in the group by continuing to call poll.
The consumer sends periodic heartbeats to the server, but it must also call poll within max.poll.interval.ms to be considered alive.
So if you process messages inside the poll loop, watch that processing time does not exceed max.poll.interval.ms.
If message-processing time is predictable, process messages in the poll loop and tune the following settings (see the example after this list):

max.poll.interval.ms (the maximum allowed interval between calls to poll)
max.poll.records (the maximum number of records returned by a single poll)
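
For example (the values here are only illustrative):

// Allow up to 5 minutes between polls and return at most 100 records per poll,
// giving each record roughly 3 seconds of processing headroom.
props.put("max.poll.interval.ms", "300000");
props.put("max.poll.records", "100");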

If processing time is unpredictable, it is better for the poll loop to only fetch records and hand them to another thread for processing, as in the sketch below.
(Disable auto-commit of offsets, pause the partitions, and stop fetching new messages until the backlog is processed; otherwise the backlog can exhaust memory.)
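
A minimal sketch of that hand-off, assuming a single worker thread and a hypothetical process() method for the business logic. Offset commits are omitted: KafkaConsumer is not thread-safe, so in a real implementation commits must happen on the polling thread.

ExecutorService worker = Executors.newSingleThreadExecutor();
final AtomicBoolean busy = new AtomicBoolean(false);

while (true) {
    ConsumerRecords<String, String> records = consumer.poll(100);
    if (!records.isEmpty() && busy.compareAndSet(false, true)) {
        // Stop fetching new records while this batch is processed;
        // poll() must still be called so the consumer stays alive in the group.
        consumer.pause(consumer.assignment());
        final List<ConsumerRecord<String, String>> batch = new ArrayList<ConsumerRecord<String, String>>();
        for (ConsumerRecord<String, String> record : records) {
            batch.add(record);
        }
        worker.submit(new Runnable() {
            public void run() {
                process(batch);   // hypothetical processing method
                busy.set(false);  // allow the poll loop to resume fetching
            }
        });
    } else if (!busy.get()) {
        consumer.resume(consumer.assignment());
    }
}

pause() keeps the group membership alive because poll() can still be called while no records are returned; resume() re-enables fetching once the batch is done.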

Manually assigning partitions and offsets:

// Assign partitions:
Collection<TopicPartition> topicPartition = new ArrayList<TopicPartition>();
// Give this consumer partitions 1 and 2; one or more partitions can be added
TopicPartition partition1 = new TopicPartition(topic, 1);
TopicPartition partition2 = new TopicPartition(topic, 2);
topicPartition.add(partition1);
topicPartition.add(partition2);
consumer.assign(topicPartition);
// Start consuming from the beginning of these partitions
consumer.seekToBeginning(topicPartition);
// Or start consuming from a specific offset in a given partition
consumer.seek(partition1, 4800000);
consumer.seek(partition2, 5000000);

Committing offsets manually

Accumulate records until a batch is large enough, then process the batch and commit manually. This prevents the case where processing suddenly fails after the offsets have already been auto-committed, which would lose those messages.

props.put("enable.auto.commit", "false");
final int minBatchSize = 200;
List<ConsumerRecord<String, String>> buffer = new ArrayList<>();
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(100);
    for (ConsumerRecord<String, String> record : records) {
        buffer.add(record);
    }
    if (buffer.size() >= minBatchSize) {
        insertIntoDb(buffer);
        consumer.commitSync();
        buffer.clear();
    }

Committing offsets manually per partition

try {
    while (running) {
        ConsumerRecords<String, String> records = consumer.poll(Long.MAX_VALUE);
        for (TopicPartition partition : records.partitions()) {
            List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
            for (ConsumerRecord<String, String> record : partitionRecords) {
                System.out.println(record.offset() + ": " + record.value());
            }
            long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
            // The committed offset is the position of the next record to read, hence lastOffset + 1
            consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
        }
    }
} finally {
    consumer.close();
}


Reposted from blog.csdn.net/lk7688535/article/details/80449963