Several ways to consume Kafka with multiple threads

Kafka API: https://kafka.apache.org/0100/javadoc/index.html?org/apache/kafka/clients/consumer/KafkaConsumer.html

Zhihu answer on multi-threaded consumption: https://www.zhihu.com/question/57483708/answer/153185829

1. New high-level API consumer: one consumer per thread.

import com.atguigu.datacosumer.util.PropertyUtil;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;

/**
 * @author wade
 * @create 2019-03-20 12:26
 */
public class MyTestThreads {
    public static void main(String[] args) {
        /*
        Sample output:
        a consumed ddd, partition 0, offset 15
        b consumed eee, partition 2, offset 17
        a consumed wade, partition 1, offset 17
        a consumed pual, partition 0, offset 16
         */

        //KafkaConsumer is not thread-safe: sharing a single instance across threads throws
        //ConcurrentModificationException: KafkaConsumer is not safe for multi-threaded access
        //so each thread builds its own consumer
        new Thread(new MyConsumerThread(), "a").start();

        new Thread(new MyConsumerThread(), "b").start();

    }
}

class MyConsumerThread implements Runnable {
    KafkaConsumer<String, String> consumer;
    public MyConsumerThread() {
        Properties properties = PropertyUtil.properties;
        consumer = new KafkaConsumer<>(properties);
    }
    @Override
    public void run() {
        consumer.subscribe(Arrays.asList("dai"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(1000);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(Thread.currentThread().getName() + " consumed " + record.value()
                        + ", partition " + record.partition() + ", offset " + record.offset());
            }

        }

    }
    //./kafka-consumer-groups.sh --bootstrap-server hadoop103:9092 --new-consumer --group g1  --describe
    /*
    TOPIC  PARTITION  CURRENT-OFFSET  LOG-END-OFFSET  LAG  CONSUMER-ID                                      HOST           CLIENT-ID
    dai    0          19              19              0    consumer-1-de94a73e-78ab-4097-9a4b-05a44b0efdfa  /192.168.11.1  consumer-1
    dai    1          20              20              0    consumer-1-de94a73e-78ab-4097-9a4b-05a44b0efdfa  /192.168.11.1  consumer-1
    dai    2          20              20              0    consumer-2-da47cf63-fcc8-44b1-ae9e-760dba4df284  /192.168.11.1
     */
}
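
All three examples read their configuration from PropertyUtil.properties, which the post never shows. Below is a hypothetical stand-in sketch: the key names are the standard new-consumer (0.10.x) settings, and the broker address and group id are assumptions taken from the shell output above.

import java.util.Properties;

//Hypothetical stand-in for the unshown PropertyUtil; values are guesses
//based on the consumer-group output above, not the author's actual config.
public class PropertyUtil {
    public static final Properties properties = new Properties();
    static {
        properties.put("bootstrap.servers", "hadoop103:9092");
        properties.put("group.id", "g1");
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("enable.auto.commit", "true"); //approach 3 discusses the trade-offs of auto-commit
    }
}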

2. Old high-level API consumer: one consumer, three threads.

import com.atguigu.datacosumer.util.PropertyUtil;
import kafka.consumer.*;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * @author wade
 * @create 2019-03-20 15:50
 */
public class MyTestThreads2 {


    public static void main(String[] args) {
        //this is the old high-level API; it still coordinates through ZooKeeper
        Properties properties = PropertyUtil.properties;
        ConsumerConfig config = new ConsumerConfig(properties);
        ConsumerConnector connector = Consumer.createJavaConsumerConnector(config);
        HashMap<String, Integer> map = new HashMap<>();
        //request 3 KafkaStreams for topic "dai": one stream per consumer thread
        map.put("dai", 3);
        Map<String, List<KafkaStream<byte[], byte[]>>> messageStreams = connector.createMessageStreams(map);

        List<KafkaStream<byte[], byte[]>> kafkaStreams = messageStreams.get("dai");


        for (KafkaStream<byte[], byte[]> stream : kafkaStreams) {
            new Thread(new MyThreads(stream)).start();
        }
    }
}
class MyThreads implements Runnable {
    KafkaStream<byte[], byte[]> stream;

    public MyThreads(KafkaStream<byte[], byte[]> stream) {
        this.stream = stream;
    }

    @Override
    public void run() {
        ConsumerIterator<byte[], byte[]> iterator = stream.iterator();
        while (iterator.hasNext()){
            MessageAndMetadata<byte[], byte[]> metadata = iterator.next();
            System.out.println(
                    Thread.currentThread().getName() + " consumed =>> " + new String(metadata.message())
                            + " topic=> " + metadata.topic()
                            + " partition=> " + metadata.partition()
                            + " offset=> " + metadata.offset()
            );
        }
    }
}
//     ./kafka-consumer-groups.sh --zookeeper hadoop103:2181 --group g1  --describe
/*
TOPIC                          PARTITION  CURRENT-OFFSET  LOG-END-OFFSET  LAG        CONSUMER-ID
dai                            0          19              20              1          g1_DESKTOP-HGSVH26-1553070141911-a078e09d
dai                            1          20              22              2          g1_DESKTOP-HGSVH26-1553070141911-a078e09d
dai                            2          20              21              1          g1_DESKTOP-HGSVH26-1553070141911-a078e09d

 */
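
The old high-level consumer is configured through ZooKeeper rather than the brokers, so PropertyUtil.properties would need a different set of keys here. A minimal sketch, where the ZooKeeper address is an assumption mirroring the --zookeeper flag in the command above and the timeouts are illustrative:

//Hypothetical old-consumer configuration: the old API coordinates group
//membership and offsets via ZooKeeper, not bootstrap.servers.
Properties properties = new Properties();
properties.put("zookeeper.connect", "hadoop103:2181"); //from the --zookeeper flag above
properties.put("group.id", "g1");
properties.put("zookeeper.session.timeout.ms", "4000"); //assumed session timeout
properties.put("auto.commit.interval.ms", "1000");      //assumed commit interval
ConsumerConfig config = new ConsumerConfig(properties);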

3. New high-level API: decoupling consumption from processing.

import com.atguigu.datacosumer.util.PropertyUtil;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.LinkedBlockingQueue;

/**
 * @author wade
 * @create 2019-03-20 18:04
 */
public class MyThreadsTest4 {
    public static void main(String[] args) throws InterruptedException {
        KafkaConsumer<String, String> consumer;
        Properties properties = PropertyUtil.properties;
        consumer = new KafkaConsumer<>(properties);
        consumer.subscribe(Arrays.asList("dai"));
        ConsumerRecords<String, String> records;

        /*
        LinkedBlockingQueue is thread-safe: take() blocks when the queue is empty and
        put() blocks when it is full. It is shared between the polling thread and the
        processing threads, acting much like an in-process message queue.
         */
        LinkedBlockingQueue<ConsumerRecords<String, String>> list = new LinkedBlockingQueue<>();


        new Thread(new MyThread4(list), "bb").start();

        new Thread(new MyThread4(list), "aa").start();

        while (true) {
            records = consumer.poll(1000);
            list.put(records);
            //better to commit manually here rather than rely on auto-commit:
            //the default auto-commit can commit offsets late, so restarting after a
            //shutdown replays records (duplicate consumption); even so, data loss
            //cannot be fully ruled out, because committed records may still be
            //sitting unprocessed in the queue
            //consumer.commitAsync();
        }
    }
}
/**
 * Decoupling consumption from processing:
 * one or more consumer threads do all the polling and hand ConsumerRecords
 * instances to a blocking queue that multiple processor threads (or a pool) drain.
 * PRO: consumption and processing scale independently; one consumer can feed many
 *      processor threads, so processing is no longer capped by the partition count.
 *      (Without decoupling, consumption and processing share a thread and, because
 *      of offset commits, consumer threads are limited by the number of partitions:
 *      any extra threads just idle. Decoupled, the processor threads are
 *      unrestricted, while the consumer threads remain limited.)
 * CON: ordering is a problem: across processor threads nothing is guaranteed, so data
 *      taken from the queue earlier may finish processing later than data taken after it.
 * CON: committing offsets manually becomes hard, risking both data loss and
 *      duplicate consumption.
 *
 * From the KafkaConsumer javadoc:
 * 2. Decouple Consumption and Processing
 * Another alternative is to have one or more consumer threads that do all data consumption and hands off ConsumerRecords instances to a blocking queue consumed by a pool of processor threads that actually handle the record processing.
 * This option likewise has pros and cons:
 * PRO: This option allows independently scaling the number of consumers and processors.
 *      This makes it possible to have a single consumer that feeds many processor threads, avoiding any limitation on partitions.
 * CON: Guaranteeing order across the processors requires particular care as the threads will execute independently an earlier chunk of data may actually be processed after a later chunk of data just due to the luck of thread execution timing.
 *      For processing that has no ordering requirements this is not a problem.
 * CON: Manually committing the position becomes harder as it requires that all threads co-ordinate to ensure that processing is complete for that partition.
 *      There are many possible variations on this approach. For example each processor thread can have its own queue,
 *      and the consumer threads can hash into these queues using the TopicPartition to ensure in-order consumption and simplify commit.
 */

class MyThread4 implements Runnable {

    LinkedBlockingQueue<ConsumerRecords<String, String>> list;

    public MyThread4(LinkedBlockingQueue<ConsumerRecords<String, String>> list) {
        this.list = list;
    }

    @Override
    public void run() {
        while (true) {
            ConsumerRecords<String, String> consumerRecords;
            try {
                consumerRecords = list.take();
                for (ConsumerRecord<String, String> consumerRecord : consumerRecords) {
                    System.out.println(Thread.currentThread().getName()
                            + " consumed: " + consumerRecord.value()
                            + ", partition: " + consumerRecord.partition()
                            + ", offset: " + consumerRecord.offset()
                    );
                }
            } catch (InterruptedException e) {
                e.printStackTrace();
            }


        }
    }
}
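
The last CON in the javadoc quote points at a refinement this post does not implement: give each processor thread its own queue and route records by TopicPartition, so the same partition is always handled by the same thread and per-partition order survives. A minimal sketch of the routing step, assuming the queues and processor threads are set up elsewhere (all names here are illustrative):

import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

class PartitionedRouter {
    //One queue per processor thread; the same TopicPartition always hashes to the
    //same queue, so order within a partition is preserved. Only cross-partition
    //ordering is given up, which Kafka does not guarantee anyway.
    static void pollAndRoute(KafkaConsumer<String, String> consumer,
                             List<LinkedBlockingQueue<List<ConsumerRecord<String, String>>>> queues)
            throws InterruptedException {
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(1000);
            for (TopicPartition tp : records.partitions()) {
                int index = (tp.hashCode() & 0x7fffffff) % queues.size();
                queues.get(index).put(records.records(tp));
            }
        }
    }
}

Committing also gets simpler under this scheme, since each partition's progress is owned by exactly one processor thread.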

Reposted from blog.csdn.net/qq_42506914/article/details/88866891