Kafka 02 - Three important Kafka clients


1. Introduction

1.1 About the installation of Kafka

1.2 Introduction to common clients

  • AdminClient API:
    Allows managing and inspecting Topics, Brokers, and other Kafka objects.
  • Producer API:
    Publishes messages to one or more Topics.
  • Consumer API:
    Subscribes to one or more Topics and processes the messages produced to them.

1.3 Dependencies

  • as follows:

            <!--kafka客户端-->
            <dependency>
                <groupId>org.apache.kafka</groupId>
                <artifactId>kafka-clients</artifactId>
                <version>2.8.2</version>
            </dependency>
            
    
  • The complete pom.xml:

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <parent>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-parent</artifactId>
            <version>2.7.6</version>
            <relativePath/> <!-- lookup parent from repository -->
        </parent>
        <groupId>com.liu.susu</groupId>
        <artifactId>kafka-api</artifactId>
        <version>0.0.1-SNAPSHOT</version>
        <name>kafka-api</name>
        <description>kafka-api</description>
        <properties>
            <java.version>1.8</java.version>
        </properties>
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-web</artifactId>
            </dependency>
    
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-test</artifactId>
                <scope>test</scope>
            </dependency>
    
            <dependency>
                <groupId>ch.qos.logback</groupId>
                <artifactId>logback-classic</artifactId>
                <version>1.2.11</version>
            </dependency>
    
            <!--kafka客户端-->
            <dependency>
                <groupId>org.apache.kafka</groupId>
                <artifactId>kafka-clients</artifactId>
                <version>2.8.2</version>
            </dependency>
    
        </dependencies>
    
        <build>
            <plugins>
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                </plugin>
            </plugins>
        </build>
    
    </project>
    

2. AdminClient

2.1 Admin Configs
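
  • Detailed configuration options are documented on the official website. The AdminClient takes the same Properties-style configuration as the other clients; the sketch below is illustrative (only bootstrap.servers is required, and the address and client id are placeholders):

    Properties props = new Properties();
    // Required: broker(s) to bootstrap from (placeholder address)
    props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    // Optional: client id that shows up in broker logs and metrics
    props.put(AdminClientConfig.CLIENT_ID_CONFIG, "my-admin-client");
    // Optional: overall timeout for admin API calls, in milliseconds
    props.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "30000");
    AdminClient adminClient = AdminClient.create(props);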

2.2 AdminClient API

2.2.1 Set AdminClient object

  • Please refer to the official website for detailed configuration. Simple configuration is as follows:

    package com.liu.susu.admin;
    
    import org.apache.kafka.clients.admin.AdminClient;
    import org.apache.kafka.clients.admin.AdminClientConfig;
    
    import java.util.Arrays;
    import java.util.List;
    import java.util.Properties;
    
    /**
     * @Description
     * @Author susu
     */
    public class AdminExample1 {
          
          
    
        public final static String TOPIC_NAME = "";
    
        /**
         * 1. 创建并设置 AdminClient 对象
         */
        public static AdminClient getAdminClient(){
          
          
            Properties properties = new Properties();
            properties.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka服务IP:9092");
    
            AdminClient adminClient = AdminClient.create(properties);
            return adminClient;
        }
    
        public static void main(String[] args) {
          
          
            //1. 测试 创建并设置 AdminClient 对象
            AdminClient adminClient = AdminExample1.getAdminClient();
            System.out.println("adminClient==>" + adminClient);
        }
    }
    
    


2.2.2 Create topic + get topic list

  • as follows:

        /**
         * 2. 创建topic
         */
        public static void createTopic(){
          
          
            AdminClient adminClient = getAdminClient();
            // 副本因子
            short rs = 1;
            NewTopic newTopic = new NewTopic("new_topic_test", 1, rs);//new_topic_test 是 topic的name
            CreateTopicsResult topics = adminClient.createTopics(Arrays.asList(newTopic));
            System.out.println("创建的新topic为::::" + topics);
        }
    
        /**
         * 3. 获取已经创建的 topic 的列表
         */
        public static ListTopicsResult getTopicList(){
          
          
            AdminClient adminClient = getAdminClient();
            ListTopicsResult topicList = adminClient.listTopics();
            return topicList;
        }
    
  • The test is as follows:

        public static void main(String[] args) throws ExecutionException, InterruptedException {
          
          
            //1. 测试 创建并设置 AdminClient 对象
    //        AdminClient adminClient = AdminExample1.getAdminClient();
    //        System.out.println("adminClient==>" + adminClient);
    
            //2. 测试 创建topic
            createTopic();
    
            //3. 获取已经创建的 topic 的列表
            ListTopicsResult topicList = getTopicList();
            Collection<TopicListing> topicListings = topicList.listings().get();
            for (TopicListing topic : topicListings) {
          
          
                System.out.println(topic);
            }
    
        }
    


2.2.3 Delete topic

  • as follows:
        /**
         * 4. 删除 topic
         */
        public static void deleteTopic(String topicName) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            DeleteTopicsResult deleteTopicsResult = adminClient.deleteTopics(Arrays.asList(topicName));
            deleteTopicsResult.all().get();
        }
    

2.2.4 View topic description information

  • as follows:

        /**
         * 5. 获取描述 topic 的信息
         */
        public static void getDescribeTopics(String topicName) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            DescribeTopicsResult result = adminClient.describeTopics(Arrays.asList(topicName));
            Map<String, TopicDescription> descriptionMap = result.all().get();
            descriptionMap.forEach((k,v)->{
          
          
                System.out.println("k==>"+k +",v===>"+v);
            });
        }
    


    k==>susu-topic,v===>(name=susu-topic, internal=false, partitions=(partition=0, leader=IP:9092 (id: 0 rack: null), replicas=IP:9092 (id: 0 rack: null), isr=IP:9092 (id: 0 rack: null)), authorizedOperations=null)
    

2.2.5 View topic configuration information

  • as follows:
        /**
         * 6. 获取 topic 的配置信息
         */
        public static void getDescribeConfig(String topicName) throws ExecutionException, InterruptedException{
          
          
            AdminClient adminClient = getAdminClient();
            ConfigResource resource = new ConfigResource(ConfigResource.Type.TOPIC,topicName);
            DescribeConfigsResult configsResult = adminClient.describeConfigs(Arrays.asList(resource));
            Map<ConfigResource, Config> configMap = configsResult.all().get();
            configMap.forEach((k,v)->{
          
          
                System.out.println("k==>"+k +",v===>"+v);
            });
        }
        
        // To read a single configuration entry (e.g. message.downconversion.enable); these lines go inside getDescribeConfig (see the full code in 2.3):
        Config config = configMap.get(resource);
        ConfigEntry configEntry = config.get("message.downconversion.enable");
        System.out.println("message.downconversion.enable===>" + configEntry.value());
    

2.2.6 Modify topic configuration information

  • as follows:
        /**
         * 7. 修改 topic 的配置信息
         *    本例修改 message.downconversion.enable,将默认的 true 改为 false
         */
        public static void editConfig(String topicName) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            Map<ConfigResource,Config> configMap = new HashMap<>();
    
            ConfigResource configResource = new ConfigResource(ConfigResource.Type.TOPIC,topicName);
    
            String keyName = "message.downconversion.enable";
            String value = "false";
            ConfigEntry configEntry = new ConfigEntry(keyName, value);
            Config config = new Config(Arrays.asList(configEntry));
    
            configMap.put(configResource,config);
    
            AlterConfigsResult alterConfigsResult = adminClient.alterConfigs(configMap);
            alterConfigsResult.all().get();
        }
    
  • The effect: after running editConfig and describing the configuration again, message.downconversion.enable shows false instead of the default true.
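
  • Note: in newer client versions alterConfigs is deprecated in favour of incrementalAlterConfigs, which changes only the entries you pass instead of replacing the whole configuration. The snippet below is a sketch of the same change with that API (class and method names are those of kafka-clients 2.3+, verify against your version); it is not part of the original example:

        // Sketch: the same change via incrementalAlterConfigs
        AdminClient adminClient = getAdminClient();
        ConfigResource resource = new ConfigResource(ConfigResource.Type.TOPIC, "susu-topic");
        ConfigEntry entry = new ConfigEntry("message.downconversion.enable", "false");
        AlterConfigOp op = new AlterConfigOp(entry, AlterConfigOp.OpType.SET); // SET updates just this key
        Map<ConfigResource, Collection<AlterConfigOp>> updates = new HashMap<>();
        updates.put(resource, Arrays.asList(op));
        adminClient.incrementalAlterConfigs(updates).all().get();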

2.2.7 Add Partition

2.2.7.1 Related concepts

  • Topic: a logical concept made up of one or more Partitions. It can be thought of as a queue; both producers and consumers work against a Topic.
  • Partition: the actual unit of message storage. For scalability, a very large Topic can be spread across multiple Brokers: a Topic can be split into multiple Partitions, and each Partition is an ordered queue (ordering is guaranteed within a partition, not globally).
  • Producer: the message producer, i.e. the role that publishes messages to Kafka.
  • Consumer: the message consumer, i.e. a client that pulls messages from Kafka and consumes them.
  • Broker: a Kafka server is a Broker; a cluster consists of multiple Brokers, and one Broker can host multiple Topics.

2.2.7.2 Demonstration

  • The code is as follows:
        /**
         * 8. 增加 topic 的Partitions
         */
        public static void addPartitionNum(String topicName, int partitionNum) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            Map<String,NewPartitions> partitionsMap = new HashMap<>() ;
    
            NewPartitions newPartitions = NewPartitions.increaseTo(partitionNum);//增加到的数量
    
            partitionsMap.put(topicName,newPartitions);
    
            CreatePartitionsResult request = adminClient.createPartitions(partitionsMap);
            request.all().get();
        }
    
  • The effect: the topic's partition count is increased to the requested number (in the test below, susu-topic goes from 1 partition to 2).

2.3 Attached code

  • as follows:
    package com.liu.susu.admin;
    
    import org.apache.kafka.clients.admin.*;
    import org.apache.kafka.common.KafkaFuture;
    import org.apache.kafka.common.config.ConfigResource;
    import org.apache.kafka.common.requests.CreatePartitionsRequest;
    
    import java.util.*;
    import java.util.concurrent.ExecutionException;
    
    /**
     * @Description
     * @Author susu
     */
    public class AdminExample1 {
          
          
    
        public final static String TOPIC_NAME = "new_topic_test";
    
        /**
         * 1. 创建并设置 AdminClient 对象
         */
        public static AdminClient getAdminClient(){
          
          
            Properties properties = new Properties();
            properties.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "43.143.190.116:9092");
    
            AdminClient adminClient = AdminClient.create(properties);
            return adminClient;
        }
    
    
        /**
         * 2. 创建topic
         */
        public static void createTopic(){
          
          
            AdminClient adminClient = getAdminClient();
            // 副本因子
            short rs = 1;
            NewTopic newTopic = new NewTopic("new_topic_test", 1, rs);
            CreateTopicsResult topics = adminClient.createTopics(Arrays.asList(newTopic));
            System.out.println("创建的新topic为::::" + topics);
        }
    
        /**
         * 3. 获取已经创建的 topic 的列表
         */
        public static ListTopicsResult getTopicList(){
          
          
            AdminClient adminClient = getAdminClient();
            ListTopicsResult topicList = adminClient.listTopics();
            return topicList;
        }
    
        /**
         * 4. 删除 topic
         */
        public static void deleteTopic(String topicName) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            DeleteTopicsResult deleteTopicsResult = adminClient.deleteTopics(Arrays.asList(topicName));
            deleteTopicsResult.all().get();
        }
    
        /**
         * 5. 获取描述 topic 的信息
         */
        public static void getDescribeTopics(String topicName) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            DescribeTopicsResult result = adminClient.describeTopics(Arrays.asList(topicName));
            Map<String, TopicDescription> descriptionMap = result.all().get();
            descriptionMap.forEach((k,v)->{
          
          
                System.out.println("k==>"+k +",v===>"+v);
            });
        }
    
        /**
         * 6. 获取 topic 的配置信息
         */
        public static void getDescribeConfig(String topicName) throws ExecutionException, InterruptedException{
          
          
            AdminClient adminClient = getAdminClient();
            ConfigResource resource = new ConfigResource(ConfigResource.Type.TOPIC,topicName);
            DescribeConfigsResult configsResult = adminClient.describeConfigs(Arrays.asList(resource));
            Map<ConfigResource, Config> configMap = configsResult.all().get();
            configMap.forEach((k,v)->{
          
          
                System.out.println("\nk==>"+k +",v===>"+v);
            });
    
            //查看某一项配置(eg:message.downconversion.enable)的值
            Config config = configMap.get(resource);
            ConfigEntry configEntry = config.get("message.downconversion.enable");
            System.out.println("message.downconversion.enable===>" + configEntry.value());
        }
    
        /**
         * 7. 修改 topic 的配置信息
         *    本例修改 message.downconversion.enable,将默认的 true 改为 false
         */
        public static void editConfig(String topicName) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            Map<ConfigResource,Config> configMap = new HashMap<>();
    
            ConfigResource configResource = new ConfigResource(ConfigResource.Type.TOPIC,topicName);
    
            String keyName = "message.downconversion.enable";
            String value = "false";
            ConfigEntry configEntry = new ConfigEntry(keyName, value);
            Config config = new Config(Arrays.asList(configEntry));
    
            configMap.put(configResource,config);
    
            AlterConfigsResult alterConfigsResult = adminClient.alterConfigs(configMap);
            alterConfigsResult.all().get();
        }
    
        /**
         * 8. 增加 topic 的Partitions
         */
        public static void addPartitionNum(String topicName, int partitionNum) throws ExecutionException, InterruptedException {
          
          
            AdminClient adminClient = getAdminClient();
            Map<String,NewPartitions> partitionsMap = new HashMap<>() ;
    
            NewPartitions newPartitions = NewPartitions.increaseTo(partitionNum);//增加到的数量
    
            partitionsMap.put(topicName,newPartitions);
    
            CreatePartitionsResult request = adminClient.createPartitions(partitionsMap);
            request.all().get();
        }
    
    
        public static void main(String[] args) throws ExecutionException, InterruptedException {
          
          
            //1. 测试 创建并设置 AdminClient 对象
    //        AdminClient adminClient = AdminExample1.getAdminClient();
    //        System.out.println("adminClient==>" + adminClient);
    
    
            //2. 测试 创建topic
    //        createTopic();
    
            //3. 获取已经创建的 topic 的列表
            ListTopicsResult topicList = getTopicList();
            Collection<TopicListing> topicListings = topicList.listings().get();
            for (TopicListing topic : topicListings) {
          
          
                System.out.println(topic);
            }
    
            // 4. 删除topic
    //        deleteTopic("new_topic_test");
    
            // 5.
    //        getDescribeTopics("susu-topic");
    
            //6. 获取 topic 的配置信息
    //        getDescribeConfig("susu-topic");
    
            // 7. 修改 topic 的配置信息
    //        editConfig("susu-topic");
    //
    //        System.out.println("\n=============修改之后的配置===========\n");
    //
    //        getDescribeConfig("susu-topic"); //修改之后再查看配置
    
            //8. 增加 topic 的Partitions
            addPartitionNum("susu-topic",2);
            System.out.println("添加完毕");
    
        }
    
    }
    
    

3. Producer API

3.1 Producer Configs

3.1.1 Refer to the official website

3.1.2 About the configuration of acks (message delivery guarantee)

Regarding acks: this setting is the number of acknowledgments the producer requires the leader to have received before considering a request complete. It controls the durability of the records that are sent. The following settings are allowed:

  • acks=0: the producer will not wait for any acknowledgment from the server at all (i.e. fire-and-forget, regardless of whether the write succeeds).

    • The record is immediately added to the socket buffer and considered sent.
    • In this case there is no guarantee that the server has received the record, and the retries configuration has no effect (the client will generally not become aware of any failure). The offset returned for each record is always -1.
    • In other words: after sending, a message is received at most once (zero times or once).
  • acks=1: the leader writes the record to its local log and responds without waiting for full acknowledgment from all followers.

    • In this case, if the leader fails immediately after acknowledging the record but before the followers have replicated it, the record is lost.
    • With retries enabled there are two situations:
      • If the message was never received and therefore no acknowledgment arrived, the retried send means the message is still received exactly once;
      • If the message was received but the acknowledgment was lost, the retried send means the message is received more than once.
    • In other words: a message is received at least once (once or multiple times).
  • acks=all (or acks=-1): the leader waits for the full set of in-sync replicas to acknowledge the record.

    • This guarantees that the record is not lost as long as at least one in-sync replica remains alive. This is the strongest available guarantee.
    • In other words: combined with the idempotent producer, a message is received exactly once; duplicate sends are rejected.
  • Note that enabling idempotence requires this configuration to be "all". If conflicting configurations are set and idempotence is not explicitly enabled, idempotence is disabled. (A configuration sketch follows this list.)
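
  • To make these trade-offs concrete, the sketch below shows producer settings corresponding to each guarantee. This is an illustrative sketch, not taken from the original post: the broker address is a placeholder, and the property constants are the standard ones from ProducerConfig (enable.idempotence requires acks=all).

    Properties props = new Properties();
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder address

    // At-most-once: fire-and-forget, no retries
    // props.put(ProducerConfig.ACKS_CONFIG, "0");
    // props.put(ProducerConfig.RETRIES_CONFIG, "0");

    // At-least-once: leader acknowledgment plus retries (duplicates possible)
    // props.put(ProducerConfig.ACKS_CONFIG, "1");
    // props.put(ProducerConfig.RETRIES_CONFIG, "3");

    // Exactly-once per partition: all in-sync replicas acknowledge + idempotent producer
    props.put(ProducerConfig.ACKS_CONFIG, "all");
    props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true");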

3.2 Producer API

3.2.1 Asynchronous sending

  • The code is as follows:
    package com.liu.susu.producer;
    
    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.Producer;
    import org.apache.kafka.clients.producer.ProducerConfig;
    import org.apache.kafka.clients.producer.ProducerRecord;
    
    import java.util.Properties;
    
    /**
     * @Description
     * @Author susu
     */
    public class ProducerExample1 {
          
          
    
        public static Properties getProperties(){
          
          
            Properties properties = new Properties();
            properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "IP:9092");
            properties.put(ProducerConfig.ACKS_CONFIG, "all");
            properties.put(ProducerConfig.RETRIES_CONFIG, "0");
            properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "16348");
            properties.put(ProducerConfig.LINGER_MS_CONFIG, "1");
            properties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432");
            properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            return properties;
        }
    
        /**
         * 1. 发送消息(异步发送)
         *    1.1 一次发一条消息
         */
        public static void producerSendOne(String topicName){
          
          
            Properties properties = getProperties();
            //Producer对象
            Producer<String, String> producer = new KafkaProducer<>(properties);
            //消息对象
            ProducerRecord<String, String> producerRecord = new ProducerRecord<>(topicName,"num1","A-10001");
            //发送消息
            producer.send(producerRecord);
            //所有的通道打开都要记得关闭
            producer.close();
        }
        /**
         * 1. 发送消息(异步发送)
         *    1.2 一次发多条消息
         */
        public static void producerSendMore(String topicName){
          
          
            Properties properties = getProperties();
            //Producer对象
            Producer<String, String> producer = new KafkaProducer<>(properties);
            for (int i = 0; i < 5; i++) {
          
          
                //消息对象
                ProducerRecord<String, String> producerRecord = new ProducerRecord<>(topicName,"Record-"+i,"R-1000"+i);
                //发送消息
                producer.send(producerRecord);
            }
            producer.close();
        }
    
        public static void main(String[] args) {
          
          
            //1.1 一次发一条消息
    //        producerSendOne("susu-topic");
    
            //1.2 一次发多条消息
            producerSendMore("susu-topic");
        }
    
    }
    
    

3.2.2 Asynchronous blocking sending (synchronous sending)

  • The code is as follows:

    package com.liu.susu.producer;
    
    import org.apache.kafka.clients.producer.*;
    
    import java.util.Properties;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.Future;
    
    /**
     * @Description
     * @Author susu
     */
    public class ProducerExample2 {
          
          
    
        public static Properties getProperties(){
          
          
            Properties properties = new Properties();
            properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "43.143.190.116:9092");
            properties.put(ProducerConfig.ACKS_CONFIG, "all");
            properties.put(ProducerConfig.RETRIES_CONFIG, "0");
            properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "16348");
            properties.put(ProducerConfig.LINGER_MS_CONFIG, "1");
            properties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432");
            properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            return properties;
        }
        
        /**
         * 1. 异步阻塞发送(同步发送)
         */
        public static void producerSendMore(String topicName) throws ExecutionException, InterruptedException {
          
          
            Properties properties = getProperties();
            //Producer对象
            Producer<String, String> producer = new KafkaProducer<>(properties);
            for (int i = 0; i < 5; i++) {
          
          
                //消息对象
                ProducerRecord<String, String> producerRecord = new ProducerRecord<>(topicName,"Z-Record-"+i,"Z-R-1000"+i);
                //发送消息
    //            producer.send(producerRecord);
                Future<RecordMetadata> send = producer.send(producerRecord);
                RecordMetadata recordMetadata = send.get();//future.get会进行阻塞直到返回数据表示发送成功,才会继续下一条消息的发送
    
                System.out.println("Z-Record-"+i + ",partition-->"+recordMetadata.partition() + ",offset-->"+recordMetadata.offset());
    
            }
            producer.close();
        }
    
        public static void main(String[] args) throws ExecutionException, InterruptedException{
          
          
            //1. 异步阻塞发送(同步发送)
            producerSendMore("susu-topic");
        }
    
    }
    
    

3.2.3 Asynchronous sending and callback

  • The producer sends a message and, instead of waiting for the broker's reply, immediately continues with the business logic that follows. It can supply a callback that is invoked asynchronously once the broker's result is known, so the producer still learns the outcome of each send without blocking the way the asynchronous-blocking approach does.


  • The code is as follows:

    package com.liu.susu.producer;
    
    import org.apache.kafka.clients.producer.*;
    
    import java.util.Properties;
    import java.util.concurrent.ExecutionException;
    import java.util.concurrent.Future;
    
    /**
     * @Description
     * @Author susu
     */
    public class ProducerExample3 {
          
          
    
        public static Properties getProperties(){
          
          
            Properties properties = new Properties();
            properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "43.143.190.116:9092");
            properties.put(ProducerConfig.ACKS_CONFIG, "all");
            properties.put(ProducerConfig.RETRIES_CONFIG, "0");
            properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "16348");
            properties.put(ProducerConfig.LINGER_MS_CONFIG, "1");
            properties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432");
            properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            return properties;
        }
        
        /**
         * 1. 异步发送并回调
         */
        public static void producerSendMore(String topicName) throws ExecutionException, InterruptedException {
          
          
            Properties properties = getProperties();
            //Producer对象
            Producer<String, String> producer = new KafkaProducer<>(properties);
            for (int i = 0; i < 5; i++) {
          
          
                //消息对象
                ProducerRecord<String, String> producerRecord = new ProducerRecord<>(topicName,"H4-Record-"+i,"H4-R-1000"+i);
    
                //1 发送消息:异步发送并回调
                producer.send(producerRecord, (recordMetadata, exception) -> {
          
          
                    if(exception == null) {
          
          
                        System.out.println("partition-->"+recordMetadata.partition() + ",offset-->"+recordMetadata.offset());
                    }
                    else {
          
          
                        exception.printStackTrace();
                    }
                });
    
                //2 发送消息:异步发送并回调
    //            producer.send(producerRecord, new Callback() {
          
          
    //                @Override
    //                public void onCompletion(RecordMetadata recordMetadata, Exception e) {
          
          
    //                    if(e == null) {
          
          
    //                        System.out.println("partition-->"+recordMetadata.partition() + ",offset-->"+recordMetadata.offset());
    //                    }
    //                    else {
          
          
    //                        e.printStackTrace();
    //                    }
    //                }
    //            });
    
            }
            producer.close();// 要关闭Producer实例
        }
        public static void main(String[] args) throws ExecutionException, InterruptedException{
          
          
            //1. 异步发送并回调
            producerSendMore("susu-topic");
        }
    
    }
    
    

3.2.4 Summary (asynchronous blocking send vs. asynchronous send)

3.2.4.1 Asynchronous blocking send

  • This method can be understood as synchronous sending (i.e. messages are sent one at a time).
    • Messages go out one by one: the second send is only issued after the response to the first has arrived. The result of every message is checked; future.get() blocks until the returned metadata indicates the send succeeded, and only then is the next message sent, so the delivery status of each message is known.
    • If a send fails it is retried, and an exception is thrown once the maximum number of retries is reached. This approach gives the strongest data-reliability guarantee and makes it easy to log the result of every record.

3.2.4.2 Asynchronous send

  • Asynchronous sending means sending in batches.
    • In asynchronous mode the producer pushes data in batches, which greatly improves throughput on the broker side, but it also increases the risk of data loss.
    • Asynchronously you can send a single message or a batch of messages. The defining characteristic is that the producer does not wait for the response to the first send (note that the unit here is a send, which may carry a single message or a batch of data) before immediately issuing the second.

3.2.4.3 Reference

3.3 Producer custom Partition rules (load balancing)

3.3.1 Increase Partition to 3

  • After increasing susu-topic to 3 partitions, it has partitions 0, 1 and 2.
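
  • This can be done with the addPartitionNum helper from section 2.2.7 (a usage sketch; the topic name follows the examples in this post):

        // increase susu-topic to 3 partitions so that keyNum % 3 maps onto partitions 0, 1 and 2
        addPartitionNum("susu-topic", 3);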

3.3.2 Core code


  • MyPartition.java

    package com.liu.susu.producer;
    
    import org.apache.kafka.clients.producer.Partitioner;
    import org.apache.kafka.common.Cluster;
    
    import java.util.Map;
    
    /**
     * @Description
     * @Author susu
     */
    public class MyPartition implements Partitioner {
          
          
    
    
        @Override
        public int partition(String topic, Object key, byte[] bytes, Object value, byte[] bytes1, Cluster cluster) {
          
          
            String newsKey = key + "";  //格式:"P-Record-"+i
    
            String newKeyNum = newsKey.substring(newsKey.length()-1);//取最后一位
            int keyNum = Integer.parseInt(newKeyNum);
    
            int partition = keyNum % 3;
    
            System.out.println("newsKey--->"+newsKey + ",newKeyNum-->"+newKeyNum+",partition-->"+partition);
    
            return partition;
        }
    
        @Override
        public void close() {
          
          
    
        }
    
        @Override
        public void configure(Map<String, ?> map) {
          
          
    
        }
    
    }
    
    
  • example:

    package com.liu.susu.producer;
    
    import org.apache.kafka.clients.producer.*;
    
    import java.util.Properties;
    import java.util.concurrent.ExecutionException;
    
    /**
     * @Description
     * @Author susu
     */
    public class ProducerExample4 {
          
          
    
        public static Properties getProperties(){
          
          
            Properties properties = new Properties();
            properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "43.143.190.116:9092");
            properties.put(ProducerConfig.ACKS_CONFIG, "all");
            properties.put(ProducerConfig.RETRIES_CONFIG, "0");
            properties.put(ProducerConfig.BATCH_SIZE_CONFIG, "16348");
            properties.put(ProducerConfig.LINGER_MS_CONFIG, "1");
            properties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, "33554432");
    
            properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, "com.liu.susu.producer.MyPartition");
    
            properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer");
            return properties;
        }
        
        /**
         * 1. 异步发送并回调
         */
        public static void producerSendMore(String topicName) throws ExecutionException, InterruptedException {
          
          
            Properties properties = getProperties();
            //Producer对象
            Producer<String, String> producer = new KafkaProducer<>(properties);
            for (int i = 1; i <= 15; i++) {
          
          
                //消息对象
                ProducerRecord<String, String> producerRecord = new ProducerRecord<>(topicName,"P-Record-"+i,"P-R-1000"+i);
    
                //发送消息:异步发送并回调
                producer.send(producerRecord, new Callback() {
          
          
                    @Override
                    public void onCompletion(RecordMetadata recordMetadata, Exception e) {
          
          
                        if(e == null) {
          
          
                            System.out.println("partition-->"+recordMetadata.partition() + ",offset-->"+recordMetadata.offset());
                        }
                        else {
          
          
                            e.printStackTrace();
                        }
                    }
                });
    
            }
            producer.close();// 要关闭Producer实例
        }
        public static void main(String[] args) throws ExecutionException, InterruptedException{
          
          
            // 异步发送并回调
            producerSendMore("susu-topic");
        }
    
    }
    
    

3.3.3 Effect

  • Using asynchronous send with a callback, each record is routed to partition keyNum % 3, exactly as computed in MyPartition.

4. Consumer API

4.1 Consumer Configs
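
  • Detailed consumer configuration is documented on the official website; the examples below use a minimal subset. One extra setting worth knowing is auto.offset.reset, which decides where a consumer group starts when it has no committed offset yet. The sketch below is illustrative (the broker address is a placeholder):

    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "localhost:9092");  // placeholder address
    props.setProperty("group.id", "test");                     // consumer group id
    props.setProperty("enable.auto.commit", "true");           // commit offsets automatically
    props.setProperty("auto.commit.interval.ms", "1000");      // auto-commit frequency
    props.setProperty("auto.offset.reset", "earliest");        // earliest/latest when no committed offset exists
    props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");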

4.2 Examples of consumer consumption

4.2.1 Official website reference

4.2.2 Simple getting-started example - automatic offset commit

  • In this case, what has been consumed will not be consumed again. The code is as follows:

    package com.liu.susu.consumer;
    
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    
    import java.time.Duration;
    import java.util.Arrays;
    import java.util.Properties;
    
    /**
     * @Description
     * @Author susu
     */
    public class ConsumerExample1 {
          
          
    
        public static void consumerTest(){
          
          
            Properties props = new Properties();
            props.setProperty("bootstrap.servers", "IP:9092");
            props.setProperty("group.id", "test");
            props.setProperty("enable.auto.commit", "true");//设置enable.auto.commit意味着自动提交偏移量,其频率由配置auto.commit.interval.ms控制
            props.setProperty("auto.commit.interval.ms", "1000");
            props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    
            /**
             * 消费订阅哪一个topic或者哪几个topic
             *   我这里:消费者订阅了主题susu-topic和susu-topic-2,作为消费者组test的一部分,并配置了group.id。
             */
            consumer.subscribe(Arrays.asList("susu-topic", "susu-topic-2"));
    
            while (true) {
          
          
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));//每100毫秒拉取一次
                for (ConsumerRecord<String, String> record : records)
                    System.out.printf("topic = %s,partition = %d, offset = %d, key = %s, value = %s%n",
                            record.topic(),record.partition(),record.offset(), record.key(), record.value());
            }
        }
    
        public static void main(String[] args) {
          
          
            consumerTest();
        }
    
    }
    
    

4.2.3 Manual offset control

4.2.3.1 Explanation

  • Instead of relying on the consumer to periodically commit consumed offsets, the user can control when a record is considered consumed and commit its offset explicitly. This is useful when consuming a message is coupled with some processing logic, so that a message should not be considered consumed until that processing has completed.
  • In this example we poll a batch of records and buffer them in memory. Once we have enough records, we insert them into a database. If we let offsets be committed automatically, as in the previous example, records would be considered consumed as soon as poll returned them to the user, and our process could fail after buffering the records but before inserting them into the database.
    To avoid this, we manually commit the offsets only after the corresponding records have been inserted into the database. This gives precise control over when a record is considered consumed. It also raises the opposite possibility: the process could fail in the interval after the insert into the database but before the commit (even if that window is only a few milliseconds, it exists). In that case the process that takes over consumption resumes from the last committed offset and re-inserts the last batch of data. Used this way, Kafka provides what is usually called an "at-least-once" delivery guarantee: each record is likely delivered once, but may be duplicated in failure cases.

4.2.3.2 Code

  • The code is as follows:

    package com.liu.susu.consumer;
    
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    
    import java.time.Duration;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.Properties;
    
    /**
     * @Description 手动提交
     * @Author susu
     */
    public class ConsumerExample2 {
          
          
    
        public static void consumerTest(){
          
          
            Properties props = new Properties();
            props.setProperty("bootstrap.servers", "43.143.190.116:9092");
            props.setProperty("group.id", "test");
            props.setProperty("enable.auto.commit", "false");//false 手动提交
            props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    
    //        consumer.subscribe(Arrays.asList("susu-topic", "susu-topic-2"));
            consumer.subscribe(Arrays.asList("susu-topic"));
    
            final int minBatchSize = 20;
            List<ConsumerRecord<String, String>> buffer = new ArrayList<>();
    
            while (true) {
          
          
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
          
          
                    buffer.add(record);
                    System.out.printf("topic = %s,partition = %d, offset = %d, key = %s, value = %s%n",
                            record.topic(),record.partition(),record.offset(), record.key(), record.value());
                }
    
                if (buffer.size() >= minBatchSize) {
          
          
                    System.out.println(buffer);
    
                    try {
          
          
                        /**
                         * 这里是业务逻辑,把数据保存到数据库中
                         *    如果失败,则回滚
                         */
    //                insertIntoDb(buffer);
    
                        //如果成功,则手动通知offset提交
                        consumer.commitSync();//消费过之后不会再重复消费
                    } catch (Exception e) {
          
          
                        System.out.println("失败,不提交");//失败不执行commitSync,后续重复发送会消费
                        throw new RuntimeException(e);
                    }
    
                    buffer.clear();
                }
            }
    
    
        }
    
        public static void main(String[] args) {
          
          
            consumerTest();
        }
    
    }
    
    

4.2.4 Each partition is processed separately

4.2.4.1 Explanation

  • The example above uses commitSync to mark all received records as committed. In some cases you may want finer control over which records are committed, by specifying the offset explicitly. In the example below we commit the offset after finishing the records of each partition.

4.2.4.2 Code

  • The code is as follows:

    package com.liu.susu.consumer;
    
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.clients.consumer.OffsetAndMetadata;
    import org.apache.kafka.common.TopicPartition;
    
    import java.time.Duration;
    import java.util.*;
    
    /**
     * @Description 处理完每个分区中的记录后提交偏移量
     * @Author susu
     */
    public class ConsumerExample3 {
          
          
    
        public static void consumerTest(){
          
          
            Properties props = new Properties();
            props.setProperty("bootstrap.servers", "43.143.190.116:9092");
            props.setProperty("group.id", "test");
            props.setProperty("enable.auto.commit", "false");//false 手动提交
            props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    
    //        consumer.subscribe(Arrays.asList("susu-topic", "susu-topic-2"));
            consumer.subscribe(Arrays.asList("susu-topic"));
    
            try {
          
          
                while(true) {
          
          
                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(Long.MAX_VALUE));
                    // 每个partition单独处理
                    for (TopicPartition partition : records.partitions()) {
          
          
    
                        List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                        for (ConsumerRecord<String, String> record : partitionRecords) {
          
          
    //                        System.out.println(record.offset() + ": " + record.value());
                            System.out.printf("topic = %s,partition = %d, offset = %d, key = %s, value = %s%n",
                                    record.topic(),record.partition(),record.offset(), record.key(), record.value());
                        }
    
                        long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
                        // 循环一个partition,提交一次
                        OffsetAndMetadata offsetAndMetadata = new OffsetAndMetadata(lastOffset + 1);
                        consumer.commitSync(Collections.singletonMap(partition, offsetAndMetadata));
                    }
                }
            } finally {
          
          
                consumer.close();
            }
    
        }
    
        public static void main(String[] args) {
          
          
            consumerTest();
        }
    
    }
    
    

4.2.4.3 Attention

  • Note: The committed offset should always be the offset of the next message the application will read. Therefore, when calling commitSync(offsets), you should add one to the offset of the last message processed.

4.2.5 Manually control which partition is consumed (manual partition assignment)

4.2.5.1 Description

  • In the previous examples we subscribed to the topics we were interested in and let Kafka dynamically assign a fair share of those topics' partitions to the active consumers in the group. In some cases, however, you may need finer control over which specific partitions are assigned. For example:
    • If the process maintains some local state associated with a partition (such as a key-value store on local disk), it should only fetch records for the partition it maintains on disk.
    • If the process itself is highly available and will be restarted on failure (perhaps via a cluster-management framework such as YARN or Mesos, or as part of a stream-processing framework), Kafka does not need to detect the failure and reassign the partition, because the consuming process will simply be restarted on another machine.
  • To use this mode, do not subscribe to the topic with subscribe; instead call assign(Collection) with the full list of partitions to consume.

4.2.5.2 Code

  • as follows:

    package com.liu.susu.consumer;
    
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.clients.consumer.OffsetAndMetadata;
    import org.apache.kafka.common.TopicPartition;
    
    import java.time.Duration;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;
    import java.util.Properties;
    
    /**
     * @Description 指定消费某个分区
     * @Author susu
     */
    public class ConsumerExample4 {
          
          
    
        public static void consumerTest(){
          
          
            Properties props = new Properties();
            props.setProperty("bootstrap.servers", "43.143.190.116:9092");
            props.setProperty("group.id", "test");
            props.setProperty("enable.auto.commit", "false");//false 手动提交
            props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    
            String topicName = "susu-topic";
            TopicPartition partition0 = new TopicPartition(topicName, 0);
            TopicPartition partition1 = new TopicPartition(topicName, 1);
            TopicPartition partition2 = new TopicPartition(topicName, 2);
    
            consumer.assign(Arrays.asList(partition2)); //只有partition2消费
    //        consumer.assign(Arrays.asList(partition0, partition1));  //只有partition0, partition1消费
    
    
            try {
          
          
                while(true) {
          
          
                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(Long.MAX_VALUE));
                    // 每个partition单独处理
                    for (TopicPartition partition : records.partitions()) {
          
          
    
                        List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                        for (ConsumerRecord<String, String> record : partitionRecords) {
          
          
    //                        System.out.println(record.offset() + ": " + record.value());
                            System.out.printf("topic = %s,partition = %d, offset = %d, key = %s, value = %s%n",
                                    record.topic(),record.partition(),record.offset(), record.key(), record.value());
                        }
    
                        long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
                        // 循环一个partition,提交一次
                        OffsetAndMetadata offsetAndMetadata = new OffsetAndMetadata(lastOffset + 1);
                        consumer.commitSync(Collections.singletonMap(partition, offsetAndMetadata));
                    }
                }
            } finally {
          
          
                consumer.close();
            }
    
        }
    
        public static void main(String[] args) {
          
          
            consumerTest();
        }
    
    }
    
    

4.2.5.3 Effects

  • With assign(Arrays.asList(partition2)) only partition 2 is consumed; switching to assign(Arrays.asList(partition0, partition1)) means only partitions 0 and 1 are consumed.

4.2.6 Consumer multi-thread processing

4.2.6.1 The consumer is not thread-safe

  • The Kafka consumer is NOT thread-safe. All network I/O happens in the thread of the application making the call, and it is the user's responsibility to ensure that multi-threaded access is properly synchronized; unsynchronized access results in a ConcurrentModificationException.
  • The only exception to this rule is wakeup(), which can safely be called from an external thread to interrupt an active operation. The thread blocked in that operation will then throw a WakeupException. This can be used to shut down the consumer from another thread:
    • In a separate thread, set the closed flag and wake the consumer up:
      closed.set(true);
      consumer.wakeup();

4.2.6.2 Two ways to implement

4.2.6.2.1 One consumer per thread
  • A simple option is to give each thread its own consumer instance. Pros and cons of this approach:
    • Advantage: it is the easiest to implement.
    • Advantage: it is usually the fastest, because no inter-thread coordination is needed.
    • Advantage: it makes per-partition ordered processing very easy to implement (each thread just processes messages in the order it receives them).
    • Disadvantage: more consumers means more TCP connections to the cluster (one per thread). Kafka generally handles connections very efficiently, so this is usually a small cost.
    • Disadvantage: more consumers means more requests to the server and slightly less batching of data, which can reduce I/O throughput.
    • Disadvantage: the total number of threads across all processes is limited by the total number of partitions.
4.2.6.2.2 Separate consumption and processing
  • Another approach is to have one or more consumer threads do all the data consumption and hand the ConsumerRecords instances off to a blocking queue consumed by a pool of processor threads that do the actual record processing (see the sketch in 4.2.6.4 below). This option likewise has pros and cons:
    • Advantage: it allows the number of consumers and processors to be scaled independently, so a single consumer can feed many processor threads, avoiding any limit imposed by the partition count.
    • Disadvantage: guaranteeing order across the processors requires special care, because the threads execute independently and an earlier chunk of data may actually be processed after a later one simply due to thread scheduling. If there is no ordering requirement, this is not a problem.
    • Disadvantage: manually committing offsets becomes harder, because it requires coordinating all the threads to make sure processing of a partition is complete.
      There are many possible variations of this approach. For example, each processor thread can have its own queue, and the consumer threads can hash records into these queues by TopicPartition to preserve ordered consumption and simplify committing.

4.2.6.3 Typical pattern (one consumer per thread)

  • The code is as follows:

    package com.liu.susu.consumer.thread;
    
    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;
    import org.apache.kafka.clients.consumer.OffsetAndMetadata;
    import org.apache.kafka.common.TopicPartition;
    import org.apache.kafka.common.errors.WakeupException;
    
    import java.time.Duration;
    import java.util.*;
    import java.util.concurrent.atomic.AtomicBoolean;
    
    /**
     * @Description
     * @Author susu
     */
    public class KafkaConsumerRunner implements Runnable {
          
          
        private final AtomicBoolean closed = new AtomicBoolean(false);
        private final KafkaConsumer consumer;
        public KafkaConsumerRunner(KafkaConsumer consumer) {
          
          
            this.consumer = consumer;
        }
    
        @Override
        public void run() {
          
          
            try {
          
          
                consumer.subscribe(Arrays.asList("susu-topic"));//订阅
                while (!closed.get()) {
          
          
                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(10000));
                    // Handle new records
                    for (TopicPartition partition : records.partitions()) {
          
          
    
                        List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
                        for (ConsumerRecord<String, String> record : partitionRecords) {
          
          
                            System.out.printf("Thread = %s,topic = %s,partition = %d, offset = %d, key = %s, value = %s%n",
                                    Thread.currentThread().getName(),
                                    record.topic(),record.partition(),record.offset(), record.key(), record.value());
                        }
    
                        long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
                        // 循环一个partition,提交一次
                        OffsetAndMetadata offsetAndMetadata = new OffsetAndMetadata(lastOffset + 1);
                        consumer.commitSync(Collections.singletonMap(partition, offsetAndMetadata));
                    }
                }
            } catch (WakeupException e) {
          
          
                // Ignore exception if closing
                if (!closed.get()) throw e;
            } finally {
          
          
                consumer.close();
            }
        }
    
        // Shutdown hook which can be called from a separate thread
        public void shutdown() {
          
          
            closed.set(true);
            consumer.wakeup();
        }
    
    
        /**
         * 构建 consumer
         * @return consumer
         */
        public static KafkaConsumer<String, String> getKafkaConsumer(){
          
          
            Properties props = new Properties();
            props.setProperty("bootstrap.servers", "43.143.190.116:9092");
            props.setProperty("group.id", "test");
            props.setProperty("enable.auto.commit", "false");//false 手动提交
            props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
    
            return consumer;
        }
    
        public static void main(String[] args) {
          
          
            KafkaConsumer<String, String> consumer = getKafkaConsumer();
    
            KafkaConsumerRunner runner = new KafkaConsumerRunner(consumer);
    
            Thread thread = new Thread(runner);
            thread.start();
    
    //        runner.shutdown();
        }
    
    }
    
    

4.2.6.4 Separate consumption and processing (thread pool processing)
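
  • The approach described above in "Separate consumption and processing" was only covered in prose. Below is a minimal, hedged sketch of one way to wire it up: a single consumer thread polls and hands batches to a BlockingQueue, and a small worker pool takes them off and processes them. The class name, broker address and pool size are illustrative; the topic name follows the earlier examples, and offsets are left on auto-commit for simplicity, so the ordering/commit caveats above still apply.

    package com.liu.susu.consumer.thread;

    import org.apache.kafka.clients.consumer.ConsumerRecord;
    import org.apache.kafka.clients.consumer.ConsumerRecords;
    import org.apache.kafka.clients.consumer.KafkaConsumer;

    import java.time.Duration;
    import java.util.Arrays;
    import java.util.Properties;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.LinkedBlockingQueue;

    /**
     * Sketch: one consumer thread polls and hands batches to a worker pool via a BlockingQueue.
     */
    public class ConsumerWorkerPoolExample {

        public static void main(String[] args) {
            Properties props = new Properties();
            props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder address
            props.setProperty("group.id", "test");
            props.setProperty("enable.auto.commit", "true");          // simplification, see caveats above
            props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
            props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

            BlockingQueue<ConsumerRecords<String, String>> queue = new LinkedBlockingQueue<>(100);
            ExecutorService workers = Executors.newFixedThreadPool(3);

            // Worker threads: take polled batches off the queue and process them
            for (int i = 0; i < 3; i++) {
                workers.submit(() -> {
                    try {
                        while (true) {
                            ConsumerRecords<String, String> batch = queue.take();
                            for (ConsumerRecord<String, String> record : batch) {
                                System.out.printf("worker = %s, partition = %d, offset = %d, value = %s%n",
                                        Thread.currentThread().getName(),
                                        record.partition(), record.offset(), record.value());
                            }
                        }
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt(); // stop when the pool is shut down
                    }
                });
            }

            // Single consumer thread: all Kafka network I/O stays on this thread
            try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
                consumer.subscribe(Arrays.asList("susu-topic"));
                while (true) {
                    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
                    if (!records.isEmpty()) {
                        queue.put(records); // hand the whole batch to the worker pool
                    }
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            } finally {
                workers.shutdownNow();
            }
        }
    }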



Origin blog.csdn.net/suixinfeixiangfei/article/details/132223203