Using Kafka to implement producer and consumer instances based on the Java API

Note: For this experiment to succeed, start the producer client (KafkaProducerTest) first, and then start the consumer client (KafkaConsumerTest).
1. Experimental conditions: install IDEA (avoid the very latest version, which can be unstable) and be reasonably proficient in Scala and Java.
2. Before the experiment, create a Maven project named spark_chapter06 in IDEA and add the Kafka dependency to its pom.xml file. Note that the Kafka dependency version must match the installed Kafka version; otherwise the program may fail to run or misbehave at runtime, which is very troublesome to debug.
3. The dependencies in my pom.xml file are shown below (for reference only; you will also need to adjust them to match the plug-in and library versions in IDEA on your own computer).

Dependency code:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>hw</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <scala.version>2.12.15</scala.version>
        <hadoop.version>2.7.4</hadoop.version>
        <spark.version>3.1.2</spark.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.12</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.4</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.12</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.12</artifactId>
            <version>3.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>2.7.4</version>
        </dependency>
    </dependencies>
    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testOutputDirectory>src/test/scala</testOutputDirectory>
    </build>
</project>

After adding the dependencies, click the Maven tool window on the right side of IDEA and refresh the project; the required Java packages will be downloaded automatically.
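
Since step 2 warns that the client dependency must match the installed Kafka version, it can help to confirm which kafka-clients version actually ended up on the classpath. The following is a small optional sketch (not part of the original experiment) that assumes only the kafka-clients dependency declared above; AppInfoParser is a utility class shipped with kafka-clients.

import org.apache.kafka.common.utils.AppInfoParser;

public class KafkaClientVersionCheck {
    public static void main(String[] args) {
        // Prints the kafka-clients version resolved by Maven, e.g. "2.8.1",
        // so it can be compared against the installed Kafka broker version.
        System.out.println("kafka-clients version: " + AppInfoParser.getVersion());
    }
}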

4. Create a java directory in the project and create a KafkaProducerTest file in it; this class produces message data and sends it to the Kafka cluster.

1. Producer client code

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class KafkaProducerTest {

    public static void main(String[] args) {
        String topicName = "testtopic";
        Properties props = new Properties();
        // Kafka broker address and port
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // Acknowledgement mode: "all" blocks until the record is fully committed
        // by all in-sync replicas -- the slowest but most durable setting
        props.put(ProducerConfig.ACKS_CONFIG, "all");
        // Maximum number of retries when a send fails
        props.put(ProducerConfig.RETRIES_CONFIG, 0);
        // Batch size in bytes for records sent to the same partition
        props.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
        // Delay in milliseconds before sending a batch (the default is to send immediately)
        props.put(ProducerConfig.LINGER_MS_CONFIG, 1);
        // Producer buffer size; when the buffer is exhausted, additional send calls block,
        // and a TimeoutException is thrown after max.block.ms
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432);
        // Serializers that turn the key and value objects into bytes
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());

        // Create the Kafka producer
        KafkaProducer<String, String> producer =
                new KafkaProducer<String, String>(props);
        // Use a for loop to produce 100 keyed messages
        // (each iteration also sends one additional unkeyed message)
        for (int i = 0; i < 100; i++) {
            // send() is the producer's main method
            producer.send(new ProducerRecord<String, String>
                    (topicName, Integer.toString(i), "hello world-" + i));
            producer.send(new ProducerRecord<String, String>(topicName, "Kafka test message"));
        }
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        System.out.println("game over");
        // Close the producer (this also flushes any buffered records)
        producer.close();
    }
}
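
The producer assumes that the testtopic topic already exists on the broker at localhost:9092 (or that automatic topic creation is enabled on the broker). If you want to create the topic from code instead, a minimal sketch using the AdminClient that ships with kafka-clients could look like the following; the single partition and replication factor of 1 are assumed values for a single local broker.

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;

import java.util.Collections;
import java.util.Properties;
import java.util.concurrent.ExecutionException;

public class CreateTestTopic {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // 1 partition, replication factor 1 -- assumed values for a single local broker
            NewTopic topic = new NewTopic("testtopic", 1, (short) 1);
            admin.createTopics(Collections.singleton(topic)).all().get();
            System.out.println("Created topic: testtopic");
        }
    }
}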

2. Producer client screenshot (must show the produced data)

2. Consumer client

Create a KafkaConsumerTest class that uses the Kafka API to consume message data from the Kafka cluster. Create the KafkaConsumerTest.java file under the project.
1. Client program (the code must include comments)

import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;

public class KafkaConsumerTest {

    public static void main(String[] args) {
        // Kafka consumer configuration settings
        String topicName = "testtopic";
        Properties props = new Properties();
        // Start reading from the earliest offset, so previously consumed data is read again
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // First consumer group, named "CHE YONG LIN 1" (the group name is arbitrary)
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "CHE YONG LIN 1");
        // Kafka broker address and port
        props.put("bootstrap.servers", "localhost:9092");
        // Commit offsets automatically, once per second
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        // Deserializers for the message key and value
        props.put("key.deserializer", StringDeserializer.class.getName());
        props.put("value.deserializer", StringDeserializer.class.getName());
        KafkaConsumer<String, String> kafkaConsumer =
                new KafkaConsumer<String, String>(props);
        // Subscribe to the topic
        kafkaConsumer.subscribe(Arrays.asList(topicName));
        // Fetch messages
        while (true) {
            // poll() waits up to 100 ms for new records
            ConsumerRecords<String, String> records =
                    kafkaConsumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("topic=%s,offset=%d,key=%s,value=%s%n", record.topic(),
                        record.offset(), record.key(), record.value());
            }

            // Second consumer group, named "CHE YONG LIN 2".
            // To test it, comment out the first consumer group above and uncomment this block.
            /*
            props.put(ConsumerConfig.GROUP_ID_CONFIG, "CHE YONG LIN 2");
            props.put("key.deserializer", StringDeserializer.class.getName());
            props.put("value.deserializer", StringDeserializer.class.getName());
            KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
            // Subscribe to the topic
            consumer.subscribe(Arrays.asList(topicName));
            while (true) {
                ConsumerRecords<String, String> poll = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : poll) {
                    System.out.println(Thread.currentThread().getName() + "\t" + record.offset()
                            + "\t" + record.key() + "\t" + record.value());
                }
            }
            */
        }
    }
}
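
The original listing also set ENABLE_AUTO_COMMIT_CONFIG to "false" inside the poll loop; configuration changes made after the consumer has been constructed have no effect, so that line was dropped above. If you actually want manual offset control, a minimal sketch is shown below, assuming the same topic and broker address as above (the group name "manual-commit-group" is an arbitrary example).

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;

public class ManualCommitConsumerTest {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "manual-commit-group"); // assumed group name
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // Disable auto commit BEFORE creating the consumer
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Arrays.asList("testtopic"));
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("offset=%d,key=%s,value=%s%n",
                            record.offset(), record.key(), record.value());
                }
                // Commit the offsets of the records returned by the last poll()
                consumer.commitSync();
            }
        }
    }
}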

2. Consumer client screenshot (must show the consumed data)


Origin blog.csdn.net/qq_62127918/article/details/130414752