Java实现一个简单的Kafka消息测试程序

记录一下最近做的一个小程序,模拟很多辆车不定时上报里程等状态数据到Kafka,从而对后端的批处理应用进行性能测试。

车辆的上报消息是JSON格式,假设包含如下字段:

{
	"telemetry": {
		"engineStatus": 1,
		"odometer": 120,
		"doorStatus": 1
	},
	"timestamp": 1683797176608,
	"deviceId": "abc",
	"messageType": 1,
	"version": "1.0"
}

首先是生成一批车辆的deviceId,这些数据存储在PG数据库中,因此我用psql连接到数据库之后,用如下命令把vehicle表的10000辆车的数据导出到本地的一个csv文件中:

\copy (select * from vehicle limit 10000) to '/tmp/vehicle.csv' with csv; 

新建一个Java项目,把刚才导出的CSV文件放置在src/main/resources目录中。

编写代码,读取这个CSV文件中的deviceId这一列的数据,并初始化,为每个device设置一个初始里程,这里采用了opencsv这个库来读取CSV。然后是模拟这一批设备,在过去的一个小时里面,每间隔10秒左右发一条数据,上报其里程数据的变化。

另外,为了优化Kafka消息发送的吞吐量,我们可以调整Producer的压缩格式(compression.type),以及buffer.memory、batch.size、linger.ms这几个参数。可以对比不同参数组合下的性能,从中找到最合适的一组配置。优化的效果还是很明显的。

代码逻辑很简单,如下:

package com.example;

import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.util.Random;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;

/**
 * Simulate the telemetry messages!
 *
 */
/**
 * Simulates vehicle telemetry messages and publishes them to the Kafka topic
 * {@code test.telemetry} to load-test a downstream batch application.
 *
 * <p>Device ids are read from a CSV file on the classpath; each device gets a
 * random initial odometer, then one hour of history is replayed with one
 * message per device per simulated 10-second tick.
 */
public class App 
{
    private static final Logger LOG = LoggerFactory.getLogger(App.class);
    private static final String VEHICLE_FILE = "vehicle.csv";
    // FIX: a comma was missing between "odometer":%d and "doorStatus" in the
    // original template, which made every generated message invalid JSON.
    private static final String TELEMETRY_MSG = "{\"telemetry\":{\"engineStatus\":1,\"odometer\":%d,\"doorStatus\":1},\"timestamp\":%d,\"deviceId\":\"%s\",\"messageType\":1,\"version\":\"1.0\"}";
    private static final int INTERVAL = 10;                   // seconds between messages per vehicle
    private static final int TOTAL_LOOPS = 3600 / INTERVAL;   // one hour of simulated history

    public static void main( String[] args )
    {
        // One shared RNG instead of allocating "new Random()" on every use.
        Random random = new Random();
        HashMap<String, Integer> deviceOdometerMap = new HashMap<>();

        // Read device ids (column 8 of the exported vehicle CSV — TODO confirm
        // against the table layout) and assign each a random starting odometer
        // in [0, 1000). try-with-resources closes the reader (it leaked before).
        try (CSVReader reader = new CSVReaderBuilder(
                new InputStreamReader(
                    App.class.getClassLoader().getResourceAsStream(VEHICLE_FILE),
                    StandardCharsets.UTF_8)).build()) {
            String[] nextLine;
            while ((nextLine = reader.readNext()) != null) {
                // FIX: the original "(int) new Random().nextFloat()*1000" cast
                // the float to int FIRST, so every odometer started at 0.
                deviceOdometerMap.put(nextLine[8], random.nextInt(1000));
            }
        } catch (Exception e) {
            LOG.error("Failed to load device ids from {}", VEHICLE_FILE, e);
            return;  // no devices — nothing to simulate
        }

        List<String> deviceIds = new ArrayList<>(deviceOdometerMap.keySet());
        int totalDeviceIds = deviceIds.size();

        long endTimestamp = System.currentTimeMillis();
        long startTimestamp = endTimestamp - (3600 * 1000);

        String bootstrapServers = "localhost:9092";
        // create Producer properties
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // Throughput tuning: compress and batch aggressively; adjust to taste.
        properties.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "lz4");
        //properties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 67108864);
        properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 262144);
        properties.put(ProducerConfig.LINGER_MS_CONFIG, 10);

        int msgCount = 0;
        int totalMsg = totalDeviceIds * TOTAL_LOOPS;
        // try-with-resources guarantees the producer is closed (and its buffer
        // drained) even if send() throws mid-loop.
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(properties)) {
            for (int count = 0; count < TOTAL_LOOPS; count++) {
                for (int i = 0; i < totalDeviceIds; i++) {
                    String deviceId = deviceIds.get(i);
                    // Up to 1s of jitter inside the 10s tick — TODO confirm the
                    // intended spread; nextInt(INTERVAL * 1000) would scatter
                    // messages across the whole tick instead.
                    long timestamp = startTimestamp + random.nextInt(1000);
                    int odometer = deviceOdometerMap.get(deviceId) + random.nextInt(100 * INTERVAL);
                    deviceOdometerMap.put(deviceId, odometer);
                    String msg = String.format(TELEMETRY_MSG, odometer, timestamp, deviceId);
                    // send data - asynchronous; batching/linger settings above do the work
                    producer.send(new ProducerRecord<>("test.telemetry", msg));
                    msgCount++;
                }
                System.out.print(String.format("Sending telemetry messages to topic test.telemetry: %d/%d \r", msgCount, totalMsg));
                startTimestamp += INTERVAL * 1000;
            }
            // flush before close so the progress counter reflects delivered batches
            producer.flush();
        }
    }
}

POM文件如下:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.example</groupId>
  <artifactId>telemetry-simulate</artifactId>
  <packaging>jar</packaging>
  <properties>
    <!-- Plugin versions kept in one place so they can be bumped together -->
    <maven-compiler-plugin.version>3.7.0</maven-compiler-plugin.version>
    <maven-jar-plugin.version>3.3.0</maven-jar-plugin.version>
    <maven-shade-plugin.version>3.2.4</maven-shade-plugin.version>
    <!-- Matches the Java 8 source level used by the App class -->
    <java.version>1.8</java.version>
  </properties>
  <build>
    <plugins>
      <plugin>
        <!-- Build an executable JAR -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>${maven-jar-plugin.version}</version>
        <configuration>
          <archive>
            <manifest>
              <addClasspath>true</addClasspath>
              <classpathPrefix>lib/</classpathPrefix>
              <!-- Entry point for "java -jar" -->
              <mainClass>com.example.App</mainClass>
            </manifest>
          </archive>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>${maven-compiler-plugin.version}</version>
        <configuration>
          <source>${java.version}</source>
          <target>${java.version}</target>
        </configuration>
      </plugin>
      <plugin>
        <!-- Shade bundles all dependencies into a single runnable fat JAR -->
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>${maven-shade-plugin.version}</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
  <version>1.0-SNAPSHOT</version>
  <name>telemetry-simulate</name>
  <url>http://maven.apache.org</url>
  <dependencies>
    <!-- NOTE(review): junit 3.8.1 predates annotations; consider 4.13.2+ -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.25</version>
    </dependency>
    <!-- SLF4J binding routing to log4j 1.x at runtime -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.25</version>
    </dependency>
    <!-- NOTE(review): log4j 1.2.x is end-of-life with known CVEs; consider
         migrating to log4j2 or logback (swap the SLF4J binding accordingly) -->
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <!-- Kafka producer API used by App -->
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>2.3.0</version>
    </dependency>
    <!-- CSV parsing for the vehicle.csv resource -->
    <dependency>
      <groupId>com.opencsv</groupId>
      <artifactId>opencsv</artifactId>
      <version>5.4</version>
    </dependency>
  </dependencies>
</project>

最后运行mvn clean package, java -jar运行即可。

猜你喜欢

转载自blog.csdn.net/gzroy/article/details/130657673