1. Experiment Content
Experiment environment
Hadoop 2.7.5
Spark 2.4.0
ZooKeeper 3.4.8
Kafka 2.11-2.1.0
MySQL
Experiment content
Simulated market data is generated and written to Kafka. Structured Streaming reads the data from Kafka, performs the computation, and writes the results to MySQL; writing to MySQL is done through a custom JdbcSink.
2. Experiment Steps
2.1 Create a Maven project in IDEA
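The lab does not list the project dependencies explicitly; a minimal sketch of the pom.xml dependency section, assuming Spark 2.4.0 built for Scala 2.11, Kafka clients 2.1.0 and a MySQL 5.x JDBC driver (versions should be adjusted to the actual cluster), could look like this:

<dependencies>
    <!-- Spark SQL / Structured Streaming (Scala 2.11 build, matching Spark 2.4.0) -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>2.4.0</version>
    </dependency>
    <!-- Kafka source/sink for Structured Streaming -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
        <version>2.4.0</version>
    </dependency>
    <!-- Kafka producer API used by StockDataProducer -->
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>2.1.0</version>
    </dependency>
    <!-- MySQL JDBC driver (com.mysql.jdbc.Driver) -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.47</version>
    </dependency>
</dependencies>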
2.2 Generate simulated data and write it to Kafka: StockDataProducer.java
package com.processor.driver;

import java.util.Properties;
import java.util.Random;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class StockDataProducer {

    private KafkaProducer<String, String> kafkaProducer;

    public static void main(String[] args) {
        StockDataProducer driver = new StockDataProducer();
        driver.run();
    }

    public void run() {
        // Kafka producer configuration
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.100.10:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        this.kafkaProducer = new KafkaProducer<String, String>(props);

        int i = 0;
        Random random = new Random();
        // Every second, send 11 simulated records to the "market-data-input" topic.
        // Each record value is a CSV line: id,timestamp,stockCode,buyer,seller,price,volume
        while (true) {
            for (i = 0; i <= 10; i++) {
                kafkaProducer.send(new ProducerRecord<String, String>(
                        "market-data-input", "" + i,
                        "1" + i + ",2019-01-14 15:35:45,ANZ"
                                + random.nextInt(10) + ",buyer,seller,"
                                + 100.0 * random.nextInt(100) + ","
                                + 10 * random.nextInt(100)));
            }
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
                kafkaProducer.close();
            }
            System.out.println("write success");
        }
    }
}
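Before running the producer, the input topic has to exist (unless automatic topic creation is enabled on the broker). Assuming the broker and ZooKeeper both run on 192.168.100.10 with default ports, and a single-node setup (one partition, replication factor 1), the topic can be created and the produced records checked roughly as follows:

bin/kafka-topics.sh --create --zookeeper 192.168.100.10:2181 \
  --replication-factor 1 --partitions 1 --topic market-data-input

bin/kafka-console-consumer.sh --bootstrap-server 192.168.100.10:9092 \
  --topic market-data-input --from-beginning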
2.3 Process the Kafka data with Structured Streaming: StockDataProcessingDriver.java
The driver initializes itself by reading a configuration file that provides the Kafka connection details and other settings. The run method then builds the streaming queries and processes the result set; when the aggregated results are written to MySQL, a custom JdbcSink is used so that existing rows in the database are updated rather than only appended to.
package com.processor.driver;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.streaming.StreamingQuery;
import org.apache.spark.sql.streaming.StreamingQueryException;

import com.processor.function.KafkaOutputMapper;
import com.processor.function.StockMapper;
import com.processor.model.KafkaOutputBO;
import com.processor.model.StockBO;
import com.processor.sink.JdbcSink;
import com.processor.util.Util;

public class StockDataProcessingDriver {

    private SparkSession sparkSession;
    private String outputMode;
    private Map<String, String> config = new HashMap<String, String>();
    private Properties prop;
    private String propFileName;
    private String shutdownHook;

    public StockDataProcessingDriver() {
        this.propFileName = "config.properties";
        init();
    }

    // constructor for integration tests
    public StockDataProcessingDriver(String propertyFile, String shutdownHook) {
        this.propFileName = propertyFile;
        this.shutdownHook = shutdownHook;
        init();
    }

    public void init() {
        // Load settings from the properties file and build the SparkSession
        this.prop = Util.loadProperties(propFileName, this.getClass());
        this.sparkSession = SparkSession.builder()
                .config("spark.master", this.prop.getProperty("sparkMaster"))
                .appName("market-data-app").getOrCreate();
        this.outputMode = this.prop.getProperty("outputMode");
        // Options for the Kafka source
        config.put("kafka.bootstrap.servers", this.prop.getProperty("kafka.bootstrap.servers.sink"));
        config.put("subscribe", this.prop.getProperty("inputTopicName"));
        config.put("maxOffsetsPerTrigger", this.prop.getProperty("maxOffsetsPerTrigger"));
        config.put("startingOffsets", this.prop.getProperty("startingOffsets"));
    }

    public static void main(String[] args) {
        StockDataProcessingDriver driver = new StockDataProcessingDriver();
        driver.run();
    }

    public void run() {
        // Read the raw records from Kafka and parse each CSV value into a StockBO
        Dataset<Row> df = this.sparkSession.readStream().format("kafka").options(this.config).load();
        Dataset<Row> df_value = df.selectExpr("CAST(value AS STRING)");
        Dataset<StockBO> dsStocks = df_value.map(new StockMapper(), Encoders.bean(StockBO.class));

        // raw data storage: archive the parsed records as Parquet, partitioned by stock code
        StreamingQuery rawStorage = dsStocks.writeStream().format("parquet")
                .option("startingOffsets", "earliest")
                .option("checkpointLocation", this.prop.getProperty("checkpointRaw"))
                .option("path", this.prop.getProperty("outputPath"))
                .partitionBy("stockCode").start();

        // hourly aggregation per stock: max/min price and total volume
        Dataset<Row> aggResult = dsStocks
                .groupBy(dsStocks.col("stockCode").alias("Stock"),
                        functions.window(dsStocks.col("timestamp"), "1 hour").alias("Hour"))
                .agg(functions.max(dsStocks.col("price")).alias("Max"),
                        functions.min(dsStocks.col("price")).alias("Min"),
                        functions.sum(dsStocks.col("volume")).alias("Volume"));

        StreamingQuery queryAgg = null;

        // kafka change log, written in complete mode
        if (outputMode.equalsIgnoreCase("kafka")) {
            Dataset<KafkaOutputBO> dsKafka = aggResult.map(
                    new KafkaOutputMapper(),
                    Encoders.bean(KafkaOutputBO.class));
            queryAgg = dsKafka
                    .selectExpr("CAST(key AS STRING) AS key", "CAST(value AS STRING) as value")
                    .writeStream().format("kafka").outputMode("complete")
                    .option("kafka.bootstrap.servers", this.prop.getProperty("kafka.bootstrap.servers.sink"))
                    .option("topic", this.prop.getProperty("outputTopicName"))
                    .option("checkpointLocation", this.prop.getProperty("checkPointSink"))
                    .start();
        }

        // writing to mysql with foreach sink in update mode
        if (outputMode.equalsIgnoreCase("jdbc")) {
            JdbcSink sink = new JdbcSink();
            queryAgg = aggResult.writeStream().outputMode("update").foreach(sink).start();
        }

        try {
            if (this.shutdownHook != null) {
                // integration tests: drain whatever data is available, then stop both queries
                rawStorage.processAllAvailable();
                queryAgg.processAllAvailable();
                queryAgg.stop();
                rawStorage.stop();
            } else {
                rawStorage.awaitTermination();
                queryAgg.awaitTermination();
            }
        } catch (StreamingQueryException e) {
            shutdown();
            e.printStackTrace();
        }
    }

    public void shutdown() {
        sparkSession.close();
    }
}
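StockDataProcessingDriver reads config.properties from the classpath (typically src/main/resources in a Maven project), and the JdbcSink in section 2.4 reads the same file for the JDBC settings. The keys below are the ones referenced in the code; the values are only placeholders for this lab environment and need to be adapted (the table name stock_agg and the output topic name are assumptions):

# Spark
sparkMaster=local[2]
outputMode=jdbc

# Kafka source/sink
kafka.bootstrap.servers.sink=192.168.100.10:9092
inputTopicName=market-data-input
outputTopicName=market-data-output
maxOffsetsPerTrigger=1000
startingOffsets=earliest

# checkpoint and raw-data output locations
checkpointRaw=/tmp/checkpoint-raw
checkPointSink=/tmp/checkpoint-sink
outputPath=/tmp/market-data-raw

# MySQL (used by JdbcSink)
url=jdbc:mysql://192.168.100.10:3306/test
user=root
password=******
tableName=stock_agg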
2.4 Connect to the database with JdbcSink and write the data: JdbcSink.java
package com.processor.sink;

import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Properties;

import org.apache.spark.sql.ForeachWriter;
import org.apache.spark.sql.Row;

import com.processor.util.Util;

public class JdbcSink extends ForeachWriter<Row> {

    private static final long serialVersionUID = -873764205562254833L;

    public static final String DRIVER_CLASS = "com.mysql.jdbc.Driver";
    public static String url;
    public static String user;
    public static String password;

    private PreparedStatement statement;
    private Connection connection;
    private Properties prop;

    public JdbcSink() {
        loadProperties();
    }

    // Read the JDBC connection settings from config.properties on the classpath
    public void loadProperties() {
        this.prop = new Properties();
        String propFileName = "config.properties";
        InputStream inputStream = this.getClass().getClassLoader()
                .getResourceAsStream(propFileName);
        if (inputStream != null) {
            try {
                prop.load(inputStream);
                user = prop.getProperty("user");
                url = prop.getProperty("url");
                password = prop.getProperty("password");
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            System.out.println("property file '" + propFileName
                    + "' not found in the classpath");
        }
    }

    @Override
    public boolean open(long partitionId, long epochId) {
        try {
            // Open one connection per partition/epoch; REPLACE INTO overwrites the
            // existing (Hour, Stock) row whenever the aggregate is updated
            Class.forName(DRIVER_CLASS);
            connection = DriverManager.getConnection(url, user, password);
            statement = connection.prepareStatement(
                    "replace into " + prop.getProperty("tableName")
                            + "(Hour,Stock,Min,Max,Volume) values(?,?,?,?,?)");
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    @Override
    public void process(Row row) {
        // arr is the flattened aggregate row produced by Util.getRowArray;
        // the indices below pick out the window start (Hour), Stock, Min, Max and Volume
        String[] arr = Util.getRowArray(row);
        try {
            statement.setString(1, Util.getHourFromTimeStamp(arr[1]));
            statement.setString(2, arr[0]);
            statement.setString(3, arr[4]);
            statement.setString(4, arr[3]);
            statement.setString(5, arr[5]);
            statement.executeUpdate();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void close(Throwable errorOrNull) {
        try {
            statement.close();
            connection.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
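Because the sink writes with REPLACE INTO, the target table needs a primary (or unique) key covering the Hour/Stock pair; only then does the statement behave as an update of the existing aggregate row instead of adding a new row on every trigger. A possible table definition is sketched below; the table name stock_agg and the column types are assumptions (the sink binds every value as a string, so VARCHAR columns are sufficient for this lab):

CREATE TABLE stock_agg (
    Hour   VARCHAR(32) NOT NULL,
    Stock  VARCHAR(16) NOT NULL,
    Min    VARCHAR(32),
    Max    VARCHAR(32),
    Volume VARCHAR(32),
    PRIMARY KEY (Hour, Stock)
);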