Flink dynamically writes streaming data to MySQL

I tried many ways to transfer streaming data from Flink to MySQL, and they all ended in failure, until I finally found a method that works! The code is written in Java and is divided into four parts: data type definition, streaming data generation, sink output setting, and the main program. The core idea is actually very simple, and you can follow along step by step:

1. Data type definition

Define a POJO class. I set a few attributes of different types here at will; they can be adjusted freely. I also include a timestamp field, since the watermark assigner in step 4 reads it.

package tsinghua.test;

import java.sql.Timestamp;

public class DataEvent {
    public String machineName;  // machine name
    public int machine_id;      // machine ID
    public long timestamp;      // event time in epoch millis, read by the watermark assigner in step 4

    public DataEvent() {
    }

    public DataEvent(String machineName, int machine_id, long timestamp) {
        this.machineName = machineName;
        this.machine_id = machine_id;
        this.timestamp = timestamp;
    }

    @Override
    public String toString() {
        return "DataEvent{machineName = " + machineName +
                ", machine_id = " + machine_id +
                ", timestamp = " + new Timestamp(timestamp) + "}";
    }
}
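
As a quick sanity check, the class can be exercised on its own (an illustrative snippet of mine; the values are arbitrary):

package tsinghua.test;

// Hypothetical quick check of the POJO; not part of the Flink job
public class DataEventDemo {
    public static void main(String[] args) {
        DataEvent e = new DataEvent("贴片机", 1, System.currentTimeMillis());
        System.out.println(e);
        // prints e.g. DataEvent{machineName = 贴片机, machine_id = 1, timestamp = 2023-05-15 10:00:00.0}
    }
}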

2. Streaming data generation

This step simulates a streaming data source. You could also use Kafka to connect to an external data source, but that is not the point of this demo, so we just simulate one.

package tsinghua.test;

import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.Calendar;

/**
 * A custom data source
 */
public class DataSource implements SourceFunction<DataEvent> {
    // Flag controlling data generation; volatile because cancel() is called from another thread
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<DataEvent> ctx) throws Exception {
        int countNum = 0;
        String[] machineNames = {"贴片机", "组装机", "焊接机", "车床001", "车床002"};
        int[] machine_id = {1, 2, 3, 4, 5};

        while (running) {
            // collect() emits a record downstream; the record type here is DataEvent
            ctx.collect(new DataEvent(
                    machineNames[countNum % 5],
                    machine_id[countNum % 5],
                    Calendar.getInstance().getTimeInMillis()
            ));
            countNum++;
            // emit one event every 3 seconds so the output is easy to observe
            Thread.sleep(3000);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

3. Sink output setting

Output to MySQL is implemented by extending the RichSinkFunction rich function class. Note that the corresponding database (here, test) needs to exist in MySQL in advance, and the corresponding table (here, datatype01) must be created! The table structure also needs to match the shape of the data, and the MySQL JDBC driver (mysql-connector-java) has to be on the classpath.
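
If the table does not exist yet, it can be created up front. Here is a minimal one-off sketch of my own (the schema, i.e. the column names and types, is an assumption matched to the INSERT statement used below):

package tsinghua.test.getDataTest;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// One-off helper (illustrative sketch, not part of the Flink job) that creates
// the target table; the schema is assumed to match the INSERT in SinkPart below.
public class CreateTable {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=utf8&serverTimezone=GMT%2B8&useSSL=false";
        try (Connection connection = DriverManager.getConnection(url, "root", "123456");
             Statement statement = connection.createStatement()) {
            statement.execute("CREATE TABLE IF NOT EXISTS datatype01 ("
                    + "machineId INT NOT NULL, "
                    + "machineName VARCHAR(64) NOT NULL)");
        }
    }
}

With the table in place, here is the sink itself: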
package tsinghua.test.getDataTest;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import tsinghua.test.DataEvent;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

public class SinkPart extends RichSinkFunction<DataEvent> {

    Connection connection = null;
    PreparedStatement statement = null;

    @Override
    public void open(Configuration parameters) throws Exception {
        // open() runs once per parallel instance, so the connection and the
        // prepared statement are created once and reused for every record
        String url = "jdbc:mysql://localhost:3306/test?autoReconnect=true&useUnicode=true&characterEncoding=utf8&serverTimezone=GMT%2B8&useSSL=false";
        connection = DriverManager.getConnection(url, "root", "123456");
        statement = connection.prepareStatement("INSERT INTO datatype01 (machineId, machineName) VALUES (?, ?)");
    }

    @Override
    public void invoke(DataEvent event, Context context) throws Exception {
        // execute the insert directly for each incoming record
        statement.setInt(1, event.machine_id);
        statement.setString(2, event.machineName);
        statement.execute();
    }

    @Override
    public void close() throws Exception {
        statement.close();
        connection.close();
    }
}

4. Running the main program

The last step is simply running the code. Many of the imports here are actually unused; they crept in during earlier trial and error and can be deleted as needed. Setting watermarks is also optional for this job; I keep the watermark code here for reference.

package tsinghua.test.getDataTest;

import org.apache.commons.cli.Option;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Row;
import tsinghua.test.DataEvent;
import tsinghua.test.DataSource;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Timestamp;
import java.time.Duration;

public class DataToMySQL {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.getConfig().setAutoWatermarkInterval(100);

        // Watermarks are usually generated at the source, by calling .assignTimestampsAndWatermarks with a WatermarkStrategy.
        // A WatermarkStrategy bundles a timestamp assigner (TimestampAssigner) and a watermark generator (WatermarkGenerator).
        // WatermarkStrategy.<T>forBoundedOutOfOrderness() builds the strategy for out-of-order streams; its argument is how long watermarks lag behind the data.
        // .withTimestampAssigner() specifies how to extract the timestamp from each record.
        SingleOutputStreamOperator<DataEvent> stream = env.addSource(new DataSource())
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<DataEvent>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                                .withTimestampAssigner(new SerializableTimestampAssigner<DataEvent>() {
                                    @Override
                                    public long extractTimestamp(DataEvent element, long recordTimestamp) {
                                        return element.timestamp;
                                    }
                                })
                );


        stream.addSink(new SinkPart());

        stream.print();

        env.execute();


    }


}
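
By the way, the unused JdbcSink and JdbcConnectionOptions imports above point at an alternative: Flink's official JDBC connector (the flink-connector-jdbc dependency). For reference, here is a sketch of what the equivalent sink could look like; it would replace the stream.addSink(new SinkPart()) line, and I present it only as an untested variant:

// Alternative sink via Flink's JDBC connector (requires flink-connector-jdbc);
// an illustrative sketch that would replace stream.addSink(new SinkPart()).
stream.addSink(JdbcSink.<DataEvent>sink(
        "INSERT INTO datatype01 (machineId, machineName) VALUES (?, ?)",
        (statement, event) -> {
            statement.setInt(1, event.machine_id);
            statement.setString(2, event.machineName);
        },
        new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .withUrl("jdbc:mysql://localhost:3306/test?useSSL=false&serverTimezone=GMT%2B8")
                .withDriverName("com.mysql.cj.jdbc.Driver")
                .withUsername("root")
                .withPassword("123456")
                .build()
));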
