Flink动态传输数据到MySQL

找了很多把流式数据从 Flink 写入 MySQL 的方法,都以失败告终,最后终于找到了一个实际可行的方法!代码用 Java 编写,分为四个部分:数据类型定义、流式数据生成、输出设置、主程序运行。核心思路其实很简单,一步步操作即可:

一、数据类型定义

生成一个POJO类,我这里随意设置了多个类型的属性,可以自由调整。

package tsinghua.test;

import java.sql.Timestamp;


/**
 * Plain data holder describing one machine record that flows through the job.
 *
 * <p>The fields are public and a no-argument constructor is provided so the class
 * stays a simple POJO that other classes can read and populate directly.
 */
public class DataEvent {
    /** Human-readable machine name. */
    public String machineName;
    /** Numeric machine identifier. */
    public int machine_id;

    /** Creates an empty event; fields keep their defaults ({@code null} and {@code 0}). */
    public DataEvent() {
    }

    /**
     * Creates a fully populated event.
     *
     * @param machineName human-readable machine name
     * @param machine_id  numeric machine identifier
     */
    public DataEvent(String machineName, int machine_id) {
        this.machineName = machineName;
        this.machine_id = machine_id;
    }

    /**
     * Renders the event as {@code DataEvent{machineName = <name>, machine_id = <id>}}.
     *
     * @return a single-line description of both fields
     */
    @Override
    public String toString() {
        // The original expression was left unterminated (no closing brace text, no
        // semicolon), which prevented the class from compiling.
        return "DataEvent{machineName = " + machineName +
                ", machine_id = " + machine_id + "}";
    }

}

二、流式数据生成

这里模拟一个流式数据生成源;实际使用时也可以通过 Kafka 等连接外部数据源。数据源不是本文演示的重点,所以只做简单模拟。

package tsinghua.test;

/**
 * 自定义的数据源
 */

import org.apache.flink.streaming.api.functions.source.SourceFunction;

import java.util.Calendar;
import java.util.Random;

public class DataSource implements SourceFunction<DataEvent> {
    // 声明一个布尔变量,作为控制数据生成的标识位
    private Boolean running = true;

    @Override
    public void run(SourceContext<DataEvent> ctx) throws Exception {
        int countNum = 0;
        String[] machineNames = {"贴片机", "组装机", "焊接机", "车床001", "车床002"};
        int[] machine_id = {1, 2, 3, 4, 5};


        while (running) {
            // collect用于发送数据 当前发送的数据类型是Event
            ctx.collect(new DataEvent(
                    machineNames[countNum % 5],
                    machine_id[countNum % 5],
            ));
            countNum++;
            // 隔3秒生成一个点击事件,方便观测
            Thread.sleep(3000);
        }
    }

    @Override
    public void cancel() {
        running = false;
    }

}

三、sink输出设置

通过继承 RichSinkFunction 富函数类,实现到 MySQL 的数据输出。注意需要预先在 MySQL 中建好对应的库(此处为 test)和对应的表(此处为 datatype01,包含 machineId、machineName 两列)!表结构也需要与数据形式以及代码中的 INSERT 语句保持一致。
package tsinghua.test.getDataTest;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import tsinghua.test.DataEvent;

import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
/**
 * Sink that writes every incoming {@link DataEvent} as one row into the MySQL table
 * {@code datatype01} through plain JDBC.
 *
 * <p>The connection and the prepared statement are created in {@link #open} and
 * released in {@link #close}.
 */
public class SinkPart extends RichSinkFunction<DataEvent> {

    // JDBC handles are not serializable; they are created in open(), so they are
    // marked transient to keep them out of the serialized function instance.
    private transient Connection connection = null;
    private transient PreparedStatement statement = null;

    /**
     * Opens the JDBC connection and prepares the INSERT statement reused for every record.
     *
     * @param parameters configuration handed in by the runtime (unused here)
     * @throws Exception if the connection or the statement cannot be created
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        String url = "jdbc:mysql://localhost:3306/test?autoReconnect=true&useUnicode=true&characterEncoding=utf8&serverTimezone=GMT%2B8&useSSL=false";
        connection = DriverManager.getConnection(url, "root", "123456");
        statement = connection.prepareStatement("INSERT INTO datatype01 (machineId, machineName) VALUES (?, ?)");
    }

    /**
     * Inserts one event as a row.
     *
     * @param event   record to persist
     * @param context sink context supplied by the runtime (unused here)
     * @throws Exception if the INSERT fails
     */
    @Override
    public void invoke(DataEvent event, Context context) throws Exception {
        statement.setInt(1, event.machine_id);
        // The field on DataEvent is machineName; the original referenced a
        // non-existent machine_name and did not compile.
        statement.setString(2, event.machineName);
        statement.executeUpdate();
    }

    /**
     * Releases the statement and the connection.
     *
     * <p>Both handles are null-checked because open() may have failed before assigning
     * them, and the connection is closed in a finally block so it is released even if
     * closing the statement throws.
     *
     * @throws Exception if closing either JDBC resource fails
     */
    @Override
    public void close() throws Exception {
        try {
            if (statement != null) {
                statement.close();
            }
        } finally {
            if (connection != null) {
                connection.close();
            }
        }
    }
}

四、主程序运行

最后一步很简单,运行代码即可。这里面很多 import 其实没用,是之前各种试错的时候加进去的,可按需要删除。水位线也可以不设置。

package tsinghua.test.getDataTest;

import org.apache.commons.cli.Option;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.types.Row;
import tsinghua.test.DataEvent;
import tsinghua.test.DataSource;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Timestamp;
import java.time.Duration;

/**
 * Entry point: wires the demo {@link DataSource} to the MySQL sink ({@link SinkPart})
 * and also prints every record to standard output.
 */
public class DataToMySQL {
    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.getConfig().setAutoWatermarkInterval(100);

        // Watermarks are normally generated at the source via assignTimestampsAndWatermarks,
        // which takes a WatermarkStrategy (a timestamp assigner plus a watermark generator).
        // forBoundedOutOfOrderness(...) sets the allowed lateness; withTimestampAssigner(...)
        // defines how a timestamp is obtained for each record.
        SingleOutputStreamOperator<DataEvent> stream = env.addSource(new DataSource())
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<DataEvent>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                                .withTimestampAssigner(new SerializableTimestampAssigner<DataEvent>() {
                                    @Override
                                    public long extractTimestamp(DataEvent element, long recordTimestamp) {
                                        // DataEvent declares no timestamp field (the original
                                        // read element.timestamp and did not compile), so the
                                        // wall-clock time at extraction is used instead.
                                        return System.currentTimeMillis();
                                    }
                                })
                );

        // Persist every record to MySQL.
        stream.addSink(new SinkPart());

        // Also echo every record to stdout for easy observation.
        stream.print();

        env.execute();
    }
}

猜你喜欢

转载自blog.csdn.net/m0_58285786/article/details/130684916