Flink batch writes to MySQL/Oracle

1. Introduction

The blogger previously shared an article on high-performance writes from Flink to a relational database. That approach does achieve high write throughput, but it sacrifices the robustness of the program: when uncontrollable problems occur, such as a database restart, a connection failure, or a connection timeout, the online job can go wrong silently. Such problems only log and print errors; they do not make the job fail, so they are hard to notice.

Next, the blogger shares the source code for writing a stream to a relational database in batches.

The overall flow of the program is: Kafka -> Flink -> MySQL.

2. Driver class: Flink consumes the Kafka data source and uses a 10-second window so that the sink is invoked once per batch.

package com.learn.flinkBatchMysql;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;


import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

public class Driver {
    public static void main(String[] args) throws Exception {
        // 1. Flink execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Kafka data source
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","centos:9092");
        properties.setProperty("group.id", "aa");
        FlinkKafkaConsumer011<String> kafkaSource0 = new FlinkKafkaConsumer011<>("hhhh", new SimpleStringSchema(), properties);
        DataStreamSource<String> kafkaSource = env.addSource(kafkaSource0);

        // 3. Group the stream into 10-second batches for writing to MySQL
        SingleOutputStreamOperator<List<String>> streamOperator = kafkaSource.timeWindowAll(Time.seconds(10)).apply(new AllWindowFunction<String, List<String>, TimeWindow>() {
            @Override
            public void apply(TimeWindow window, Iterable<String> values, Collector<List<String>> out) throws Exception {
                ArrayList<String> students = Lists.newArrayList(values);
                if (students.size() > 0) {
                    out.collect(students);
                }
            }
        });

        // 4. Batch-write each window's data to MySQL
        streamOperator.addSink(new SinkToMySQL());

        env.execute("metricsCounter");
    }
}
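A side note on the windowing call: in newer Flink releases (around 1.12 and later), timeWindowAll(Time.seconds(10)) is deprecated in favor of an explicit window assigner. A minimal equivalent of step 3, assuming processing-time windows are what is intended here:

import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;

// Same 10-second tumbling batch, written with an explicit window assigner
SingleOutputStreamOperator<List<String>> streamOperator = kafkaSource
        .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(10)))
        .apply(new AllWindowFunction<String, List<String>, TimeWindow>() {
            @Override
            public void apply(TimeWindow window, Iterable<String> values, Collector<List<String>> out) {
                List<String> batch = Lists.newArrayList(values);
                if (!batch.isEmpty()) {
                    out.collect(batch);
                }
            }
        });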

3. MySQL sink class: writes the data of one window to MySQL as a single JDBC batch.

package com.learn.flinkBatchMysql;
import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.List;


/**
 * Sinks each window's data to MySQL in one JDBC batch.
 */
public class SinkToMySQL extends RichSinkFunction<List<String>> {
    private PreparedStatement ps;
    private BasicDataSource dataSource;
    private Connection connection;

    /**
     * Establish the connection in open() so it is not created and released
     * on every invoke() call.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        dataSource = new BasicDataSource();
        connection = getConnection(dataSource);
        String sql = "insert into Student(id, name, password, age) values(?, ?, ?, ?);";
        ps = this.connection.prepareStatement(sql);
    }

    @Override
    public void close() throws Exception {
        // close the statement before the connection
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
    }

    /**
     * invoke() is called once per batch, i.e. once per window.
     */
    @Override
    public void invoke(List<String> value, Context context) throws Exception {
        // iterate over the batch; only the name comes from the stream,
        // id/password/age are hard-coded demo values
        for (String student : value) {
            ps.setInt(1, 1);
            ps.setString(2, student);
            ps.setString(3, "123456");
            ps.setInt(4, 18);
            ps.addBatch();
        }
        int[] count = ps.executeBatch();  // execute all staged inserts as one batch
        System.out.println("Successfully inserted " + count.length + " rows");
    }


    private static Connection getConnection(BasicDataSource dataSource) {
        dataSource.setDriverClassName("com.mysql.jdbc.Driver");
        // NOTE: replace with your own MySQL address, username, and password
        dataSource.setUrl("jdbc:mysql://localhost:3306/novel");
        dataSource.setUsername("root");
        dataSource.setPassword("root");
        // connection pool parameters
        dataSource.setInitialSize(10);
        dataSource.setMaxTotal(50);
        dataSource.setMinIdle(2);

        Connection con = null;
        try {
            con = dataSource.getConnection();
            System.out.println("创建连接池:" + con);
        } catch (Exception e) {
            System.out.println("-----------mysql get connection has exception , msg = " + e.getMessage());
        }
        return con;
    }
}
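One robustness caveat: as written, getConnection() swallows the exception and returns null, so a connection failure only surfaces later as a NullPointerException in open(). A minimal fail-fast variant (a sketch, not part of the original code) lets the exception propagate so the task fails immediately and visibly:

    // Sketch: let a connection failure propagate to open() instead of
    // returning null and failing later with a NullPointerException
    // (requires: import java.sql.SQLException)
    private static Connection getConnection(BasicDataSource dataSource) throws SQLException {
        dataSource.setDriverClassName("com.mysql.jdbc.Driver");
        dataSource.setUrl("jdbc:mysql://localhost:3306/novel");
        dataSource.setUsername("root");
        dataSource.setPassword("root");
        dataSource.setInitialSize(10);
        dataSource.setMaxTotal(50);
        dataSource.setMinIdle(2);
        return dataSource.getConnection();  // SQLException propagates to open()
    }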

4. Oracle sink class: writes the data of one window to Oracle as a single JDBC batch.

package com.learn.flinkBatchMysql;
import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.List;


/**
 * Sinks each window's data to Oracle in one JDBC batch.
 */
public class SinkToOracle extends RichSinkFunction<List<String>> {
    private PreparedStatement ps;
    private BasicDataSource dataSource;
    private Connection connection;

    /**
     * Establish the connection in open() so it is not created and released
     * on every invoke() call.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        dataSource = new BasicDataSource();
        connection = getConnection(dataSource);
        String sql = "insert into STUDENT(ID,NAME,PASSWORD,AAAA) values(?, ?, ?, ?)";
        ps = this.connection.prepareStatement(sql);
    }

    @Override
    public void close() throws Exception {
        // close the statement before the connection
        if (ps != null) {
            ps.close();
        }
        if (connection != null) {
            connection.close();
        }
    }

    /**
     * invoke() is called once per batch, i.e. once per window.
     */
    @Override
    public void invoke(List<String> value, Context context) throws Exception {
        // iterate over the batch; only the name comes from the stream,
        // id/password/age are hard-coded demo values
        for (String student : value) {
            ps.setInt(1, 1);
            ps.setString(2, student);
            ps.setString(3, "123456");
            ps.setInt(4, 18);
            ps.addBatch();
        }
        int[] count = ps.executeBatch();  // execute all staged inserts as one batch
        System.out.println("Successfully inserted " + count.length + " rows");
    }


    private static Connection getConnection(BasicDataSource dataSource) {
        dataSource.setDriverClassName("oracle.jdbc.driver.OracleDriver");
        // NOTE: replace with your own Oracle address, username, and password
        dataSource.setUrl("jdbc:oracle:thin:@localhost:1521:xe");
        dataSource.setUsername("hr");
        dataSource.setPassword("hr");
        // connection pool parameters
        dataSource.setInitialSize(10);
        dataSource.setMaxTotal(50);
        dataSource.setMinIdle(2);

        Connection con = null;
        try {
            con = dataSource.getConnection();
            System.out.println("Created connection pool: " + con);
        } catch (Exception e) {
            System.out.println("Oracle getConnection failed, msg = " + e.getMessage());
        }
        return con;
    }
}
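Since the Oracle variant is a separate class here (named SinkToOracle so it can coexist with SinkToMySQL in the same package), the only change needed in the Driver is the sink that gets attached:

        // 4. Batch-write each window's data to Oracle instead of MySQL
        streamOperator.addSink(new SinkToOracle());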

With Oracle it is recommended to write table and column names in upper case, to avoid errors such as "table or view does not exist" or "invalid identifier".
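To illustrate the identifier rules (standard Oracle behavior, not code from the original article): unquoted identifiers are folded to upper case, while quoted ones are case-sensitive.

        // Unquoted identifiers are folded to upper case, so both resolve to STUDENT(ID, NAME):
        String ok     = "insert into STUDENT(ID, NAME) values(?, ?)";
        String alsoOk = "insert into Student(Id, Name) values(?, ?)";
        // A quoted lower-case identifier is case-sensitive; this fails with
        // "table or view does not exist" unless the table was created quoted:
        String fails  = "insert into \"student\"(\"id\", \"name\") values(?, ?)";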

5. Summary:

Many examples on the Internet are simple demos that create a database connection and insert rows into MySQL one at a time. If the volume of data to be written is large, single-row inserts put a lot of pressure on MySQL; to improve performance, you have to write in batches. Taking this article's setup as an example: a large stream can easily aggregate on the order of 10,000 records per minute, and writing them in batches is faster than inserting them one by one by a margin that is hard to overstate.
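One MySQL-specific detail worth knowing: with MySQL Connector/J, a JDBC batch is still sent to the server statement by statement unless rewriteBatchedStatements=true is set on the URL; with the flag, the driver rewrites executeBatch() into multi-row INSERTs, which is where most of the speedup comes from. For example, in getConnection():

        // Assumption: MySQL Connector/J as the driver; the flag lets it rewrite
        // executeBatch() into multi-row INSERT statements
        dataSource.setUrl("jdbc:mysql://localhost:3306/novel?rewriteBatchedStatements=true");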

Origin blog.csdn.net/qq_44962429/article/details/108057174