flink实时数仓(九):增量同步mysql中数据

数据库中配置流表

-- Flow configuration table: one row per MySQL table that is synchronised into HBase.
-- The numeric codes of `mode` and `status` mirror HBaseStorageModeEnum and
-- FlowStatusEnum in the Java code (both start at 0).
CREATE TABLE `dbus_flow` (
  `flowId` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增ID',
  `mode` int(11) NOT NULL COMMENT '存储类型(0-STRING, 1-NATIVE, 2-PHOENIX, 默认STRING)',
  `databaseName` varchar(50) NOT NULL COMMENT 'database',
  `tableName` varchar(50) NOT NULL COMMENT 'table',
  `hbaseTable` varchar(50) NOT NULL COMMENT 'hbaseTable',
  `family` varchar(50) NOT NULL COMMENT 'family',
  `uppercaseQualifier` tinyint(1) NOT NULL COMMENT '字段名转大写, 默认为true',
  `commitBatch` int(11) NOT NULL COMMENT '批量提交的大小, ETL中用到',
  `rowKey` varchar(100) NOT NULL COMMENT '组成rowkey的字段名,必须用逗号分隔',
  `status` int(11) NOT NULL COMMENT '状态:0-初始,1-就绪,2-运行',
  PRIMARY KEY (`flowId`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;


样例数据

-- Sample flow: sync test.zyd_orders into HBase table learing_flink:zyd_orders,
-- column family '0', row key built from orderId, commit every 10 rows.
-- status = 2 is the "running" code in FlowStatusEnum, the only status the
-- incremental job forwards.
INSERT INTO `dbus_flow`
  (`flowId`, `mode`, `databaseName`, `tableName`, `hbaseTable`, `family`, `uppercaseQualifier`, `commitBatch`, `rowKey`, `status`)
VALUES
  (1, 0, 'test', 'zyd_orders', 'learing_flink:zyd_orders', '0', 1, 10, 'orderId', 2);

jdbc工具类

package dbus.utils;

import dbus.config.GlobalConfig;

import java.sql.*;

/**
 * Small JDBC helper: registers the configured driver once, hands out
 * connections and releases JDBC resources.
 */
public class JdbcUtil {
    // JDBC connection URL
    private static String url = GlobalConfig.DB_URL;
    // database user
    private static String user = GlobalConfig.USER_MAME;
    // database password
    private static String password = GlobalConfig.PASSWORD;
    // fully-qualified name of the JDBC driver class
    private static String driverClass = GlobalConfig.DRIVER_CLASS;

    /*
     * Register the driver exactly once, when the class is loaded.
     * A missing driver class is only reported here; getConnection() will
     * fail later if no suitable driver ends up being registered.
     */
    static {
        try {
            Class.forName(driverClass);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a new connection using the configured URL, user and password.
     *
     * @return a freshly opened connection; the caller is responsible for closing it
     * @throws RuntimeException wrapping the {@link SQLException} if the connection cannot be opened
     */
    public static Connection getConnection() {
        try {
            return DriverManager.getConnection(url, user, password);
        } catch (SQLException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }

    /**
     * Closes a statement and a connection; {@code null} arguments are skipped.
     *
     * @param stmt statement to close, may be {@code null}
     * @param conn connection to close, may be {@code null}
     * @throws RuntimeException if closing any of the resources failed
     */
    public static void close(Statement stmt, Connection conn) {
        close(null, stmt, conn);
    }

    /**
     * Closes a result set, a statement and a connection, in that order;
     * {@code null} arguments are skipped.
     *
     * <p>Every resource is attempted even if an earlier close fails, so a
     * failing {@code rs.close()} no longer leaks the statement and connection.
     * The first failure is rethrown afterwards; later failures are attached
     * to it as suppressed exceptions.
     *
     * @param rs   result set to close, may be {@code null}
     * @param stmt statement to close, may be {@code null}
     * @param conn connection to close, may be {@code null}
     * @throws RuntimeException if closing any of the resources failed
     */
    public static void close(ResultSet rs, Statement stmt, Connection conn) {
        RuntimeException failure = null;

        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                failure = record(failure, e);
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                failure = record(failure, e);
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                failure = record(failure, e);
            }
        }

        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Prints a close failure and folds it into the exception that will be
     * thrown once all resources have been attempted.
     */
    private static RuntimeException record(RuntimeException failure, SQLException e) {
        e.printStackTrace();
        if (failure == null) {
            return new RuntimeException(e);
        }
        failure.addSuppressed(e);
        return failure;
    }

    /** Manual smoke test: prints a connection obtained with the configured settings. */
    public static void main(String[] args) {
        System.out.println(JdbcUtil.getConnection());
    }

}


枚举类

CodeEnum

package enums;

public interface CodeEnum {
    /**
     * Returns the integer code associated with the implementing enum constant.
     *
     * @return the code of this enum constant
     */
    Integer getCode();
}

FlowStatusEnum

package enums;

import lombok.Getter;

/**
 * Lifecycle status of a flow; each constant carries an integer code and a
 * human-readable message. Getters are generated by Lombok.
 */
@Getter
public enum FlowStatusEnum implements CodeEnum {
    /**
     * Initial status (flow has just been added).
     */
    FLOWSTATUS_INIT(0, "初始状态"),
    /**
     * Ready status; a flow can be moved here after the initial load has finished.
     */
    FLOWSTATUS_READY(1, "就绪状态"),
    /**
     * Running status (incremental collection is active).
     */
    FLOWSTATUS_RUNNING(2, "运行状态");

    // Enum state is fixed at construction time, hence final.
    private final Integer code;

    private final String message;

    FlowStatusEnum(Integer code, String message) {
        this.code = code;
        this.message = message;
    }

}

HBaseStorageModeEnum

package enums;

import lombok.Getter;

/**
 * Storage mode used when writing values to HBase; each constant carries an
 * integer code and a message. Getters are generated by Lombok.
 */
@Getter
public enum HBaseStorageModeEnum implements CodeEnum{
    /**
     * STRING
     */
    STRING(0, "STRING"),
    /**
     * NATIVE
     */
    NATIVE(1, "NATIVE"),
    /**
     * PHOENIX
     */
    PHOENIX(2, "PHOENIX");

    // Enum state is fixed at construction time, hence final.
    private final Integer code;

    private final String message;

    HBaseStorageModeEnum(Integer code, String message) {
        this.code = code;
        this.message = message;
    }
}

配置类bean对象

Flow

package dbus.model;

import enums.FlowStatusEnum;
import enums.HBaseStorageModeEnum;
import lombok.Data;
import lombok.ToString;

/**
 * Configuration of one synchronisation flow (one source table mapped to one
 * HBase table). Accessors, equals/hashCode and toString are generated by Lombok.
 *
 * <p>{@code Serializable} is referenced by its fully-qualified name because
 * this file does not import {@code java.io.Serializable}.
 */
@Data
@ToString
public class Flow  implements java.io.Serializable{

    private Integer flowId;
    /**
     * Storage mode in HBase; defaults to the STRING mode code.
     */
    private int mode= HBaseStorageModeEnum.STRING.getCode();
    /**
     * Database / schema name.
     */
    private String databaseName;
    /**
     * MySQL table name.
     */
    private String tableName;
    /**
     * HBase table name.
     */
    private String hbaseTable;
    /**
     * Column family used for all columns of this flow.
     */
    private String family;
    /**
     * Whether column names are upper-cased before being used as qualifiers; defaults to true.
     */
    private boolean uppercaseQualifier=true;
    /**
     * Batch size for bulk commits, used by the ETL/sync code.
     */
    private int commitBatch;
    /**
     * Names of the columns that make up the row key, separated by commas.
     */
    private String rowKey;
    /**
     * Flow status code; defaults to the initial status.
     */
    private int status= FlowStatusEnum.FLOWSTATUS_INIT.getCode();
}

flink状态类编程

package dbus.function;

import com.alibaba.otter.canal.protocol.FlatMessage;
import dbus.incrementssync.IncrementSyncApp;
import dbus.model.Flow;
import enums.FlowStatusEnum;
import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.util.Collector;

/**
 * Connects the change-event stream with the broadcast flow configuration:
 * an event is emitted together with its {@link Flow} only when a flow for the
 * event's database/table exists in broadcast state and is in running status.
 */
public class DbusProcessFunction extends KeyedBroadcastProcessFunction<String, FlatMessage, Flow, Tuple2<FlatMessage, Flow>> {

    /**
     * Builds the broadcast-state key: the database name directly followed by the table name.
     * NOTE(review): there is no separator, so ("ab", "c") and ("a", "bc") map to the same key.
     */
    private static String stateKey(String database, String table) {
        return database + table;
    }

    @Override
    public void processElement(FlatMessage flatMessage, ReadOnlyContext ctx, Collector<Tuple2<FlatMessage, Flow>> out) throws Exception {
        // Look up the flow configured for this event's database/table.
        Flow matched = ctx.getBroadcastState(IncrementSyncApp.flowStateDescriptor)
                .get(stateKey(flatMessage.getDatabase(), flatMessage.getTable()));

        // No configuration for this table: drop the event.
        if (matched == null) {
            return;
        }
        // Only flows in running status are forwarded.
        if (matched.getStatus() != FlowStatusEnum.FLOWSTATUS_RUNNING.getCode()) {
            return;
        }
        out.collect(Tuple2.of(flatMessage, matched));
    }

    @Override
    public void processBroadcastElement(Flow flow, Context ctx, Collector<Tuple2<FlatMessage, Flow>> collector) throws Exception {
        // Insert or overwrite the configuration stored for this database/table.
        BroadcastState<String, Flow> flows = ctx.getBroadcastState(IncrementSyncApp.flowStateDescriptor);
        flows.put(stateKey(flow.getDatabaseName(), flow.getTableName()), flow);
    }
}

MD5加密

package dbus.utils;

import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
 * MD5 helper.
 */
public class Md5Utils {
    /**
     * Computes the MD5 digest of the UTF-8 bytes of {@code str}.
     *
     * @param str text to hash; must not be {@code null}
     * @return the digest as 32 lowercase hexadecimal characters
     * @throws IllegalStateException if the runtime provides no MD5 implementation
     *         (previously {@code null} was returned, which only moved the failure
     *         to the caller as a NullPointerException)
     */
    public static String getMD5String(String str) {
        final byte[] digest;
        try {
            digest = MessageDigest.getInstance("MD5").digest(str.getBytes(StandardCharsets.UTF_8));
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 algorithm is not available", e);
        }

        // Two hex characters per byte; the high nibble is always written, so
        // values below 0x10 keep their leading zero.
        StringBuilder hex = new StringBuilder(digest.length * 2);
        for (byte b : digest) {
            hex.append(Character.forDigit((b >> 4) & 0xf, 16));
            hex.append(Character.forDigit(b & 0xf, 16));
        }
        return hex.toString();
    }
}

Hbase sink 模板

bean对象

package dbus.sink;

import java.util.ArrayList;
import java.util.List;

public class HRow {
    private byte[] rowkey;
    private List<HCell> cells = new ArrayList<>();

    public HRow() {
    }

    public HRow(byte[] rowkey) {
        this.rowkey = rowkey;
    }

    public byte[] getRowkey() {
        return rowkey;
    }

    public void setRowkey(byte[] rowkey) {
        this.rowkey = rowkey;
    }

    public List<HCell> getCells() {
        return cells;
    }

    public void setCells(List<HCell> cells) {
        this.cells = cells;
    }

    public void addCell(String family, String qualifier, byte[] value) {
        HCell hCell = new HCell(family, qualifier, value);
        cells.add(hCell);
    }


    public class HCell{
        private String family;
        private String qualifier;
        private byte[] value;

        public HCell() {
        }

        public HCell(String family, String qualifier, byte[] value) {
            this.family = family;
            this.qualifier = qualifier;
            this.value = value;
        }

        public String getFamily() {
            return family;
        }

        public void setFamily(String family) {
            this.family = family;
        }

        public String getQualifier() {
            return qualifier;
        }

        public void setQualifier(String qualifier) {
            this.qualifier = qualifier;
        }

        public byte[] getValue() {
            return value;
        }

        public void setValue(byte[] value) {
            this.value = value;
        }
    }
}

hbase 工具类

package dbus.sink;

import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

/**
 * hbase 工具类
 */
@Slf4j
public class HbaseTemplate implements Serializable {

    private Configuration hbaseConfig;                                      // hbase配置对象
    private Connection    conn;                                             // hbase连接

    public HbaseTemplate(Configuration hbaseConfig){
        this.hbaseConfig = hbaseConfig;
        initConn();
    }

    private void initConn() {
        try {
            this.conn = ConnectionFactory.createConnection(hbaseConfig);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public Connection getConnection() {
        if (conn == null || conn.isAborted() || conn.isClosed()) {
            initConn();
        }
        return conn;
    }

    public boolean tableExists(String tableName) {
        try (HBaseAdmin admin = (HBaseAdmin) getConnection().getAdmin()) {

            return admin.tableExists(TableName.valueOf(tableName));
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public void createTable(String tableName, String... familyNames) {
        try (HBaseAdmin admin = (HBaseAdmin) getConnection().getAdmin()) {

            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(tableName));
            // 添加列簇
            if (familyNames != null) {
                for (String familyName : familyNames) {
                    HColumnDescriptor hcd = new HColumnDescriptor(familyName);
                    desc.addFamily(hcd);
                }
            }
            admin.createTable(desc);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    public void disableTable(String tableName) {
        try (HBaseAdmin admin = (HBaseAdmin) getConnection().getAdmin()) {
            admin.disableTable(tableName);
        } catch (IOException e) {
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }
    }

    public void deleteTable(String tableName) {
        try (HBaseAdmin admin = (HBaseAdmin) getConnection().getAdmin()) {
            if (admin.isTableEnabled(tableName)) {
                disableTable(tableName);
            }
            admin.deleteTable(tableName);
        } catch (IOException e) {
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }
    }

    /**
     * 插入一行数据
     *
     * @param tableName 表名
     * @param hRow 行数据对象
     * @return 是否成功
     */
    public Boolean put(String tableName, HRow hRow) {
        boolean flag = false;
        try {
            HTable table = (HTable) getConnection().getTable(TableName.valueOf(tableName));
            Put put = new Put(hRow.getRowkey());
            for (HRow.HCell hCell : hRow.getCells()) {
                put.addColumn(Bytes.toBytes(hCell.getFamily()), Bytes.toBytes(hCell.getQualifier()), hCell.getValue());
            }
            table.put(put);
            flag = true;
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }
        return flag;

    }

    /**
     * 批量插入
     *
     * @param tableName 表名
     * @param rows 行数据对象集合
     * @return 是否成功
     */
    public Boolean puts(String tableName, List<HRow> rows) {

        boolean flag = false;
        try {
            HTable table = (HTable) getConnection().getTable(TableName.valueOf(tableName));
            List<Put> puts = new ArrayList<>();
            System.out.println(tableName+"------------------------------------------------");

            for (HRow hRow : rows) {
                Put put = new Put(hRow.getRowkey());
                for (HRow.HCell hCell : hRow.getCells()) {
                    put.addColumn(Bytes.toBytes(hCell.getFamily()),
                        Bytes.toBytes(hCell.getQualifier()),
                        hCell.getValue());
                }
                puts.add(put);
            }
            if (!puts.isEmpty()) {
                table.put(puts);
            }
            flag = true;
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }
        return flag;
    }

    /**
     * 批量删除数据
     *
     * @param tableName 表名
     * @param rowKeys rowKey集合
     * @return 是否成功
     */
    public Boolean deletes(String tableName, Set<byte[]> rowKeys) {
        boolean flag = false;
        try {
            HTable table = (HTable) getConnection().getTable(TableName.valueOf(tableName));
            List<Delete> deletes = new ArrayList<>();
            for (byte[] rowKey : rowKeys) {
                Delete delete = new Delete(rowKey);
                deletes.add(delete);
            }
            if (!deletes.isEmpty()) {
                table.delete(deletes);
            }
            flag = true;
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        }
        return flag;
    }

    public void close() throws IOException {
        if (conn != null) {
            conn.close();
        }
    }
}

根据canal中的数据解析成hbase数据格式

package dbus.sink;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.alibaba.otter.canal.protocol.FlatMessage;
import dbus.model.Flow;
import dbus.utils.Md5Utils;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Applies change events (canal {@link FlatMessage}s) to HBase according to a
 * {@link Flow} configuration.
 */
@Slf4j
public class HbaseSyncService implements Serializable {
    private HbaseTemplate hbaseTemplate;                                    // HBase access helper

    public HbaseSyncService(HbaseTemplate hbaseTemplate){
        this.hbaseTemplate = hbaseTemplate;
    }

    /**
     * Synchronises one change event.
     *
     * <p>Only INSERT events are written. UPDATE and DELETE events are
     * recognised but not applied yet.
     *
     * @param flow flow configuration; the event is ignored when {@code null}
     * @param dml  change event; ignored when {@code null}
     */
    public void sync(Flow flow, FlatMessage dml) {
        if (flow == null || dml == null) {
            return;
        }
        String type = dml.getType();
        if ("INSERT".equalsIgnoreCase(type)) {
            insert(flow, dml);
        } else if ("UPDATE".equalsIgnoreCase(type) || "DELETE".equalsIgnoreCase(type)) {
            // TODO: update/delete handling is not implemented; these events are dropped.
            log.debug("Ignoring unsupported DML type: {}", type);
        }
        if (log.isDebugEnabled()) {
            log.debug("DML: {}", JSON.toJSONString(dml, SerializerFeature.WriteMapNullValue));
        }
    }

    /**
     * Writes the rows of an INSERT event to the flow's HBase table, flushing
     * every {@code flow.getCommitBatch()} rows and once more for the remainder.
     *
     * @param flow flow configuration
     * @param dml  change event
     * @throws RuntimeException if a row ends up without a row key
     */
    private void insert(Flow flow, FlatMessage dml) {
        List<Map<String, String>> data = dml.getData();
        if (data == null || data.isEmpty()) {
            return;
        }

        // The row-key columns are the same for every row: split them once.
        String[] rowKeyColumns = flow.getRowKey() == null ? null : splitRowKeyColumns(flow.getRowKey());

        // A non-positive commitBatch (e.g. the int default 0) used to fail with
        // "/ by zero" in the modulo check; treat it as "one flush at the end".
        int batchSize = flow.getCommitBatch() > 0 ? flow.getCommitBatch() : Integer.MAX_VALUE;

        List<HRow> rows = new ArrayList<>();
        for (Map<String, String> r : data) {
            HRow hRow = new HRow();

            // build the composite row key
            if (rowKeyColumns != null) {
                hRow.setRowkey(Bytes.toBytes(getRowKey(rowKeyColumns, r)));
            }

            convertData2Row(flow, hRow, r);
            if (hRow.getRowkey() == null) {
                throw new RuntimeException("empty rowKey: " + hRow.toString()+",Flow: "+flow.toString());
            }
            rows.add(hRow);

            if (rows.size() >= batchSize) {
                hbaseTemplate.puts(flow.getHbaseTable(), rows);
                rows.clear();
            }
        }
        // flush whatever is left after the last full batch
        if (!rows.isEmpty()) {
            hbaseTemplate.puts(flow.getHbaseTable(), rows);
        }
    }

    /**
     * Splits the comma-separated row-key definition into column names,
     * trimming whitespace around each name so that "a, b" resolves column "b".
     */
    private static String[] splitRowKeyColumns(String rowKey) {
        String[] columns = rowKey.trim().split(",");
        for (int idx = 0; idx < columns.length; idx++) {
            columns[idx] = columns[idx].trim();
        }
        return columns;
    }

    /**
     * Builds the row key from the configured columns: the column values are
     * joined with {@code |} (a missing value contributes an empty string) and
     * prefixed with the first 8 hex characters of the MD5 of that joined
     * string plus {@code _}.
     *
     * @param rowKeyColumns columns that make up the row key
     * @param data column name to value map of one row
     * @return the row key string
     */
    private static String getRowKey(String[] rowKeyColumns, Map<String, String> data) {
        StringBuilder joined = new StringBuilder();
        for (int idx = 0; idx < rowKeyColumns.length; idx++) {
            if (idx > 0) {
                joined.append("|");
            }
            String value = data.get(rowKeyColumns[idx]);
            if (value != null) {
                joined.append(value);
            }
        }

        // Other row-key strategies could be plugged in here; the MD5 prefix is hard-coded.
        String plainKey = joined.toString();
        return Md5Utils.getMD5String(plainKey).substring(0, 8) + "_" + plainKey;
    }


    /**
     * Copies every non-null column of {@code data} into {@code hRow} as a cell
     * of the flow's column family, upper-casing the qualifier when the flow asks for it.
     *
     * @param flow flow configuration
     * @param hRow target row
     * @param data column name to value map of one row
     */
    private static void convertData2Row(Flow flow, HRow hRow, Map<String, String> data) {
        String familyName = flow.getFamily();
        boolean uppercase = flow.isUppercaseQualifier();

        for (Map.Entry<String, String> entry : data.entrySet()) {
            String value = entry.getValue();
            if (value == null) {
                continue;
            }
            String qualifier = entry.getKey();
            if (uppercase) {
                // Locale.ROOT keeps the result independent of the JVM default
                // locale (e.g. Turkish dotted/dotless i).
                qualifier = qualifier.toUpperCase(java.util.Locale.ROOT);
            }
            hRow.addCell(familyName, qualifier, Bytes.toBytes(value));
        }
    }
}


通用sink

package dbus.sink;

import com.alibaba.otter.canal.protocol.FlatMessage;
import dbus.config.GlobalConfig;
import dbus.model.Flow;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.hadoop.hbase.HBaseConfiguration;

/**
 * Flink sink that writes each (change event, flow) pair to HBase through
 * {@link HbaseSyncService}.
 */
@Slf4j
public class HbaseSyncSink extends RichSinkFunction<Tuple2<FlatMessage, Flow>> {

    // Both fields are created in open(); transient keeps them out of the serialized function.
    private transient HbaseTemplate hbaseTemplate;
    private transient HbaseSyncService hbaseSyncService;

    /**
     * Builds the HBase client configuration from {@link GlobalConfig} and
     * opens the connection used by this sink instance.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);

        org.apache.hadoop.conf.Configuration hbaseConfig = HBaseConfiguration.create();
        hbaseConfig.set("hbase.zookeeper.quorum", GlobalConfig.HBASE_ZOOKEEPER_QUORUM);
        hbaseConfig.set("hbase.zookeeper.property.clientPort", GlobalConfig.HBASE_ZOOKEEPER_PROPERTY_CLIENTPORT);
        hbaseConfig.set("zookeeper.znode.parent", GlobalConfig.ZOOKEEPER_ZNODE_PARENT);

        hbaseTemplate = new HbaseTemplate(hbaseConfig);
        hbaseSyncService = new HbaseSyncService(hbaseTemplate);
    }

    /**
     * Releases the HBase connection opened in {@link #open(Configuration)};
     * previously it was never closed.
     */
    @Override
    public void close() throws Exception {
        try {
            if (hbaseTemplate != null) {
                hbaseTemplate.close();
                hbaseTemplate = null;
            }
        } finally {
            super.close();
        }
    }

    /**
     * Hands one element to the sync service.
     *
     * @param value f0 is the change event, f1 the flow configuration matched to it
     */
    @Override
    public void invoke(Tuple2<FlatMessage, Flow> value, Context context) throws Exception {
        hbaseSyncService.sync(value.f1, value.f0);
    }

}

发布了483 篇原创文章 · 获赞 62 · 访问量 14万+

猜你喜欢

转载自blog.csdn.net/wwwzydcom/article/details/104095715