Flink keyBy Windowing in Practice

Preface

Recently I planned to use Flink to process historical data, that is, to have Flink read the data back out of ClickHouse, replay it, and roll it up into coarser time granularities.

1. A custom ClickHouse source

The custom source:

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.types.Row;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

/**
 * @author fl
 * @date: 2021-10-15 14:00
 * @description: Custom ClickHouse source
 */
public class SourceFormClickhouse extends RichSourceFunction<Row> {

    /**
     * Prepared statement
     */
    PreparedStatement ps = null;
    /**
     * Result set
     */
    ResultSet result = null;
    /**
     * Connection
     */
    Connection conn = null;
    /**
     * Running flag; cleared by cancel() to stop the source
     */
    private boolean flag = true;

    @Override
    public void open(Configuration parameters) throws Exception {
        // Assign to the field so close() can release the connection later.
        conn = JdbcUtil.getClickhouseConnection();
        StringBuilder sql = new StringBuilder();
        sql.append("SELECT\n" +
                "\tdate_time,\n" +
                "\ths_security_id,\n" +
                "\tsecurity_id,\n" +
                "\tpre_close_px,\n" +
                "\topen_px,\n" +
                "\thigh_px,\n" +
                "\tlow_px,\n" +
                "\tlast_px,\n" +
                "\tnum_trades,\n" +
                "\tvolume,\n" +
                "\tamount,\n" +
                "\tphase_code,\n" +
                "\tbid_price,\n" +
                "\tbid_qty,\n" +
                "\toffer_price,\n" +
                "\toffer_qty \n" +
                "FROM\n" +
                "xxx");
        ps = conn.prepareStatement(sql.toString());
        super.open(parameters);
    }

    @Override
    public void run(SourceContext<Row> ctx) throws Exception {
        while (flag) {
            result = ps.executeQuery();
            while (result.next()) {
                Row row = new Row(16);
                row.setField(0, result.getString("date_time"));
                row.setField(1, result.getString("hs_security_id"));
                row.setField(2, result.getString("security_id"));
                row.setField(3, result.getLong("pre_close_px"));
                row.setField(4, result.getLong("open_px"));
                row.setField(5, result.getLong("high_px"));
                row.setField(6, result.getLong("low_px"));
                row.setField(7, result.getLong("last_px"));
                row.setField(8, result.getLong("num_trades"));
                row.setField(9, result.getLong("volume"));
                row.setField(10, result.getLong("amount"));
                row.setField(11, result.getLong("phase_code"));
                // Array columns are read back as strings.
                row.setField(12, result.getString("bid_price"));
                row.setField(13, result.getString("bid_qty"));
                row.setField(14, result.getString("offer_price"));
                row.setField(15, result.getString("offer_qty"));
                ctx.collect(row);
            }
            // One-shot historical replay: stop after the table has been read once.
            flag = false;
        }
    }

    @Override
    public void cancel() {
        flag = false;
    }

    @Override
    public void close() throws Exception {
        JdbcUtil.close(conn, ps, result);
    }
}
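
Both open() and close() lean on a JdbcUtil helper that the post never shows. Below is a minimal sketch of what it could look like, assuming the classic ClickHouse JDBC driver (ru.yandex.clickhouse.ClickHouseDriver); the driver class, URL, and credentials here are stand-ins, not taken from the original:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class JdbcUtil {

    // Placeholder connection URL; point it at your own ClickHouse instance.
    private static final String URL = "jdbc:clickhouse://localhost:8123/default";

    public static Connection getClickhouseConnection() throws Exception {
        // Classic driver class; newer projects may use com.clickhouse.jdbc.ClickHouseDriver.
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        return DriverManager.getConnection(URL);
    }

    public static void close(Connection conn, PreparedStatement ps, ResultSet rs) {
        // Close in reverse order of creation, swallowing individual failures.
        try { if (rs != null) rs.close(); } catch (Exception ignored) { }
        try { if (ps != null) ps.close(); } catch (Exception ignored) { }
        try { if (conn != null) conn.close(); } catch (Exception ignored) { }
    }
}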


2. Wire the source into a job and process the data

Note how keyBy is used here. Keying by a single field just means returning that field from the key selector. To key by several fields, keyBy("field1", "field2", "field3") used to work, but I am on Flink 1.13.1 and that method has been deprecated.
The new approach is to wrap the fields you want to group by in a tuple.

 dataStreamSource.keyBy(t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()))
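
One caveat: because the lambda returns a Tuple2, Flink's type extraction cannot recover the tuple's generic parameters and rejects the job with an InvalidTypesException. Passing the key type explicitly through the two-argument keyBy overload avoids this:

 dataStreamSource.keyBy(
         t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()),
         Types.TUPLE(Types.STRING, Types.STRING))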

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.io.Serializable;

/**
 * @author fl
 * @date: 2021-10-14 17:18
 * @description: Offline aggregation of data into different time granularities
 */
public class OfflineDataAggregation implements JobRunner, Serializable {

    @Override
    public void run(String[] args) throws Throwable {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // SourceFormClickhouse2: presumably a variant of the source above that emits Snapshot objects.
        DataStreamSource<Snapshot> dataStreamSource = env.addSource(new SourceFormClickhouse2());
        dataStreamSource.keyBy(
                        t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()),
                        // Explicit key type: Flink cannot infer Tuple2's generics from a lambda.
                        Types.TUPLE(Types.STRING, Types.STRING))
                .window(TumblingEventTimeWindows.of(Time.seconds(15)))
                .process(new ProcessWindowFunction<Snapshot, Object, Tuple2<String, String>, TimeWindow>() {

                    @Override
                    public void process(Tuple2<String, String> key, Context context,
                                        Iterable<Snapshot> elements, Collector<Object> out) throws Exception {
                        // Aggregate this key's 15-second window of snapshots here and emit the result.
                    }
                });
        dataStreamSource.print();
        env.execute("snapshot replay");
    }
}
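
One more gap to close before this runs end to end: TumblingEventTimeWindows is an event-time assigner, and the job above never assigns timestamps or watermarks, so its windows would never fire. Below is a minimal sketch of the missing wiring (extra imports: org.apache.flink.api.common.eventtime.WatermarkStrategy and java.time.Duration), assuming hypothetical getters getTimestamp() (epoch milliseconds) and getVolume() on Snapshot that the original post does not show:

        // Tolerate up to 5 seconds of out-of-order snapshots.
        WatermarkStrategy<Snapshot> watermarks = WatermarkStrategy
                .<Snapshot>forBoundedOutOfOrderness(Duration.ofSeconds(5))
                // getTimestamp() is a hypothetical epoch-millis getter on Snapshot.
                .withTimestampAssigner((snapshot, recordTs) -> snapshot.getTimestamp());

        dataStreamSource
                .assignTimestampsAndWatermarks(watermarks)
                .keyBy(t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()),
                        Types.TUPLE(Types.STRING, Types.STRING))
                .window(TumblingEventTimeWindows.of(Time.seconds(15)))
                .process(new ProcessWindowFunction<Snapshot, String, Tuple2<String, String>, TimeWindow>() {
                    @Override
                    public void process(Tuple2<String, String> key, Context context,
                                        Iterable<Snapshot> elements, Collector<String> out) {
                        // Example roll-up: total traded volume per key per 15-second window.
                        long totalVolume = 0L;
                        for (Snapshot s : elements) {
                            totalVolume += s.getVolume(); // hypothetical getter
                        }
                        out.collect(key.f0 + "," + key.f1 + ","
                                + context.window().getEnd() + "," + totalVolume);
                    }
                })
                .print();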

Reposted from blog.csdn.net/weixin_43975771/article/details/120807402