Preface
Recently I've been planning to use Flink to process historical data: have Flink read the data back out of ClickHouse, replay it, and aggregate it upward into coarser time granularities.
1. Custom ClickHouse source
The custom source:
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import org.apache.flink.types.Row;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

/**
 * @author fl
 * @date: 2021-10-15 14:00
 * @description: custom ClickHouse source
 */
public class SourceFormClickhouse extends RichSourceFunction<Row> {

    /**
     * prepared statement
     */
    private transient PreparedStatement ps = null;
    /**
     * result set
     */
    private transient ResultSet result = null;
    /**
     * connection
     */
    private transient Connection conn = null;
    /**
     * running flag, cleared by cancel()
     */
    private volatile boolean flag = true;

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        // assign to the field, not a shadowing local variable,
        // otherwise close() never releases the connection
        conn = JdbcUtil.getClickhouseConnection();
        String sql = "SELECT date_time, hs_security_id, security_id, pre_close_px, "
                + "open_px, high_px, low_px, last_px, num_trades, volume, amount, "
                + "phase_code, bid_price, bid_qty, offer_price, offer_qty "
                + "FROM xxx";
        ps = conn.prepareStatement(sql);
    }

    @Override
    public void run(SourceContext<Row> ctx) throws Exception {
        // execute the query once: this is a bounded replay of historical data,
        // not a polling source, so there is no outer re-execution loop
        result = ps.executeQuery();
        while (flag && result.next()) {
            Row row = new Row(16);
            row.setField(0, result.getString("date_time"));
            row.setField(1, result.getString("hs_security_id"));
            row.setField(2, result.getString("security_id"));
            row.setField(3, result.getLong("pre_close_px"));
            row.setField(4, result.getLong("open_px"));
            row.setField(5, result.getLong("high_px"));
            row.setField(6, result.getLong("low_px"));
            row.setField(7, result.getLong("last_px"));
            row.setField(8, result.getLong("num_trades"));
            row.setField(9, result.getLong("volume"));
            row.setField(10, result.getLong("amount"));
            row.setField(11, result.getLong("phase_code"));
            // array columns are read as strings
            row.setField(12, result.getString("bid_price"));
            row.setField(13, result.getString("bid_qty"));
            row.setField(14, result.getString("offer_price"));
            row.setField(15, result.getString("offer_qty"));
            ctx.collect(row);
        }
    }

    @Override
    public void cancel() {
        flag = false;
    }

    @Override
    public void close() throws Exception {
        JdbcUtil.close(conn, ps, result);
    }
}
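JdbcUtil is referenced above but never shown. A minimal sketch of what it might look like, assuming the clickhouse-jdbc driver (ru.yandex.clickhouse.ClickHouseDriver) is on the classpath; the URL and credentials are placeholders:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class JdbcUtil {

    // placeholder address and credentials; substitute your own
    private static final String URL = "jdbc:clickhouse://127.0.0.1:8123/default";
    private static final String USER = "default";
    private static final String PASSWORD = "";

    public static Connection getClickhouseConnection() throws Exception {
        // legacy driver class from the clickhouse-jdbc artifact; the newer
        // artifact ships com.clickhouse.jdbc.ClickHouseDriver instead
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        return DriverManager.getConnection(URL, USER, PASSWORD);
    }

    public static void close(Connection conn, PreparedStatement ps, ResultSet rs) {
        // close in reverse order of creation, ignoring close-time failures
        try { if (rs != null) rs.close(); } catch (Exception ignored) { }
        try { if (ps != null) ps.close(); } catch (Exception ignored) { }
        try { if (conn != null) conn.close(); } catch (Exception ignored) { }
    }
}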
2. Wire in the source and process the data
Note how keyBy is used here. To key by a single field, just return that field from the key selector. To key by several fields, the String-based form keyBy("field1", "field2", "field3") used to work, but it is deprecated in the Flink version I'm on, 1.13.1.
The current approach is to return a tuple of the fields you want to key by from a key selector:
dataStreamSource.keyBy(t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()))
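One caveat: with a plain lambda, Flink's type extraction cannot recover Tuple2's generic parameters (they are erased at compile time), which in my experience surfaces as an InvalidTypesException when the job is built. If you hit that, the keyBy overload that takes an explicit TypeInformation should resolve it, roughly like this (requires org.apache.flink.api.common.typeinfo.Types):

// same key selector, but with the key type spelled out explicitly
dataStreamSource.keyBy(
        t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()),
        Types.TUPLE(Types.STRING, Types.STRING));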
/**
 * @author fl
 * @date: 2021-10-14 17:18
 * @description: aggregate offline data into different time granularities
 */
public class OfflineDataAggregation implements JobRunner, Serializable {

    @Override
    public void run(String[] args) throws Throwable {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // SourceFormClickhouse2: a variant of the source above that maps each
        // row into a Snapshot POJO instead of a raw Row
        DataStreamSource<Snapshot> dataStreamSource = env.addSource(new SourceFormClickhouse2());
        dataStreamSource.keyBy(t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()))
                // NOTE: event-time windows only fire once timestamps and
                // watermarks have been assigned; see the sketch below
                .window(TumblingEventTimeWindows.of(Time.seconds(15)))
                .process(new ProcessWindowFunction<Snapshot, Object, Tuple2<String, String>, TimeWindow>() {
                    @Override
                    public void process(Tuple2<String, String> key, Context context, Iterable<Snapshot> elements, Collector<Object> out) throws Exception {
                        // aggregation logic goes here; see the sketch below
                    }
                });
        dataStreamSource.print();
        env.execute("snapshot data replay");
    }
}
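As written, the window stage above has two gaps: the event-time windows never fire because no timestamps or watermarks are assigned, and the ProcessWindowFunction body is empty. A minimal sketch filling in both, assuming Snapshot exposes a hypothetical getEpochMillis() getter for its event time, and that the desired downsampling is simply keeping the last snapshot of each 15-second window (new imports: org.apache.flink.api.common.eventtime.WatermarkStrategy, org.apache.flink.api.common.typeinfo.Types, org.apache.flink.streaming.api.datastream.DataStream):

// assumes: Snapshot.getEpochMillis() (hypothetical) returns the event time in epoch millis;
// historical data is replayed in time order, so monotonous watermarks are enough
DataStream<Snapshot> withTimestamps = dataStreamSource.assignTimestampsAndWatermarks(
        WatermarkStrategy.<Snapshot>forMonotonousTimestamps()
                .withTimestampAssigner((snapshot, recordTs) -> snapshot.getEpochMillis()));

withTimestamps
        .keyBy(t -> Tuple2.of(t.getHsSecurityId(), t.getSecurityId()),
               Types.TUPLE(Types.STRING, Types.STRING))
        .window(TumblingEventTimeWindows.of(Time.seconds(15)))
        .process(new ProcessWindowFunction<Snapshot, Snapshot, Tuple2<String, String>, TimeWindow>() {
            @Override
            public void process(Tuple2<String, String> key, Context context,
                                Iterable<Snapshot> elements, Collector<Snapshot> out) {
                // downsample: emit the snapshot with the latest event time in the window
                Snapshot last = null;
                for (Snapshot s : elements) {
                    if (last == null || s.getEpochMillis() >= last.getEpochMillis()) {
                        last = s;
                    }
                }
                if (last != null) {
                    out.collect(last);
                }
            }
        })
        .print();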