- State分类
- 算子状态 OperatorState
- 列表状态 ListState
- 联合列表状态 UnionListState
- 广播状态 BroadcastState
- 键控状态 KeyedState
- 值状态 ValueState
- 列表状态 ListState
- 映射状态 MapState
- 聚合状态 ReducingState & AggregatingState
- 提供一个SourceFunction,方便后面测试
/**
 * Demo source that emits a random key from a fixed set together with the
 * current timestamp every 100 ms, used by the state examples below.
 */
public class CustomSourceFunction extends RichSourceFunction<Tuple2<String, Long>> {
// volatile is required: cancel() is invoked from a different thread than run(),
// so without it the loop may never observe flag == false and the source
// would not stop (standard Flink SourceFunction cancellation pattern).
private volatile boolean flag = true;
@Override
public void run(SourceContext<Tuple2<String, Long>> ctx) throws Exception {
List<String> data = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
Random random = new Random();
while (flag) {
// Throttle emission to roughly 10 records per second.
Thread.sleep(100);
String key = data.get(random.nextInt(data.size()));
long value = System.currentTimeMillis();
ctx.collect(Tuple2.of(key, value));
}
}
@Override
public void cancel() {
// Called by the framework on another thread; run() exits on the next loop check.
flag = false;
}
}
- ValueState 示例
/**
 * ValueState example: per key, remembers the largest timestamp seen so far and
 * emits "key|timestamp" whenever a new maximum arrives; out-of-order (smaller)
 * timestamps trigger a warning instead.
 */
public class ValueStateDemo {
// Stateful flatMap holding the max timestamp per key in a ValueState.
private static RichFlatMapFunction<Tuple2<String, Long>, String> flatMapWithState = new RichFlatMapFunction<Tuple2<String, Long>, String>() {
// Largest timestamp observed for the current key (null until first element).
private ValueState<Long> timeState;
@Override
public void open(Configuration parameters) throws Exception {
ValueStateDescriptor<Long> descriptor = new ValueStateDescriptor<>("maxTime", Long.class);
timeState = getRuntimeContext().getState(descriptor);
}
@Override
public void flatMap(Tuple2<String, Long> value, Collector<String> out) throws Exception {
Long maxTime = timeState.value();
boolean isNewMax = maxTime == null || value.f1 > maxTime;
// Guard clause: non-increasing timestamp -> warn and drop the element.
if (!isNewMax) {
System.out.println("---- Warning! ----");
return;
}
timeState.update(value.f1);
out.collect(value.f0 + "|" + value.f1);
}
};
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(3);
DataStreamSource<Tuple2<String, Long>> customDS = env.addSource(new CustomSourceFunction());
customDS.keyBy(value -> value.f0)
.flatMap(flatMapWithState)
.print();
try {
env.execute();
} catch (Exception e) {
e.printStackTrace();
}
}
}
- ListState 示例
// Obtain a ListState handle; each keyed/parallel state scope keeps its own list of strings.
ListState<String> myListState = getRuntimeContext().getListState(new ListStateDescriptor<String>("my_liststate", String.class));
// Append one element to the list state.
myListState.add("state_1");
// get() returns an Iterable over all elements currently stored.
Iterable<String> stateIter = myListState.get();
for (String state : stateIter) {
System.out.println("state = " + state);
}
- MapState 示例
// Obtain a MapState handle keyed by String with Long values.
MapState<String, Long> myMapState = getRuntimeContext().getMapState(new MapStateDescriptor<String, Long>("my_mapstate", String.class, Long.class));
// Insert (or overwrite) an entry in the state.
myMapState.put("state_key_1", 1L);
// Look up by key; returns null if the key is absent.
Long value = myMapState.get("state_key_1");
- ReducingState 示例
// ReducingState folds every added value into a single aggregate via the ReduceFunction (here: max).
ReducingStateDescriptor<Long> stateDescriptor = new ReducingStateDescriptor<>("my_reducingstate", Math::max, Long.class);
ReducingState<Long> reducingState = getRuntimeContext().getReducingState(stateDescriptor);
// add() merges 100L into the current aggregate using Math::max.
reducingState.add(100L);
// get() returns the current aggregate (the max of all values added so far).
Long result = reducingState.get();
- AggregatingState与ReducingState同理,不做展示
- BroadcastState 示例
/**
 * BroadcastState example: a slowly-changing stream of tag keys is broadcast to
 * every parallel task and stored in a broadcast MapState; the fast event stream
 * is filtered so that only events whose key appears in the broadcast tags pass.
 */
public class BroadcastStateDemo {
public static void main(String[] args) {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// Low-throughput tag stream, broadcast to all subtasks.
DataStreamSource<String> tagDS = env.addSource(new TagsSourceFunction());
BroadcastStream<String> myBroadcast = tagDS.broadcast(new MapStateDescriptor<>("my_broadcast", String.class, String.class));
// High-throughput (key, timestamp) event stream to be filtered.
DataStreamSource<Tuple2<String, Long>> eventDS = env.addSource(new EventSourceFunction());
DataStream<Tuple2<String, Long>> resultDS = eventDS.connect(myBroadcast)
.process(new BroadcastProcessFunction<Tuple2<String, Long>, String, Tuple2<String, Long>>() {
// Must match the descriptor used to create the BroadcastStream
// (same name and types), or the state cannot be retrieved.
// (Stray extra semicolon from the original removed; made final.)
private final MapStateDescriptor<String, String> stateDescriptor = new MapStateDescriptor<>("my_broadcast", String.class, String.class);
@Override
public void processElement(Tuple2<String, Long> value, ReadOnlyContext ctx, Collector<Tuple2<String, Long>> out) throws Exception {
// Event side gets a read-only view: forward only tagged keys.
ReadOnlyBroadcastState<String, String> broadcastState = ctx.getBroadcastState(stateDescriptor);
if (broadcastState.contains(value.f0)) {
out.collect(value);
}
}
@Override
public void processBroadcastElement(String value, Context ctx, Collector<Tuple2<String, Long>> out) throws Exception {
// Broadcast side may mutate the state; the map value is unused —
// only key membership matters for the filter above.
BroadcastState<String, String> broadcastState = ctx.getBroadcastState(stateDescriptor);
broadcastState.put(value, "");
}
});
resultDS.print();
try {
env.execute();
} catch (Exception e) {
e.printStackTrace();
}
}
/** Emits one random tag out of {b, e, g} every 5 seconds. */
public static class TagsSourceFunction extends RichSourceFunction<String> {
// volatile: cancel() runs on a different thread than the run() loop,
// otherwise the loop may never observe the cancellation.
private volatile boolean flag = true;
@Override
public void run(SourceContext<String> ctx) throws Exception {
List<String> data = Arrays.asList("b", "e", "g");
Random random = new Random();
while (flag) {
ctx.collect(data.get(random.nextInt(data.size())));
Thread.sleep(5000);
}
}
@Override
public void cancel() {
flag = false;
}
}
/** Emits a random (key, currentTimeMillis) tuple every 100 ms. */
public static class EventSourceFunction extends RichSourceFunction<Tuple2<String, Long>> {
// volatile: same cross-thread cancellation pattern as above.
private volatile boolean flag = true;
@Override
public void run(SourceContext<Tuple2<String, Long>> ctx) throws Exception {
List<String> data = Arrays.asList("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k");
Random random = new Random();
while (flag) {
Thread.sleep(100);
String key = data.get(random.nextInt(data.size()));
ctx.collect(Tuple2.of(key, System.currentTimeMillis()));
}
}
@Override
public void cancel() {
flag = false;
}
}
}
- Checkpoint机制: 用于Flink自动保存/恢复应用的状态信息
- 启用 Checkpoint
// Enable checkpointing with a 60-second interval (argument is in milliseconds).
env.enableCheckpointing(60 * 1000);
- Timeout 超时
// Checkpoints taking longer than this are aborted.
// NOTE(review): 1000 ms is very tight for a 60 s checkpoint interval — confirm intentional.
env.getCheckpointConfig().setCheckpointTimeout(1000);
- FailOnCheckpointingErrors
// false: a failed checkpoint does not fail the whole job.
// NOTE(review): deprecated in newer Flink — setTolerableCheckpointFailureNumber replaces it.
env.getCheckpointConfig().setFailOnCheckpointingErrors(false);
- 最大同时运行的Checkpoint个数
// Allow up to 3 checkpoints to be in flight simultaneously.
env.getCheckpointConfig().setMaxConcurrentCheckpoints(3);
- Checkpoint之间的最小间隔时间
// At least 1000 ms must elapse between the end of one checkpoint and the start of the next.
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(1000);
- enableExternalizedCheckpoints
// Retain externalized checkpoints on failure, but delete them on job cancellation.
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION);
- 保存多个Checkpoint,编辑conf/flink-conf.yaml
// Default retains only the latest checkpoint; this keeps the 10 most recent.
state.checkpoints.num-retained: 10
- 重启策略
// Fixed-delay: restart at most 3 times, waiting 1000 ms between attempts.
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 1000));
// Failure-rate: tolerate at most 3 failures within 60 minutes, with a 10 s restart delay.
env.setRestartStrategy(RestartStrategies.failureRateRestart(3, Time.minutes(60), Time.seconds(10)));
- 从Checkpoint恢复Job
bin/flink run -s hdfs://skey_01:9000/flink-1.7.2/flink-checkpoints/19dd7c456b5507dc6b65cc836f319dd7/chk-30/_metadata flink-job.jar