1.需求
/** * 需求:每两个元素输出一次 */ 2 代码
package com.nx.streaming.lesson03; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; /** * 需求:每两个元素输出一次 */ public class TestOperatorStateMain { public static void main(String[] args) throws Exception{ StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Tuple2<String, Integer>> dataStreamSource = env.fromElements( Tuple2.of("Spark", 3), Tuple2.of("Flink", 5), Tuple2.of("Hadoop", 7), Tuple2.of("Spark", 4)); dataStreamSource .addSink(new CustomSink(2)).setParallelism(1); env.execute("TestOperatorStateMain..."); } }
package com.nx.streaming.lesson03; import org.apache.flink.api.common.state.ListState; import org.apache.flink.api.common.state.ListStateDescriptor; import org.apache.flink.api.common.typeinfo.TypeHint; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.runtime.state.FunctionInitializationContext; import org.apache.flink.runtime.state.FunctionSnapshotContext; import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; import org.apache.flink.streaming.api.functions.sink.SinkFunction; import java.util.ArrayList; import java.util.List; /** * * */ public class CustomSink implements SinkFunction<Tuple2<String, Integer>>, CheckpointedFunction { // 用于缓存结果数据的 private List<Tuple2<String, Integer>> bufferElements; // 表示内存中数据的大小阈值 private int threshold; // 用于保存内存中的状态信息 private ListState<Tuple2<String, Integer>> checkpointState; // StateBackend // checkpoint public CustomSink(int threshold) { this.threshold = threshold; this.bufferElements = new ArrayList<>(); } @Override public void invoke(Tuple2<String, Integer> value, Context context) throws Exception { // 可以将接收到的每一条数据保存到任何的存储系统中 bufferElements.add(value); if (bufferElements.size() == threshold) { // 简单打印 System.out.println(bufferElements); bufferElements.clear(); } } // 用于将内存中数据保存到状态中 @Override public void snapshotState(FunctionSnapshotContext context) throws Exception { checkpointState.clear(); for (Tuple2<String, Integer> ele : bufferElements) { checkpointState.add(ele); } } // 用于在程序挥发的时候从状态中恢复数据到内存 @Override public void initializeState(FunctionInitializationContext context) throws Exception { ListStateDescriptor<Tuple2<String, Integer>> descriptor = new ListStateDescriptor<Tuple2<String, Integer>>( "bufferd -elements", TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {})); // 注册一个 operator state checkpointState = context.getOperatorStateStore().getListState(descriptor); if (context.isRestored()) { for (Tuple2<String, Integer> ele : checkpointState.get()) { bufferElements.add(ele); } } } }