join+window+eventtime

使用 eventtime 时需要注意的事情比较多,否则会出现十分诡异的窗口不触发计算的情况。直接看如下示例代码:

/**
 * Simple mutable record carried through the demo streams: a person's age, id and name
 * together with an event timestamp.
 */
public class People {

    String age;
    long eventTime;
    String eventTimeStr;
    String id;
    String name;

    /**
     * Creates a record from its four core values; {@code eventTimeStr} is left unset.
     *
     * @param age       age, kept as text
     * @param eventTime event timestamp
     * @param id        identifier
     * @param name      display name
     */
    public People(String age, long eventTime, String id, String name) {
        this.name = name;
        this.id = id;
        this.eventTime = eventTime;
        this.age = age;
    }

    /** @return the age text */
    public String getAge() {
        return this.age;
    }

    /** @param age the new age text */
    public void setAge(String age) {
        this.age = age;
    }

    /** @return the event timestamp */
    public long getEventTime() {
        return this.eventTime;
    }

    /** @param eventTime the new event timestamp */
    public void setEventTime(long eventTime) {
        this.eventTime = eventTime;
    }

    /**
     * Renders {@code eventTime} as {@code yyyy-MM-dd HH:mm:ss}.
     *
     * <p>The value is always derived from {@code eventTime}; whatever was stored through
     * {@link #setEventTimeStr(String)} is not consulted.
     *
     * @return the formatted event time
     */
    public String getEventTimeStr() {
        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(eventTime);
    }

    /** @param eventTimeStr text stored in the {@code eventTimeStr} field */
    public void setEventTimeStr(String eventTimeStr) {
        this.eventTimeStr = eventTimeStr;
    }

    /** @return the identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the new identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the display name */
    public String getName() {
        return this.name;
    }

    /** @param name the new display name */
    public void setName(String name) {
        this.name = name;
    }
}
public class MyTestJob {

    public static void main(String[] args) throws Exception {


        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 10 * 1000));
        String checkpointPath = "file:///Users/kkk/checkpoints/cpk/ttttt6666";

        //重启策略
        //状态checkpoint保存
        StateBackend fsStateBackend = new FsStateBackend(checkpointPath);
        env.setStateBackend(fsStateBackend);
        env.getCheckpointConfig().setFailOnCheckpointingErrors(false);
        env.enableCheckpointing(60 * 1000).getCheckpointConfig().enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);


        DataStream<String> nameText = env.socketTextStream("127.0.0.1", 9999);

        DataStream<People> nameStream = nameText.flatMap(new FlatMapFunction<String, People>() {
            @Override
            public void flatMap(String s, Collector<People> collector) throws Exception {
                System.out.println("name:" + s);
                String[] s1 = s.split("\\|");
                if (s1.length >= 4) {
                    // 0|1602951665626|1|1
                    collector.collect(new People(s1[0], System.currentTimeMillis(), s1[2], s1[3]));
                }
            }
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<People>(Time.seconds(1)) {
            @Override
            public long extractTimestamp(People people) {
                return people.eventTime;
            }
        });


        DataStream<String> ageText = env.socketTextStream("127.0.0.1", 9998);

        DataStream<People> ageStream = ageText.flatMap(new FlatMapFunction<String, People>() {
            @Override
            public void flatMap(String s, Collector<People> collector) throws Exception {
                System.out.println("age:" + s);
                String[] s1 = s.split("\\|");
                if (s1.length >= 4) {
                    // 0|1602951665626|1|1
                    collector.collect(new People(s1[0], System.currentTimeMillis(), s1[2], s1[3]));
                }
            }
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<People>(Time.seconds(1)) {
            @Override
            public long extractTimestamp(People people) {
                return people.eventTime;
            }
        });

        DataStream<People> coStream = nameStream.coGroup(ageStream)
                .where(new KeySelector<People, String>() {
                    @Override
                    public String getKey(People people) throws Exception {
                        return people.id;
                    }
                })
                .equalTo(new KeySelector<People, String>() {
                    @Override
                    public String getKey(People people) throws Exception {
                        return people.id;
                    }
                }).window(TumblingEventTimeWindows.of(Time.minutes(1)))
                .apply(new CoGroupFunction<People, People, People>() {
                    @Override
                    public void coGroup(Iterable<People> nameIterable, Iterable<People> ageIterable, Collector<People> collector) throws Exception {
                        System.out.println("nameIterable:" + JSONObject.toJSONString(nameIterable));
                        System.out.println("ageIterable:" + JSONObject.toJSONString(ageIterable));
                        Map<String, People> tempMap = new HashMap<>();
                        ageIterable.forEach(people -> tempMap.put(people.id, people));

                        Iterator<People> iterator = nameIterable.iterator();
                        while (iterator.hasNext()) {
                            People people = iterator.next();
                            if (tempMap.containsKey(people.id)) {
                                people.age = tempMap.get(people.id).age;
                            }
                            collector.collect(people);
                        }
                    }
                });

        coStream.addSink(new SinkFunction<People>() {
            @Override
            public void invoke(People value, Context context) throws Exception {
                System.out.println("addSink:" +JSONObject.toJSONString(value));
            }
        }).setParallelism(1);
        // execute program
        env.execute("Java from SocketTextStream Example");
    }

要触发 eventtime window 的计算,以下条件缺一不可:

  1. env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 设置eventtime为flink时间特性
  2. 注册数据中的时间戳为事件时间。注意数据本身必须包含时间戳,且必须是毫秒时间戳,不能是秒(上面的示例代码为了方便本地演示,直接用 System.currentTimeMillis() 作为事件时间,并没有解析输入行中的第二个字段)
  3. 同 processtime 必须关联key相等 并且在同一个时间窗口上有符合要求的两个流的数据
  4. 注意:即使满足了上述 3 点,eventtime 的 window 计算仍然可能不触发。因为使用 eventtime 时需要我们自己推进时间线:只有事件的水位线(watermark)大于 window 的 end time,窗口才会触发计算。也就是说,如果两个网络端口各自只模拟一条数据,窗口是永远不会触发计算的;必须要有下一条满足条件的数据到达,把水位线升高到 end time 以上,才会触发计算。并且需要 join 的两个流的水位线都要高于 window 的 end time 才会触发。结合实际考虑:如果你的数据流不是连续到达,或者中间有较大间隔,eventtime 的滚动窗口可能不适合,因为"最后一个" window 可能不能及时触发
  5. 满足了上述 4 点,在本地调试的时候还是可能触发不了 window,这是为什么呢?因为在本地 IDEA 环境下如果不设置并行度,会默认以 CPU 的核数作为并行度,这样流的数据可能被随机分配到不同的 pipeline 中去执行,匹配不到数据,也就无法满足 window 的触发条件。产线环境势必要多并行,此时可以通过 keyBy 把目标数据分到同一个 pipeline 中
  6. 在本地可设置并行度为1

本地启动两个 nc:`nc -lk 9999` 和 `nc -lk 9998`

9999端输入测试数据:

0|1602961668775|6|6

0|1602961668775|7|7

等一分钟后再输入(窗口大小为一分钟):

0|1602961668775|8|8

9998端输入测试数据:

6|1602951882681|6|6

0|1603951882681|10|10

等一分钟后再输入(窗口大小为一分钟):

0|1603951882681|11|11

窗口触发后计算:

nameIterable:[{"age":"0","eventTime":1605664552833,"eventTimeStr":"2020-11-18 09:55:52","id":"6","name":"6"}]
 ageIterable:[{"age":"6","eventTime":1605664552834,"eventTimeStr":"2020-11-18 09:55:52","id":"6","name":"6"}]
addSink:{"age":"6","eventTime":1605664552833,"eventTimeStr":"2020-11-18 09:55:52","id":"6","name":"6"}
 nameIterable:[{"age":"0","eventTime":1605664553132,"eventTimeStr":"2020-11-18 09:55:53","id":"7","name":"7"}]
 ageIterable:[]
addSink:{"age":"0","eventTime":1605664553132,"eventTimeStr":"2020-11-18 09:55:53","id":"7","name":"7"}
 nameIterable:[]
 ageIterable:[{"age":"0","eventTime":1605664553132,"eventTimeStr":"2020-11-18 09:55:53","id":"10","name":"10"}]