Flink 流的合并操作

  • union
    union只能合并元素类型相同的流,合并的结果仍然是DataStream,合并后可以使用的算子与合并之前一致。
public static void main(String[] args) throws Exception {
        // Demonstrates union: it can only merge streams whose element types are identical,
        // and the merged result is again an ordinary DataStream.
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // Two independent String sources.
        DataStreamSource<String> names = environment.fromElements("night", "Jim", "Mary");
        DataStreamSource<String> cities = environment.fromElements("四川", "北京", "上海");

        // Merge both sources into one stream and print every element of the combined stream.
        names.union(cities).print();

        // Nothing runs until the job is submitted.
        environment.execute();
    }

11> 北京
9> Mary
12> 上海
8> Jim
7> night
10> 四川
  • connect
    connect可以连接两个不同类型的流,连接之后使用的处理API也相应不同(两个输入各自有一个处理方法)。下面是一个Tuple2类型的流与一个Long类型的流连接的例子:先做keyBy,再做map操作,map需要实现的接口是CoMapFunction。
public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Tuple2<String, String>> ds1 = env.fromElements(Tuple2.of("四川", "成都"), Tuple2.of("北京", "朝阳"), Tuple2.of("广东", "深圳"),Tuple2.of("四川", "成都"));

        DataStreamSource<Long> ds2 = env.fromElements(1L, 2L, 3L,2L);


        ConnectedStreams<Tuple2<String, String>, Long> connect = ds1.connect(ds2);

        connect.keyBy(data -> data.f0,data -> data).map(new CoMapFunction<Tuple2<String, String>, Long, String>() {
//
            @Override
            public String map1(Tuple2<String, String> stringStringTuple2) throws Exception {
                return "this is tuple" + stringStringTuple2;
            }

            @Override
            public String map2(Long aLong) throws Exception {
                return "this is number" + aLong;
            }
        }).print();

        env.execute();

6> this is tuple(广东,深圳)
7> this is tuple(北京,朝阳)
15> this is number3
16> this is tuple(四川,成都)
11> this is number1
16> this is number2
16> this is tuple(四川,成都)
16> this is number2

connect之后process处理

public static void main(String[] args) throws Exception {

        // Matching logic:
        // 1. Connect two "dating" input streams: one carries boys, the other carries girls.
        // 2. After connecting, key both inputs by hobby; a boy and a girl sharing a hobby are a match.
        // 3. Within one hobby key, two states are kept: boyState for the boy, girlState for the girl.
        // 4. Whenever one side receives an element, check whether the other side's state is empty.
        // 5. If it is empty, store the element in this side's state and register a timer.
        // 6. If it is not empty, someone of the opposite sex shares the hobby: the match succeeds
        //    and the other side's state is cleared.
        // 7. If the timer fires, nobody of the opposite sex with that hobby showed up: the match fails.
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(1);

        // Boys: Tuple3.of(name, hobby, timestamp); the third field is used as the event time.
        WatermarkStrategy<Tuple3<String, String, Long>> boyWatermarks = WatermarkStrategy
                .<Tuple3<String, String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner((boy, recordTimestamp) -> boy.f2);
        SingleOutputStreamOperator<Tuple3<String, String, Long>> boys = environment
                .fromElements(
                        Tuple3.of("Tom", "sing", 1000L),
                        Tuple3.of("Jim", "Swimming", 2000L))
                .assignTimestampsAndWatermarks(boyWatermarks);

        // Girls: Tuple4.of(name, hobby, description, timestamp); the fourth field is the event time.
        // The two inputs deliberately use different tuple types to show that connect accepts
        // inputs of different types.
        WatermarkStrategy<Tuple4<String, String, String, Long>> girlWatermarks = WatermarkStrategy
                .<Tuple4<String, String, String, Long>>forMonotonousTimestamps()
                .withTimestampAssigner((girl, recordTimestamp) -> girl.f3);
        SingleOutputStreamOperator<Tuple4<String, String, String, Long>> girls = environment
                .fromElements(
                        Tuple4.of("Mary", "Swimming", "lovely", 3000L),
                        Tuple4.of("Julie", "binge-watching", "sexy", 4000L))
                .assignTimestampsAndWatermarks(girlWatermarks);

        // keyBy is the crucial step: it routes people with the same hobby to the same key.
        boys.connect(girls)
                .keyBy(boy -> boy.f1, girl -> girl.f1)
                .process(new AppointmentProcessFunction())
                .print();

        environment.execute();
    }

    public static class AppointmentProcessFunction extends CoProcessFunction<Tuple3<String, String, Long>,Tuple4<String, String, String, Long>,String> {

        private ValueState<Tuple3<String, String, Long>> boyState;

        private ValueState<Tuple4<String, String, String, Long>> girlState;


        @Override
        public void open(Configuration parameters) throws Exception {

            boyState = getRuntimeContext()
                    .getState(new ValueStateDescriptor<Tuple3<String, String, Long>>("boyState", Types.TUPLE(Types.STRING,Types.STRING,Types.LONG)));
            girlState = getRuntimeContext()
                    .getState(new ValueStateDescriptor<Tuple4<String,String, String, Long>>("girlState", Types.TUPLE(Types.STRING,Types.STRING,Types.STRING,Types.LONG)));
        }

        @Override
        public void processElement1(Tuple3<String, String, Long> value, CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String>.Context context, Collector<String> collector) throws Exception {

            if(girlState.value() != null){

                collector.collect("约会成功,牵手对象 :" + value +" " + girlState.value());

                //清空状态
                girlState.clear();
            }else {
                //更新状态
                boyState.update(value);
                //注册一个5秒后的定时器 等待另一条流的事件
                context.timerService().registerEventTimeTimer(value.f2 + 5000L);
            }
        }

        @Override
        public void processElement2(Tuple4<String, String, String, Long> value, CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String>.Context context, Collector<String> collector) throws Exception {

            if(boyState.value() != null){
                collector.collect("约会成功,牵手对象 :" + value +" " + boyState.value());

                //清空状态
                boyState.clear();
            }else {
                //更新状态
                girlState.update(value);
                //注册一个5秒后的定时器 等待另一条流的事件
                context.timerService().registerEventTimeTimer(value.f3 + 5000L);
            }
        }


        @Override
        public void onTimer(long timestamp, CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String>.OnTimerContext ctx, Collector<String> out) throws Exception {

            if(boyState.value() != null){
                out.collect("约会失败 :" + boyState.value() + " 单身男孩");
            }

            if(girlState.value() != null){
                out.collect("约会失败 :" + girlState.value() + " 单身女孩");
            }

            boyState.clear();;
            girlState.clear();
        }
    }

约会成功,牵手对象 :(Jim,Swimming,2000) (Mary,Swimming,lovely,3000)
约会失败 :(Tom,sing,1000) 单身男孩
约会失败 :(Julie,binge-watching,sexy,4000) 单身女孩