flink 流的合并操作
- union
union只能合并元素类型相同的流,合并的结果仍然是DataStream,后续可以使用的算子与合并之前一致。
/**
 * Demonstrates {@code union}: two streams with the same element type are merged
 * into a single stream, which is then printed.
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // union can only merge streams whose element types are identical (both String here)
    DataStreamSource<String> names = env.fromElements("night", "Jim", "Mary");
    DataStreamSource<String> cities = env.fromElements("四川", "北京", "上海");

    // the merged result is handled like any other stream: print every element
    names.union(cities).print();

    env.execute();
}
11> 北京
9> Mary
12> 上海
8> Jim
7> night
10> 四川
- connect
connect可以连接不同类型的流,连接之后得到的是ConnectedStreams,后续的处理api也相应不同(需要为两条流分别提供处理逻辑)。下面的例子把一个Tuple2类型的流与一个Long类型的流连接起来,先做keyBy,再做map操作,map的实现接口是CoMapFunction
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<Tuple2<String, String>> ds1 = env.fromElements(Tuple2.of("四川", "成都"), Tuple2.of("北京", "朝阳"), Tuple2.of("广东", "深圳"),Tuple2.of("四川", "成都"));
DataStreamSource<Long> ds2 = env.fromElements(1L, 2L, 3L,2L);
ConnectedStreams<Tuple2<String, String>, Long> connect = ds1.connect(ds2);
connect.keyBy(data -> data.f0,data -> data).map(new CoMapFunction<Tuple2<String, String>, Long, String>() {
//
@Override
public String map1(Tuple2<String, String> stringStringTuple2) throws Exception {
return "this is tuple" + stringStringTuple2;
}
@Override
public String map2(Long aLong) throws Exception {
return "this is number" + aLong;
}
}).print();
env.execute();
6> this is tuple(广东,深圳)
7> this is tuple(北京,朝阳)
15> this is number3
16> this is tuple(四川,成都)
11> this is number1
16> this is number2
16> this is tuple(四川,成都)
16> this is number2
connect之后process处理
/**
 * Dating example for {@code connect} followed by {@code process}.
 *
 * Processing idea:
 * 1. Connect two input streams: one carries boys, the other carries girls.
 * 2. Key the connected streams by hobby, so a boy and a girl with the same hobby
 *    are handled under the same key and can be matched.
 * 3. Per key, two states are kept: one for a waiting boy and one for a waiting girl.
 * 4. When a record arrives on one side, the state of the other side is checked.
 * 5. If it is empty, the record is stored and a timer is registered.
 * 6. If it is not empty, someone of the opposite sex shares the hobby: the date
 *    succeeds and the other side's state is cleared.
 * 7. When the timer fires, nobody with the same hobby showed up: the date failed.
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    // Boy records are Tuple3.of(name, hobby, time); field f2 is used as the event timestamp.
    WatermarkStrategy<Tuple3<String, String, Long>> boyWatermarks = WatermarkStrategy
            .<Tuple3<String, String, Long>>forMonotonousTimestamps()
            .withTimestampAssigner((element, recordTimestamp) -> element.f2);
    SingleOutputStreamOperator<Tuple3<String, String, Long>> ds1 = env
            .fromElements(Tuple3.of("Tom", "sing", 1000L), Tuple3.of("Jim", "Swimming", 2000L))
            .assignTimestampsAndWatermarks(boyWatermarks);

    // Girl records are Tuple4.of(name, hobby, description, time); field f3 is the event timestamp.
    // The two inputs deliberately use different types to show that connect accepts that.
    WatermarkStrategy<Tuple4<String, String, String, Long>> girlWatermarks = WatermarkStrategy
            .<Tuple4<String, String, String, Long>>forMonotonousTimestamps()
            .withTimestampAssigner((element, recordTimestamp) -> element.f3);
    SingleOutputStreamOperator<Tuple4<String, String, String, Long>> ds2 = env
            .fromElements(
                    Tuple4.of("Mary", "Swimming", "lovely", 3000L),
                    Tuple4.of("Julie", "binge-watching", "sexy", 4000L))
            .assignTimestampsAndWatermarks(girlWatermarks);

    // keyBy is the crucial step: both inputs are keyed by the hobby field (f1),
    // which puts people with the same hobby into the same bucket.
    ds1.connect(ds2)
            .keyBy(data -> data.f1, data -> data.f1)
            .process(new AppointmentProcessFunction())
            .print();

    env.execute();
}
/**
 * Matches records from two connected, keyed inputs: input 1 carries boys
 * (name, hobby, time) and input 2 carries girls (name, hobby, description, time).
 *
 * For the current key at most one side is waiting at any moment. An arriving record
 * either pairs up with the waiting record of the other side (success message), or is
 * stored and given an event-time deadline of its own timestamp plus {@link #WAIT_MILLIS}.
 * When that deadline fires while the record is still stored, a failure message is emitted.
 *
 * Note: a second record of the same side arriving under the same key before a match
 * replaces the stored one; only the most recent record is kept.
 */
public static class AppointmentProcessFunction extends CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String> {

    /** Event-time span in milliseconds a stored record waits for a partner. */
    private static final long WAIT_MILLIS = 5000L;

    /** The boy currently waiting under this key, or empty. */
    private ValueState<Tuple3<String, String, Long>> boyState;

    /** The girl currently waiting under this key, or empty. */
    private ValueState<Tuple4<String, String, String, Long>> girlState;

    /** Obtains the two keyed value states from the runtime context. */
    @Override
    public void open(Configuration parameters) throws Exception {
        boyState = getRuntimeContext()
                .getState(new ValueStateDescriptor<Tuple3<String, String, Long>>("boyState", Types.TUPLE(Types.STRING, Types.STRING, Types.LONG)));
        girlState = getRuntimeContext()
                .getState(new ValueStateDescriptor<Tuple4<String, String, String, Long>>("girlState", Types.TUPLE(Types.STRING, Types.STRING, Types.STRING, Types.LONG)));
    }

    /**
     * Handles a boy record: pairs it with a waiting girl if there is one,
     * otherwise stores it and registers its deadline timer.
     */
    @Override
    public void processElement1(Tuple3<String, String, Long> value, CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String>.Context context, Collector<String> collector) throws Exception {
        Tuple4<String, String, String, Long> waitingGirl = girlState.value();
        if (waitingGirl != null) {
            collector.collect("约会成功,牵手对象 :" + value + " " + waitingGirl);
            // the girl is no longer waiting: drop her state and her pending deadline,
            // so the old timer cannot fire against a record stored later
            girlState.clear();
            context.timerService().deleteEventTimeTimer(waitingGirl.f3 + WAIT_MILLIS);
        } else {
            // nobody to pair with yet: remember this boy and wait for the other stream
            boyState.update(value);
            context.timerService().registerEventTimeTimer(value.f2 + WAIT_MILLIS);
        }
    }

    /**
     * Handles a girl record: pairs it with a waiting boy if there is one,
     * otherwise stores it and registers its deadline timer.
     */
    @Override
    public void processElement2(Tuple4<String, String, String, Long> value, CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String>.Context context, Collector<String> collector) throws Exception {
        Tuple3<String, String, Long> waitingBoy = boyState.value();
        if (waitingBoy != null) {
            collector.collect("约会成功,牵手对象 :" + value + " " + waitingBoy);
            // the boy is no longer waiting: drop his state and his pending deadline
            boyState.clear();
            context.timerService().deleteEventTimeTimer(waitingBoy.f2 + WAIT_MILLIS);
        } else {
            // nobody to pair with yet: remember this girl and wait for the other stream
            girlState.update(value);
            context.timerService().registerEventTimeTimer(value.f3 + WAIT_MILLIS);
        }
    }

    /**
     * Emits a failure message for the stored record whose deadline equals the firing
     * timestamp. A timer left over from a record that has since been replaced is
     * ignored, so a newer waiting record is not failed before its own deadline.
     */
    @Override
    public void onTimer(long timestamp, CoProcessFunction<Tuple3<String, String, Long>, Tuple4<String, String, String, Long>, String>.OnTimerContext ctx, Collector<String> out) throws Exception {
        Tuple3<String, String, Long> boy = boyState.value();
        if (boy != null && boy.f2 + WAIT_MILLIS == timestamp) {
            out.collect("约会失败 :" + boy + " 单身男孩");
            boyState.clear();
        }
        Tuple4<String, String, String, Long> girl = girlState.value();
        if (girl != null && girl.f3 + WAIT_MILLIS == timestamp) {
            out.collect("约会失败 :" + girl + " 单身女孩");
            girlState.clear();
        }
    }
}
约会成功,牵手对象 :(Jim,Swimming,2000) (Mary,Swimming,lovely,3000)
约会失败 :(Tom,sing,1000) 单身男孩
约会失败 :(Julie,binge-watching,sexy,4000) 单身女孩