转换算子
1.txt
sensor_1,111,11.1
sensor_2,121,11.2
sensor_3,211,11.3
sensor_4,311,11.4
1.基本转换算子
map, flatMap, filter
public class TransformTest01_Base {
    /**
     * Demonstrates the basic transformation operators: map, flatMap, filter.
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        // FIX: removed the trailing space after "1.txt" — the original path ended in
        // "1.txt " and would not resolve to the resource file.
        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStream<String> dataStream = env.readTextFile(path);
        // map: convert each String line to its length (Integer).
        DataStream<Integer> mapStream = dataStream.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) throws Exception {
                return value.length();
            }
        });
        // flatMap: split each line on commas and emit every field as its own record.
        DataStream<String> flatMapStream = dataStream.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String value, Collector<String> out) throws Exception {
                for (String field : value.split(",")) {
                    out.collect(field);
                }
            }
        });
        // filter: keep only lines whose id starts with "sensor_1".
        SingleOutputStreamOperator<String> filterStream = dataStream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                return value.startsWith("sensor_1");
            }
        });
        filterStream.print("filterStream");
        flatMapStream.print("flatMap");
        mapStream.print();
        env.execute();
    }
}

1.2 聚合函数
keyBy, maxBy
public class TransformTest02_RollingAggregation {
    /**
     * Demonstrates keyed rolling aggregation: keyBy followed by maxBy.
     * Per sensor id, emits the record with the highest temperature seen so far.
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStreamSource<String> inputStream = env.readTextFile(path);
        // Parse "id,timestamp,temperature" lines into SensorReading objects.
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] arr = line.split(",");
            // FIX: use Long.parseLong / Double.parseDouble instead of the deprecated
            // boxing constructors new Long(...) / new Double(...).
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });
        // Group by sensor id. The key-selector form replaces the deprecated
        // field-expression form keyBy("id"); the key is the id String itself.
        KeyedStream<SensorReading, String> keyedStream = dataStream.keyBy(SensorReading::getId);
        // Rolling aggregation: per key, keep the full record with the max temperature.
        DataStream<SensorReading> max = keyedStream.maxBy("temperature");
        max.print();
        env.execute();
    }
}

1.3 Reduce
public class TransformTest03_Reduce {
    /**
     * Demonstrates keyed reduce: per sensor id, track the maximum temperature
     * while always carrying forward the latest timestamp.
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStreamSource<String> inputStream = env.readTextFile(path);
        // Parse "id,timestamp,temperature" lines into SensorReading objects.
        // (mapStream renamed from MapStream: locals are lowerCamelCase.)
        DataStream<SensorReading> mapStream = inputStream.map(data -> {
            String[] arr = data.split(",");
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });
        // Group by sensor id (key-selector form; the key is the id String).
        KeyedStream<SensorReading, String> keyedStream = mapStream.keyBy(SensorReading::getId);
        // reduce: combine the current accumulated state with each new record —
        // take the NEW record's timestamp but the max of both temperatures.
        SingleOutputStreamOperator<SensorReading> reduce = keyedStream.reduce((curState, newData) ->
                new SensorReading(
                        curState.getId(),
                        newData.getTimestamp(),
                        Math.max(curState.getTemperature(), newData.getTemperature())));
        reduce.print();
        env.execute();
    }
}

1.5 多流函数
分流:DataStream -> SplitStream,通过 split/select 实现
合流:connect(下面的示例只演示了分流;connect 合流在此仅为占位,未给出代码)
public class TransformTest04_MultipleStream {
    /**
     * Demonstrates stream splitting: DataStream -> SplitStream via split/select.
     * NOTE(review): split/select is deprecated in newer Flink versions; side
     * outputs (OutputTag + process function) are the modern replacement.
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStreamSource<String> inputStream = env.readTextFile(path);
        // Parse "id,timestamp,temperature" lines into SensorReading objects.
        DataStream<SensorReading> mapStream = inputStream.map(data -> {
            String[] arr = data.split(",");
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });
        // Split: tag each record "high" or "low" around the 11.2 threshold.
        SplitStream<SensorReading> splitStream = mapStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading sensorReading) {
                return sensorReading.getTemperature() > 11.2
                        ? Collections.singletonList("high")
                        : Collections.singletonList("low");
            }
        });
        // FIX: typo "higth" -> "high" (local variable and print label).
        DataStream<SensorReading> highStream = splitStream.select("high");
        DataStream<SensorReading> lowStream = splitStream.select("low");
        highStream.print("high");
        // FIX: lowStream was selected but never used; print it for the demo too.
        lowStream.print("low");
        env.execute("stream");
    }
}

1.6 富函数
Rich Function
public class TransformTest05_RichFunction {
public static void main(String[] args)throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
DataStreamSource inputStream = env.readTextFile(path);
DataStream MapStream = inputStream.map(data -> {
String[] arr = data.split(",");
return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
});
DataStream> dataStream = MapStream.map(new MyMapper());
dataStream.print();
env.execute();
}
public static class MyMapper1 implements MapFunction>{
@Override
public Tuple2 map(SensorReading value) throws Exception {
return null;
}
}
public static class MyMapper extends RichMapFunction>{
@Override
public Tuple2 map(SensorReading value) throws Exception {
// 获取id 和当前子任务的序号
return new Tuple2<>(value.getId(), getRuntimeContext().getIndexOfThisSubtask());
}
@Override
public void open(Configuration parameters) throws Exception {
System.out.println("open");
}
@Override
public void close() throws Exception {
System.out.println("close");
}
}
}

1.7 分区函数
Partition
public class TransformTest06_Partition {
    /**
     * Demonstrates physical partitioning: shuffle() redistributes records
     * randomly across the downstream parallel subtasks.
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism 4 so the repartitioning is visible in the print output.
        env.setParallelism(4);
        DataStreamSource<String> inputStream =
                env.readTextFile("E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt");
        // Parse "id,timestamp,temperature" lines into SensorReading objects.
        SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(data -> {
            String[] arr = data.split(",");
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });
        mapStream.print("map");
        // shuffle: random distribution; with only a few records it will not look even.
        DataStream<SensorReading> shuffleStream = mapStream.shuffle();
        shuffleStream.print("shuffle");
        env.execute("input");
    }
}

















