转换算子

1.txt

sensor_1,111,11.1
sensor_2,121,11.2
sensor_3,211,11.3
sensor_4,311,11.4

1.1 基本转换算子

map, flatMap, filter

public class TransformTest01_Base {
    public static void main(String[] args) throws Exception {
        // Demonstrates the basic transformation operators: map, flatMap, filter.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // BUG FIX: the original path ended with a stray trailing space ("1.txt "),
        // which makes readTextFile look up a non-existent file.
        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStream<String> dataStream = env.readTextFile(path);

        // map: String -> Integer, emits the character length of each line.
        DataStream<Integer> mapStream = dataStream.map(new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) throws Exception {
                return value.length();
            }
        });

        // flatMap: split each line on commas and emit every field separately.
        DataStream<String> flatMapStream = dataStream.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String value, Collector<String> out) throws Exception {
                for (String s : value.split(",")) {
                    out.collect(s);
                }
            }
        });

        // filter: keep only lines whose sensor id starts with "sensor_1".
        SingleOutputStreamOperator<String> filterStream = dataStream.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String value) throws Exception {
                return value.startsWith("sensor_1");
            }
        });

        filterStream.print("filterStream");
        flatMapStream.print("flatMap");
        mapStream.print();
        env.execute();
    }
}

flink教程3-Transfom_ide

1.2 聚合函数

keyBy, maxBy

public class TransformTest02_RollingAggregation {
    public static void main(String[] args) throws Exception {
        // Demonstrates keyed rolling aggregation: keyBy + maxBy.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStreamSource<String> inputStream = env.readTextFile(path);

        // Parse "id,timestamp,temperature" CSV lines into SensorReading objects.
        // BUG FIX: use the static parse methods instead of the deprecated boxing
        // constructors new Long(...) / new Double(...).
        DataStream<SensorReading> dataStream = inputStream.map(line -> {
            String[] arr = line.split(",");
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });

        // Group by field name; with a String field expression the key type is Tuple.
        KeyedStream<SensorReading, Tuple> keyedStream = dataStream.keyBy("id");
        // Equivalent variant using a key selector; here the key type is the id String.
        KeyedStream<SensorReading, String> keyedStream1 = dataStream.keyBy(data -> data.getId());

        // Rolling aggregation: per key, keep the record with the maximum temperature.
        DataStream<SensorReading> max = keyedStream.maxBy("temperature");
        max.print();

        env.execute();
    }
}

flink教程3-Transfom_java_02

1.3 Reduce

public class TransformTest03_Reduce {
    public static void main(String[] args) throws Exception {
        // Demonstrates the reduce operator on a keyed stream.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStreamSource<String> inputStream = env.readTextFile(path);

        // Parse "id,timestamp,temperature" CSV lines into SensorReading objects.
        // (Local variable renamed from "MapStream" to follow lowerCamelCase.)
        DataStream<SensorReading> mapStream = inputStream.map(data -> {
            String[] arr = data.split(",");
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });

        // Group by sensor id.
        KeyedStream<SensorReading, Tuple> keyedStream = mapStream.keyBy("id");

        /*
        // Anonymous-class equivalent of the lambda reduce below:
        keyedStream.reduce(new ReduceFunction<SensorReading>() {
            @Override
            public SensorReading reduce(SensorReading value1, SensorReading value2) throws Exception {
                return new SensorReading(value1.getId(), value2.getTimestamp(),
                        Math.max(value1.getTemperature(), value2.getTemperature()));
            }
        });
        */

        // reduce combines the current accumulated state with each newly arrived record:
        // keep the latest timestamp but the maximum temperature seen so far.
        SingleOutputStreamOperator<SensorReading> reduce = keyedStream.reduce((curState, newData) -> {
            return new SensorReading(curState.getId(), newData.getTimestamp(),
                    Math.max(curState.getTemperature(), newData.getTemperature()));
        });
        reduce.print();

        env.execute();
    }
}

flink教程3-Transfom_ide_03

1.4 多流函数

DataStream -> SplitStream split/select

合流:connect

public class TransformTest04_MultipleStream {
    public static void main(String[] args) throws Exception {
        // Demonstrates splitting one stream into several: DataStream -> SplitStream via split/select.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
        DataStreamSource<String> inputStream = env.readTextFile(path);

        // Parse "id,timestamp,temperature" CSV lines into SensorReading objects.
        DataStream<SensorReading> mapStream = inputStream.map(data -> {
            String[] arr = data.split(",");
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });

        // Split: tag each record "high" or "low" around the 11.2 temperature threshold.
        SplitStream<SensorReading> splitStream = mapStream.split(new OutputSelector<SensorReading>() {
            @Override
            public Iterable<String> select(SensorReading sensorReading) {
                return sensorReading.getTemperature() > 11.2
                        ? Collections.singletonList("high")
                        : Collections.singletonList("low");
            }
        });

        // BUG FIX: "higth" typo corrected to "high"; lowStream was selected but never printed.
        DataStream<SensorReading> highStream = splitStream.select("high");
        DataStream<SensorReading> lowStream = splitStream.select("low");
        highStream.print("high");
        lowStream.print("low");

        // Merging streams (connect) would go here.
        env.execute("stream");
    }
}

flink教程3-Transfom_ide_04

1.6 富函数

Rich Function

public class TransformTest05_RichFunction {
public static void main(String[] args)throws Exception{
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);

String path = "E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt";
DataStreamSource inputStream = env.readTextFile(path);
DataStream MapStream = inputStream.map(data -> {
String[] arr = data.split(",");
return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
});
DataStream> dataStream = MapStream.map(new MyMapper());

dataStream.print();

env.execute();
}
public static class MyMapper1 implements MapFunction>{

@Override
public Tuple2 map(SensorReading value) throws Exception {
return null;
}
}

public static class MyMapper extends RichMapFunction>{

@Override
public Tuple2 map(SensorReading value) throws Exception {
// 获取id 和当前子任务的序号
return new Tuple2<>(value.getId(), getRuntimeContext().getIndexOfThisSubtask());
}

@Override
public void open(Configuration parameters) throws Exception {
System.out.println("open");
}

@Override
public void close() throws Exception {
System.out.println("close");
}
}
}

flink教程3-Transfom_flink_05

1.7 分区函数

Partition

public class TransformTest06_Partition {
    public static void main(String[] args) throws Exception {
        // Demonstrates repartitioning operators; parallelism 4 so partitioning is visible.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        DataStreamSource<String> inputStream =
                env.readTextFile("E:\\atguiguDemo03\\leet-code\\flink04_java\\src\\main\\resources\\1.txt");

        // Parse "id,timestamp,temperature" CSV lines into SensorReading objects.
        SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(data -> {
            String[] arr = data.split(",");
            return new SensorReading(arr[0], Long.parseLong(arr[1]), Double.parseDouble(arr[2]));
        });
        mapStream.print("map");

        // shuffle: random partitioning; for small inputs the distribution can be quite uneven.
        DataStream<SensorReading> shuffleStream = mapStream.shuffle();
        shuffleStream.print("shuffle");

        env.execute("input");
    }
}

flink教程3-Transfom_大数据_06