第一种方式,下标类型
代码实现:
package cn._51doit.flink.day02;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy by tuple field index (unbounded stream).
 * Index-based keyBy only applies to Tuple types such as Tuple2.
 *
 * Function: with `nc -lk 8888` running on this node, counts incoming words.
 * A repeated word increments its running count starting from 1; a new word
 * starts its own count at 1.
 */
public class KeyedDemo01 {
    public static void main(String[] args) throws Exception {
        // Local environment with the Flink web UI enabled for inspection.
        Configuration conf = new Configuration();
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
        // One word per line, e.g. "spark".
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);
        // Pair each word with an initial count of 1. The lambda erases the
        // tuple's generic types, so declare them explicitly via returns().
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = lines
                .map(word -> Tuple2.of(word, 1))
                .returns(Types.TUPLE(Types.STRING, Types.INT));
        // Deprecated index-based keyBy: group by field 0 (word) and field 1 (count).
        KeyedStream<Tuple2<String, Integer>, Tuple> grouped = pairs.keyBy(0, 1);
        // Running sum of field 1 per key, printed to stdout.
        grouped.sum(1).print();
        env.execute();
    }
}
打印输出
第二种方式,按条件类型(过时了)
代码:统计金额
package cn._51doit.flink.day02;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy on multiple tuple fields (unbounded stream).
 *
 * Function: with `nc -lk 8888` running on this node, reads lines of
 * province, city and amount, e.g. "上海市,浦东新区,1000", groups by
 * (province, city) and keeps a running sum of the amount.
 *
 * Fix: input may use either the ASCII comma "," or the full-width Chinese
 * comma ",", so split on both; previously a Chinese comma made the split
 * produce one field and the program failed on fields[1]/fields[2].
 */
public class KeyedDemo02 {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
        // province, city, amount
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> provinceCityAndMoney = lines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String value) throws Exception {
                // Accept both ASCII "," and full-width "," as field separators.
                String[] fields = value.split("[,,]");
                if (fields.length != 3) {
                    // Fail fast with a message naming the offending input line.
                    throw new IllegalArgumentException("expected 'province,city,amount' but got: " + value);
                }
                // trim() guards against stray whitespace around each field.
                return Tuple3.of(fields[0].trim(), fields[1].trim(), Integer.parseInt(fields[2].trim()));
            }
        });
        // Deprecated index-based keyBy: group by fields 0 (province) and 1 (city).
        KeyedStream<Tuple3<String, String, Integer>, Tuple> keyed = provinceCityAndMoney.keyBy(0, 1);
        keyed.sum(2).print();
        env.execute();
    }
}
控制台打印输出
改造之后方式1:用KeySelector把Tuple3的前两个字段拼接成单个String作为key
package cn._51doit.flink.day03;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Rework #1: keyBy with a KeySelector that builds a single String key
 * from two Tuple3 fields (province + city).
 *
 * Fix: join the two fields with a separator. Bare concatenation
 * (value.f0 + value.f1) lets distinct keys collide — e.g. ("ab","c")
 * and ("a","bc") both become "abc" and would be summed together.
 * "," is safe as a separator because the upstream split(",") guarantees
 * neither field contains an ASCII comma.
 */
public class KeyedDemo02 {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
        // province, city, amount
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> provinceCityAndMoney = lines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
            @Override
            public Tuple3<String, String, Integer> map(String value) throws Exception {
                String[] fields = value.split(",");
                return Tuple3.of(fields[0], fields[1], Integer.parseInt(fields[2]));
            }
        });
        KeyedStream<Tuple3<String, String, Integer>, String> keyed = provinceCityAndMoney.keyBy(new KeySelector<Tuple3<String, String, Integer>, String>() {
            @Override
            public String getKey(Tuple3<String, String, Integer> value) throws Exception {
                // Separator prevents distinct (province, city) pairs from
                // concatenating to the same key string.
                return value.f0 + "," + value.f1;
            }
        });
        keyed.sum(2).print();
        env.execute();
    }
}
改造之后的方式2:用KeySelector从Tuple3中取前两个字段组成Tuple2作为复合key
package cn._51doit.flink.day03;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Rework #2: keyBy with a KeySelector that projects a Tuple3 element
 * onto a Tuple2 composite key (province, city).
 *
 * With `nc -lk 8888` on this node, each input line carries province,
 * city and amount; the stream is grouped by (province, city) and the
 * amount (field 2) is summed per group.
 */
public class KeyedDemo03 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
        // province, city, amount
        DataStreamSource<String> socketLines = env.socketTextStream("Master", 8888);
        SingleOutputStreamOperator<Tuple3<String, String, Integer>> records =
                socketLines.map(new MapFunction<String, Tuple3<String, String, Integer>>() {
                    @Override
                    public Tuple3<String, String, Integer> map(String line) throws Exception {
                        // Expected layout: province,city,amount
                        String[] parts = line.split(",");
                        return Tuple3.of(parts[0], parts[1], Integer.parseInt(parts[2]));
                    }
                });
        // Composite key: the first two fields packed into a Tuple2.
        KeyedStream<Tuple3<String, String, Integer>, Tuple2<String, String>> byProvinceCity =
                records.keyBy(new KeySelector<Tuple3<String, String, Integer>, Tuple2<String, String>>() {
                    @Override
                    public Tuple2<String, String> getKey(Tuple3<String, String, Integer> t) throws Exception {
                        return Tuple2.of(t.f0, t.f1);
                    }
                });
        // Running per-key sum of the amount field.
        byProvinceCity.sum(2).print();
        env.execute();
    }
}
第三种方式,采用f0字段
为什么采用f0可以?
因为SingleOutputStreamOperator的元素类型是Tuple2,点进Tuple2的源码可以看到它是一个类,其中定义了公共字段f0、f1
/** Field 0 of the tuple. */
public T0 f0;
/** Field 1 of the tuple. */
public T1 f1;
代码实现:
package cn._51doit.flink.day03;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy by field-expression name "f0" (unbounded stream).
 *
 * Function: with `nc -lk 8888` running on this node, counts incoming
 * lines grouped by the tuple's public f0 field. Note that a whole line
 * such as "hadoop spark flink" is treated as ONE key — to count single
 * words, enter one word per line.
 */
public class KeyedDemo04 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
        // One word per line, e.g. "spark".
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);
        // Pair each line with an initial count of 1; returns() restores
        // the tuple type information erased by the lambda.
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = lines
                .map(word -> Tuple2.of(word, 1))
                .returns(Types.TUPLE(Types.STRING, Types.INT));
        // Deprecated field-expression keyBy: group by the public field f0.
        KeyedStream<Tuple2<String, Integer>, Tuple> grouped = pairs.keyBy("f0");
        // Running sum of field 1 per key, printed to stdout.
        grouped.sum(1).print();
        env.execute();
    }
}
控制台打印输出:
第四种方式,f0、f1字段一起使用
代码实现:
package cn._51doit.flink.day03;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * keyBy by field-expression names "f0" and "f1" together (unbounded stream).
 *
 * Function: with `nc -lk 8888` running on this node, each input line is
 * wrapped as Tuple2(line, 1) and the stream is grouped by BOTH fields
 * (the text and the constant 1), then the count (field 1) is summed per
 * group — effectively a word count when one word is entered per line.
 */
public class KeyedDemo05 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
        // One word per line, e.g. "spark".
        DataStreamSource<String> lines = env.socketTextStream("Master", 8888);
        // Pair each line with an initial count of 1; returns() restores
        // the tuple type information erased by the lambda.
        SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = lines
                .map(word -> Tuple2.of(word, 1))
                .returns(Types.TUPLE(Types.STRING, Types.INT));
        // Deprecated field-expression keyBy on both public fields f0 and f1.
        KeyedStream<Tuple2<String, Integer>, Tuple> grouped = pairs.keyBy("f0", "f1");
        // Running sum of field 1 per key, printed to stdout.
        grouped.sum(1).print();
        env.execute();
    }
}
控制台打印输出: