添加pom.xml依赖
<!-- Lombok: compile-time annotation processing (@Data, @Slf4j, ...) -->
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-test</artifactId>
    <scope>test</scope>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-json</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-devtools</artifactId>
    <!-- optional=true: this dependency is not transitive. Projects that depend on
         this module must re-declare devtools themselves if they want it. -->
    <optional>true</optional>
    <!-- FIX: removed invalid <scope>true</scope>. Valid Maven scopes are
         compile/provided/runtime/test/system/import; "true" fails the build. -->
</dependency>
<!-- jackson-datatype-jsr310: java.time (JSR-310) serialization support -->
<dependency>
    <groupId>com.fasterxml.jackson.datatype</groupId>
    <artifactId>jackson-datatype-jsr310</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-data-redis -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-base -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-base</artifactId>
    <version>1.14.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.ververica/flink-sql-connector-mysql-cdc -->
<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-sql-connector-mysql-cdc</artifactId>
    <version>2.3.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-java_2.12</artifactId>
    <version>1.14.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.14.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-runtime-web -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-runtime-web_2.12</artifactId>
    <version>1.14.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-table-runtime -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-runtime_2.12</artifactId>
    <version>1.14.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-scala-bridge_2.12</artifactId>
    <version>1.14.6</version>
</dependency>
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner_2.12</artifactId>
    <version>1.14.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.google.guava/guava -->
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>32.1.3-jre</version>
</dependency>
<!-- NOTE(review): fastjson 1.2.83 has known deserialization CVEs (autoType
     bypasses); consider migrating to fastjson2 or Jackson if this ever
     parses untrusted input. -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.83</version>
</dependency>
创建数据变更对象
package com.cqsym.nbigscreen.dese;
import lombok.Data;
@Data
public class DataChangeInfo {
/**
 * Change type: 0 snapshot read (initial load), 1 insert, 2 update, 3 delete,
 * 4 an operation that truncated an existing table in the source.
 */
private Integer operatorType;
/**
 * Row data before the change (JSON string; empty object for inserts).
 */
private String beforeData;
/**
 * Row data after the change (JSON string; empty object for deletes).
 */
private String afterData;
/**
 * The effective payload of the operation: pre-image for deletes,
 * post-image otherwise (see MysqlDeserialization.deserialize).
 */
private String data;
/**
 * Binlog file name the event was read from.
 */
private String fileName;
/**
 * Current read position within the binlog file.
 */
private Integer filePos;
/**
 * Source database name.
 */
private String database;
/**
 * Source table name.
 */
private String tableName;
/**
 * Change timestamp in epoch milliseconds.
 */
private Long operatorTime;
}
实现MySQL消息读取自定义序列化
package com.cqsym.nbigscreen.dese;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.ImmutableMap;
import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Field;
import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Schema;
import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.data.Struct;
import com.ververica.cdc.connectors.shaded.org.apache.kafka.connect.source.SourceRecord;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import io.debezium.data.Envelope;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.util.Collector;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@Slf4j
public class MysqlDeserialization implements DebeziumDeserializationSchema<DataChangeInfo> {

    public static final String TS_MS = "ts_ms";
    public static final String BIN_FILE = "file";
    public static final String POS = "pos";
    public static final String BEFORE = "before";
    public static final String AFTER = "after";
    public static final String SOURCE = "source";

    /**
     * Maps Debezium operation names (READ CREATE UPDATE DELETE TRUNCATE) to the
     * numeric change types used by {@link DataChangeInfo}:
     * 0 snapshot read, 1 insert, 2 update, 3 delete, 4 truncate.
     */
    private static final Map<String, Integer> OPERATION_MAP = ImmutableMap.of(
            "READ", 0,
            "CREATE", 1,
            "UPDATE", 2,
            "DELETE", 3,
            "TRUNCATE", 4);

    /**
     * Converts one Debezium {@link SourceRecord} into a {@link DataChangeInfo}
     * and emits it through the collector.
     *
     * @param sourceRecord raw CDC record; topic is expected to be
     *                     {@code <server>.<database>.<table>}
     * @param collector    downstream collector for the converted event
     */
    @Override
    public void deserialize(SourceRecord sourceRecord, Collector<DataChangeInfo> collector) throws Exception {
        // Topic format: <server-name>.<database>.<table>
        String topic = sourceRecord.topic();
        String[] fields = topic.split("\\.");
        String database = fields[1];
        String tableName = fields[2];
        Struct struct = (Struct) sourceRecord.value();
        final Struct source = struct.getStruct(SOURCE);
        DataChangeInfo dataChangeInfo = new DataChangeInfo();
        // Operation type: READ CREATE UPDATE DELETE TRUNCATE
        Envelope.Operation operation = Envelope.operationFor(sourceRecord);
        String type = operation.toString().toUpperCase();
        // FIX: getOrDefault avoids an unboxing NullPointerException for operation
        // names not present in the map (e.g. Debezium MESSAGE/heartbeat events);
        // unknown operations fall back to 0.
        int eventType = OPERATION_MAP.getOrDefault(type, 0);
        // Compute pre/post images once instead of calling getJsonObject twice per side.
        // Normally only the latest image is needed; both are kept here for demonstration.
        String beforeData = getJsonObject(struct, BEFORE).toJSONString();
        String afterData = getJsonObject(struct, AFTER).toJSONString();
        dataChangeInfo.setBeforeData(beforeData);
        dataChangeInfo.setAfterData(afterData);
        // For deletes (3) the only meaningful payload is the pre-image.
        dataChangeInfo.setData(eventType == 3 ? beforeData : afterData);
        dataChangeInfo.setOperatorType(eventType);
        dataChangeInfo.setFileName(Optional.ofNullable(source.get(BIN_FILE)).map(Object::toString).orElse(""));
        dataChangeInfo.setFilePos(
                Optional.ofNullable(source.get(POS))
                        .map(x -> Integer.parseInt(x.toString()))
                        .orElse(0)
        );
        dataChangeInfo.setDatabase(database);
        dataChangeInfo.setTableName(tableName);
        // Envelope-level ts_ms; fall back to local time when absent.
        dataChangeInfo.setOperatorTime(Optional.ofNullable(struct.get(TS_MS))
                .map(x -> Long.parseLong(x.toString())).orElseGet(System::currentTimeMillis));
        // Emit the converted event downstream.
        collector.collect(dataChangeInfo);
    }

    /**
     * Extracts the pre- or post-image struct of a change event as a JSONObject.
     * Returns an empty JSONObject when the requested image is absent
     * (e.g. "before" on an insert).
     *
     * @param value        the envelope struct of the record
     * @param fieldElement which image to read ({@link #BEFORE} or {@link #AFTER})
     * @return JSONObject with one entry per schema field (possibly empty)
     */
    private JSONObject getJsonObject(Struct value, String fieldElement) {
        Struct element = value.getStruct(fieldElement);
        JSONObject jsonObject = new JSONObject();
        if (element != null) {
            Schema afterSchema = element.schema();
            List<Field> fieldList = afterSchema.fields();
            for (Field field : fieldList) {
                Object afterValue = element.get(field);
                jsonObject.put(field.name(), afterValue);
            }
        }
        return jsonObject;
    }

    @Override
    public TypeInformation<DataChangeInfo> getProducedType() {
        return TypeInformation.of(DataChangeInfo.class);
    }
}
自定义实现用户定义的接收器功能
package com.cqsym.nbigscreen.sink;
import com.cqsym.nbigscreen.dese.DataChangeInfo;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;
import org.springframework.stereotype.Component;
@Slf4j
@Component
public class DataChangeSink implements SinkFunction<DataChangeInfo> {

    /**
     * Receives each change event emitted by the CDC stream and logs it.
     * Change types: 0 snapshot read, 1 insert, 2 update, 3 delete, 4 truncate.
     *
     * NOTE: required beans cannot be field-injected into this class — Flink
     * serializes the sink and non-serializable fields fail with
     * InvalidProgramException ("... is not serializable"). Obtain beans lazily
     * inside this method instead, e.g.:
     * XXXXXSearchRepository repository = SpringUtil.getBean(XXXXXSearchRepository.class);
     */
    @Override
    public void invoke(DataChangeInfo dataChangeInfo, Context context) {
        log.info("DataChangeSink invoke:{}", dataChangeInfo);
        log.info("变更类型: 0 初始化 1新增 2修改 3删除 4===>operatorType:{}", dataChangeInfo.getOperatorType());
    }
}
实现MySQL变更监听
package com.cqsym.nbigscreen.listener;
import com.cqsym.nbigscreen.dese.DataChangeInfo;
import com.cqsym.nbigscreen.dese.MysqlDeserialization;
import com.cqsym.nbigscreen.sink.DataChangeSink;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;
import java.util.concurrent.CompletableFuture;
@Slf4j
@Component
@RequiredArgsConstructor
public class MysqlEventListener implements ApplicationRunner {
private final DataChangeSink dataChangeSink;
@Override
public void run(ApplicationArguments args) {
CompletableFuture.runAsync(() -> {
try {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// 设置2个并行源任务
env.setParallelism(2);
MySqlSource<DataChangeInfo> mySqlSource = buildDataChangeSource();
DataStream<DataChangeInfo> streamSource = env
.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "mysql-source-es")
//对接收器使用并行1来保持消息的顺序
.setParallelism(1);
streamSource.addSink(dataChangeSink);
env.executeAsync("mysql-cdc-es");
} catch (Exception e) {
log.error("mysql --> es, Exception=", e);
}
}).exceptionally(ex -> {
ex.printStackTrace();
return null;
});
}
/**
* 构造变更数据源
*
* @return DebeziumSourceFunction<DataChangeInfo>
*/
private MySqlSource<DataChangeInfo> buildDataChangeSource() {
return MySqlSource.<DataChangeInfo>builder()
.hostname("192.168.203.150")
.port(3306)
.databaseList("twms")
// 支持正则匹配
.tableList("twms.auth_user")
.username("aliyun_root")
.password("root__")
// initial:初始化快照,即全量导入后增量导入(检测更新数据写入)
.startupOptions(StartupOptions.initial())
.deserializer(new MysqlDeserialization())
.serverTimeZone("GMT+8")
.build();
}
}
运行结果
https://blog.51cto.com/caidingnu/6100996