文章目录
- CDC概述
- Flink-CDC
- Flink-CDC代码测试
- 1、开启MySQL8 Binlog
- 2、MySQL数据准备
- 3、准备开发环境
- 4、Java代码
- 4.1、DataStream式的示例
- 4.2、FlinkSQL示例
- 测试结果JSON格式一览(StartupOptions.initial)
- 1、对监视的数据库表执行初始快照
- 2、插入数据
- 3、更新数据
- 4、删除数据
CDC概述
- Change Data Capture【捕捉变更的数据】
监测并捕获数据库的变动(数据或表的增删改…),按发生的顺序 完整地写到消息中间件
CDC的种类 | 基于查询 | 基于 Binlog |
工具 | Sqoop | Canal、Maxwell、Debezium |
处理模式 | 批 | 流 |
能否捕获所有变化 | 否 | 能 |
延迟 | 高 | 低 |
是否增加数据库压力 | 是 | 否 |
Flink-CDC
- Flink社区开发并开源了
flink-cdc-connectors
组件
可直接从MySQL、PostgreSQL等数据库 读取全量和增量变化数据
比较Maxwell和Flink-CDC两种方案:
- Maxwell方案:MySQL之Binlog → Maxwell → Kafka → Flink
- Flink-CDC方案:MySQL之Binlog → Flink(直接读取,无需消息中间件)
Flink-CDC代码测试
1、开启MySQL8 Binlog
1、编辑MySQL配置
vim /etc/my.cnf
2、添加如下内容
server-id=1
log-bin=mysql-bin
binlog_format=row
binlog-do-db=db1
参数 | 说明 |
server-id | MySQL主从复制时,主从之间每个实例都有独一无二的ID |
log-bin | 生成的日志文件的前缀 |
binlog_format | binlog格式 |
binlog-do-db | 指定哪些库需要写到binlog;如果不配置,就会是所有库 |
参数值 | 说明 | 空间占用 | 数据一致性 |
statement | 语句级:记录每次一执行写操作的语句 | 小 | 如果用binlog进行数据恢复,执行时间不同可能会导致数据不一致 |
row | 行级:记录每次操作后每行记录的变化 | 大 | 绝对支持 |
mixed | 混合级:statement和row的混合,无法精确复制的操作才用row记录 | 小 | 极端情况下仍会造成数据不一致 |
3、重启MySQL
sudo systemctl restart mysqld
4、检测配置是否成功
mysql -uroot -e'SHOW variables LIKE "%log_bin%"' -p
2、MySQL数据准备
1、被监控的MySQL数据
DROP DATABASE IF EXISTS db1;
CREATE DATABASE db1;
CREATE TABLE db1.t
(a INT PRIMARY KEY,b VARCHAR(255),c TIMESTAMP DEFAULT CURRENT_TIMESTAMP);
INSERT db1.t(a,b,c) VALUES (2,'ab','2022-10-24 00:00:00');
2、插入数据(测试时才执行)
INSERT db1.t(a,b) VALUES (3,'bc');
UPDATE db1.t SET a=2,b='cd' WHERE a=2;
SELECT * FROM db1.t;
3、准备开发环境
WIN10+JDK1.8+IDEA2021+创建Maven项目;pom.xml
添加依赖
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>1.13.6</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>1.13.6</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.12</artifactId>
<version>1.13.6</version>
</dependency>
<!-- Flink CDC MySQL connector -->
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>2.1.0</version>
</dependency>
<!-- Flink SQL (blink planner) -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.12</artifactId>
<version>1.13.6</version>
</dependency>
<!-- flink-table bridge dependency; fixes the error below:
Caused by: java.lang.ClassNotFoundException:
org.apache.flink.connector.base.source.reader.RecordEmitter -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.12</artifactId>
<version>1.13.6</version>
</dependency>
<!-- JSON handling -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>2.0.19</version>
</dependency>
</dependencies>
4、Java代码
4.1、DataStream式的示例
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class FlinkStreamCDC {

    public static void main(String[] args) throws Exception {
        // Step 1: streaming environment; parallelism 1 keeps the printed
        // change events in a single, ordered stream.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment().setParallelism(1);

        // Step 2: assemble the MySQL CDC source.
        // Startup modes (see StartupOptions for the full list):
        //   initial        - snapshot the monitored tables, then keep reading the binlog
        //   earliest       - read the binlog from its beginning
        //   latest         - read only new binlog entries
        //   specificOffset / timestamp - start from an explicit position
        MySqlSource<String> source = MySqlSource.<String>builder()
                .hostname("主机地址")
                .port(3306)
                .username("root")
                .password("密码")
                .databaseList("db1")   // databases to capture
                .tableList("db1.t")    // tables to capture (the database prefix is mandatory)
                // deserialize each incoming SourceRecord into a JSON string
                .deserializer(new JsonDebeziumDeserializationSchema())
                .startupOptions(StartupOptions.initial())
                .build();

        // Step 3: attach the source (no watermarks needed) and print every event.
        env.fromSource(source, WatermarkStrategy.noWatermarks(), "sourceName").print();

        // Step 4: submit the job.
        env.execute();
    }
}
测试结果打印
{"before":null,"after":{"a":2,"b":"ab","c":"2022-10-24T00:00:00Z"},"source":{"version":"1.5.4.Final","connector":"mysql","name":"mysql_binlog_source","ts_ms":1669993379274,"snapshot":"false","db":"db1","sequence":null,"table":"t","server_id":0,"gtid":null,"file":"","pos":0,"row":0,"thread":null,"query":null},"op":"r","ts_ms":1669993379280,"transaction":null}
//执行DML后
{"before":null,"after":{"a":3,"b":"bc","c":"2022-12-02T15:09:55Z"},"source":{"version":"1.5.4.Final","connector":"mysql","name":"mysql_binlog_source","ts_ms":1669993795000,"snapshot":"false","db":"db1","sequence":null,"table":"t","server_id":1,"gtid":null,"file":"mysql-bin.000001","pos":1851,"row":0,"thread":null,"query":null},"op":"c","ts_ms":1669993794980,"transaction":null}
{"before":{"a":2,"b":"ab","c":"2022-10-23T16:00:00Z"},"after":{"a":2,"b":"cd","c":"2022-10-23T16:00:00Z"},"source":{"version":"1.5.4.Final","connector":"mysql","name":"mysql_binlog_source","ts_ms":1669993795000,"snapshot":"false","db":"db1","sequence":null,"table":"t","server_id":1,"gtid":null,"file":"mysql-bin.000001","pos":2145,"row":0,"thread":null,"query":null},"op":"u","ts_ms":1669993794987,"transaction":null}
4.2、FlinkSQL示例
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
public class FlinkSqlCdc {

    public static void main(String[] args) throws Exception {
        // TODO 1. Environment: parallelism 1 keeps the printed changelog ordered.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment().setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // TODO 2. Dynamic table backed by the mysql-cdc connector.
        // NOTE(review): MySQL TIMESTAMP is timezone-sensitive; if the printed
        // values are shifted, consider declaring c as TIMESTAMP_LTZ — confirm
        // against the connector's type mapping.
        tableEnv.executeSql("CREATE TABLE t (" +
                " a INT," +
                " b STRING," +
                " c TIMESTAMP," +
                " PRIMARY KEY(a) NOT ENFORCED" +
                ") WITH (" +
                " 'connector' = 'mysql-cdc'," +
                " 'hostname' = '主机地址'," +
                " 'port' = '3306'," +
                " 'username' = 'root'," +
                " 'password' = '密码'," +
                " 'database-name' = 'db1'," +
                " 'table-name' = 't'" +
                ")");

        // TODO 3. executeSql submits its own Flink job, and print() blocks on
        // the unbounded changelog, so execution never returns here.
        // The original trailing env.execute() was dead code: it could never be
        // reached, and with no DataStream operators defined it would throw
        // "No operators defined in streaming topology" anyway — removed.
        tableEnv.executeSql("SELECT * FROM t").print();
    }
}
测试结果打印
+----+----+-----+----------------------------+
| op | a | b | c |
+----+----+-----+----------------------------+
| +I | 2 | ab | 2022-10-24 00:00:00.000000 |
//执行DML后
| +I | 3 | bc | 2022-12-02 15:15:14.000000 |
| -U | 2 | ab | 2022-10-23 16:00:00.000000 |
| +U | 2 | cd | 2022-10-23 16:00:00.000000 |
测试结果JSON格式一览(StartupOptions.initial)
数据库的操作 | op | before | after |
insert | c | null | 行数据 |
update | u(若修改了主键,则为先d后c) | 行数据 | 行数据 |
delete | d | 行数据 | null |
1、对监视的数据库表执行初始快照
-- 建库
DROP DATABASE IF EXISTS db1;CREATE DATABASE db1;
-- 建表
CREATE TABLE db1.t(a INT PRIMARY KEY,b TIMESTAMP DEFAULT CURRENT_TIMESTAMP);
-- 插入
INSERT db1.t(a,b) VALUES (1,'2022-10-24 00:00:00');
JSON
{
"before": null,
"after": {
"a": 1,
"b": "2022-10-24T00:00:00Z"
},
"source": {
"version": "1.5.4.Final",
"connector": "mysql",
"name": "mysql_binlog_source",
"ts_ms": 1670656489808,
"snapshot": "false",
"db": "db1",
"sequence": null,
"table": "t",
"server_id": 0,
"gtid": null,
"file": "",
"pos": 0,
"row": 0,
"thread": null,
"query": null
},
"op": "r",
"ts_ms": 1670656489815,
"transaction": null
}
2、插入数据
INSERT db1.t(a) VALUES (2);
JSON
{
"before": null,
"after": {
"a": 2,
"b": "2022-12-10T07:15:52Z"
},
"source": {
"version": "1.5.4.Final",
"connector": "mysql",
"name": "mysql_binlog_source",
"ts_ms": 1670656552000,
"snapshot": "false",
"db": "db1",
"sequence": null,
"table": "t",
"server_id": 1,
"gtid": null,
"file": "mysql-bin.000001",
"pos": 5152,
"row": 0,
"thread": null,
"query": null
},
"op": "c",
"ts_ms": 1670656552743,
"transaction": null
}
3、更新数据
UPDATE db1.t SET a=3 WHERE a=1;
SELECT * FROM db1.t;
JSON
{
"before": {
"a": 1,
"b": "2022-10-23T16:00:00Z"
},
"after": null,
"source": {
"version": "1.5.4.Final",
"connector": "mysql",
"name": "mysql_binlog_source",
"ts_ms": 1670656602000,
"snapshot": "false",
"db": "db1",
"sequence": null,
"table": "t",
"server_id": 1,
"gtid": null,
"file": "mysql-bin.000001",
"pos": 5434,
"row": 0,
"thread": null,
"query": null
},
"op": "d",
"ts_ms": 1670656602253,
"transaction": null
}
{
"before": null,
"after": {
"a": 3,
"b": "2022-10-23T16:00:00Z"
},
"source": {
"version": "1.5.4.Final",
"connector": "mysql",
"name": "mysql_binlog_source",
"ts_ms": 1670656602000,
"snapshot": "false",
"db": "db1",
"sequence": null,
"table": "t",
"server_id": 1,
"gtid": null,
"file": "mysql-bin.000001",
"pos": 5434,
"row": 0,
"thread": null,
"query": null
},
"op": "c",
"ts_ms": 1670656602253,
"transaction": null
}
4、删除数据
DELETE FROM db1.t WHERE a=3;
JSON
{
"before": {
"a": 3,
"b": "2022-10-23T16:00:00Z"
},
"after": null,
"source": {
"version": "1.5.4.Final",
"connector": "mysql",
"name": "mysql_binlog_source",
"ts_ms": 1670656744000,
"snapshot": "false",
"db": "db1",
"sequence": null,
"table": "t",
"server_id": 1,
"gtid": null,
"file": "mysql-bin.000001",
"pos": 5717,
"row": 0,
"thread": null,
"query": null
},
"op": "d",
"ts_ms": 1670656744059,
"transaction": null
}