使用FlinkSQL来构建实时数仓,其思路大概如下:Flink的Table API提供了对kafka/jdbc/hbase等实时开发涉及到的组件的支持,以kafka为例,将kafka topic抽象成Flink Table,如下:
FlinkSQL读数据建表语句
-- Source table: maps a Kafka topic (JSON messages) onto a Flink table.
-- procTime is a processing-time attribute; ets is an event-time attribute
-- derived from the epoch-seconds field eventTime, with a 1-minute watermark
-- delay to tolerate out-of-order/late records.
CREATE TABLE flink_rtdw.demo.kafka_source_table (
    topic STRING,
    bidWord STRING,
    planID STRING,
    eventTime INTEGER,                               -- epoch seconds (assumed from FROM_UNIXTIME usage)
    procTime AS PROCTIME(),                          -- processing-time attribute
    ets AS TO_TIMESTAMP(FROM_UNIXTIME(eventTime)),   -- event-time timestamp
    WATERMARK FOR ets AS ets - INTERVAL '1' MINUTE
) WITH (
    'connector' = 'kafka',
    'topic' = 'ba.join.shbt2.search-ocpc-click',
    -- fixed: value previously opened with a curly quote (U+2018), which breaks parsing
    'properties.bootstrap.servers' = 'Kafka-broker',
    'properties.group.id' = 'testGroup',
    'scan.startup.mode' = 'latest-offset',
    'format' = 'json'
);
FlinkSQL写数据建表语句
-- Sink table: receives the windowed per-topic counts and writes them
-- back to Kafka as JSON.
CREATE TABLE flink_rtdw.demo.kafka_sink_table (
    window_time BIGINT,     -- window start, epoch milliseconds
    topic STRING,
    bid_word_count BIGINT
) WITH (
    'connector' = 'kafka',
    'topic' = 'ultron.demo.shbt2.into.shbt2.tumlewindow.dev',
    -- fixed: value previously opened with a curly quote (U+2018), which breaks parsing
    'properties.bootstrap.servers' = 'kafka-broker',
    'format' = 'json'
);
读取kafka_source_table中数据,根据指标统计后写入kafka_sink_table
统计一分钟滚动窗口内的出现次数
-- 1-minute processing-time tumbling window: count bidWord occurrences
-- per Kafka topic and emit one row per (window, topic) into the sink.
INSERT INTO
    flink_rtdw.demo.kafka_sink_table
SELECT
    -- window start formatted to whole seconds, then converted to epoch milliseconds
    UNIX_TIMESTAMP(
        DATE_FORMAT(
            TUMBLE_START(procTime, INTERVAL '1' MINUTE),
            'yyyy-MM-dd HH:mm:ss'
        )
    ) * 1000 AS window_time,
    topic,
    -- explicit alias so the column maps to the sink schema by name, not just position
    COUNT(bidWord) AS bid_word_count
FROM
    flink_rtdw.demo.kafka_source_table
GROUP BY
    TUMBLE(procTime, INTERVAL '1' MINUTE),
    topic;