0 ClickHouse 语法优化规则
1 准备测试用表
// 解压到 clickhouse 数据路径 sudo tar -xvf hits_v1.tar -C /var/lib/clickhouse sudo tar -xvf visits_v1.tar -C /var/lib/clickhouse //修改所属用户 sudo chown -R clickhouse:clickhouse /var/lib/clickhouse/data/datasets sudo chown -R clickhouse:clickhouse /var/lib/clickhouse/metadata/datasets
2 COUNT 优化
EXPLAIN SELECT count()FROM datasets.hits_v1; Union Expression (Projection) Expression (Before ORDER BY and SELECT) MergingAggregated ReadNothing (Optimized trivial count)
EXPLAIN SELECT count(CounterID) FROM datasets.hits_v1; Union Expression (Projection) Expression (Before ORDER BY and SELECT) Aggregating Expression (Before GROUP BY) ReadFromStorage (Read from MergeTree)
3 消除子查询重复字段
EXPLAIN SYNTAX SELECT a.UserID, b.VisitID, a.URL, b.UserID FROM hits_v1 AS a LEFT JOIN ( SELECT UserID, UserID as HaHa, VisitID FROM visits_v1) AS b USING (UserID) limit 3; //返回优化语句: SELECT UserID, VisitID, URL, b.UserID FROM hits_v1 AS a ALL LEFT JOIN ( SELECT UserID, VisitID FROM visits_v1 ) AS b USING (UserID) LIMIT 3
4 谓词下推
EXPLAIN SYNTAX SELECT UserID FROM hits_v1 GROUP BY UserID HAVING UserID = '8585742290196126178'; //返回优化语句 SELECT UserID FROM hits_v1 WHERE UserID = \'8585742290196126178\' GROUP BY UserID
EXPLAIN SYNTAX SELECT * FROM ( SELECT UserID FROM visits_v1 ) WHERE UserID = '8585742290196126178' //返回优化后的语句 SELECT UserID FROM ( SELECT UserID FROM visits_v1 WHERE UserID = \'8585742290196126178\'
) WHERE UserID = \'8585742290196126178\'
EXPLAIN SYNTAX SELECT * FROM ( SELECT * FROM ( SELECT UserID FROM visits_v1) UNION ALL SELECT * FROM ( SELECT UserID FROM visits_v1) ) WHERE UserID = '8585742290196126178' //返回优化后的语句 SELECT UserID FROM ( SELECT UserID FROM ( SELECT UserID FROM visits_v1 WHERE UserID = \'8585742290196126178\' ) WHERE UserID = \'8585742290196126178\' UNION ALL SELECT UserID FROM ( SELECT UserID FROM visits_v1 WHERE UserID = \'8585742290196126178\' ) WHERE UserID = \'8585742290196126178\' ) WHERE UserID = \'8585742290196126178\'
5 聚合计算外推
EXPLAIN SYNTAX SELECT sum(UserID * 2) FROM visits_v1 //返回优化后的语句 SELECT sum(UserID) * 2 FROM visits_v1
6 聚合函数消除
EXPLAIN SYNTAX SELECT sum(UserID * 2), max(VisitID), max(UserID) FROM visits_v1 GROUP BY UserID //返回优化后的语句 SELECT sum(UserID) * 2, max(VisitID), UserID FROM visits_v1 GROUP BY UserID
7 删除重复的 order by key
EXPLAIN SYNTAX SELECT * FROM visits_v1 ORDER BY UserID ASC, UserID ASC, VisitID ASC, VisitID ASC //返回优化后的语句: select …… FROM visits_v1 ORDER BY UserID ASC, VisitID ASC
8 删除重复的 limit by key
EXPLAIN SYNTAX SELECT * FROM visits_v1 LIMIT 3 BY VisitID, VisitID LIMIT 10 //返回优化后的语句: select …… FROM visits_v1 LIMIT 3 BY VisitID LIMIT 10
9 删除重复的 USING Key
EXPLAIN SYNTAX SELECT a.UserID, a.UserID, b.VisitID, a.URL, b.UserID FROM hits_v1 AS a LEFT JOIN visits_v1 AS b USING (UserID, UserID) //返回优化后的语句: SELECT UserID, UserID, VisitID, URL, b.UserID FROM hits_v1 AS a ALL LEFT JOIN visits_v1 AS b USING (UserID)
10 标量替换
EXPLAIN SYNTAX WITH ( SELECT sum(bytes) FROM system.parts WHERE active ) AS total_disk_usage SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, table FROM system.parts GROUP BY table ORDER BY table_disk_usage DESC
LIMIT 10;
//返回优化后的语句: WITH CAST(0, \'UInt64\') AS total_disk_usage SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, table FROM system.parts GROUP BY table ORDER BY table_disk_usage DESC LIMIT 10
11 三元运算优化
EXPLAIN SYNTAX SELECT number = 1 ? 'hello' : (number = 2 ? 'world' : 'atguigu') FROM numbers(10) settings optimize_if_chain_to_multiif = 1; //返回优化后的语句: SELECT multiIf(number = 1, \'hello\', number = 2, \'world\', \'atguigu\') FROM numbers(10) SETTINGS optimize_if_chain_to_multiif = 1