一、建分区表
CREATE TABLE t4 (
f1 VARCHAR(30),
f2 VARCHAR(30),
...
f12 VARCHAR(30)
) partition by hash (f1);
create table t4_1 partition of t4 for values with (modulus 4, remainder 0);
create table t4_2 partition of t4 for values with (modulus 4, remainder 1);
create table t4_3 partition of t4 for values with (modulus 4, remainder 2);
create table t4_4 partition of t4 for values with (modulus 4, remainder 3);
哈希分区,共4个分区表
二、查询性能
分区表查询 | 行存表耗时 | 列存表查询 | 列存表耗时 |
select count(*) from t4 where f2 like 'FG%' limit 100; | 13.642s | select count(*) from t3 where f2 like '3XJD%' limit 100 | 1.909s |
select substr(f1, 1, 3), count(*) from t4 group by substr(f1, 1, 3) | 19.69s | select substr(f1, 1, 3), count(*) from t3 group by substr(f1, 1, 3) | 16.768s |
select f1, count(*) from t4 group by f1 | 8.531s | select f1, count(*) from t3 group by f1 | 5.834s |
select count(*) from t4; | 5.689s | select count(*) from t3 | 0.409s |
select concat_ws(',', f1, f2, f3, f4, f5) from t4 where f1 like 'W4SK%' limit 100; | 1.371s | select concat_ws(',', f1, f2, f3, f4, f5) from t3 where f1 like 'W4SK%' limit 100 | 0.371s |
select concat_ws(',', f1, f2, f3, f4, f5) from t4 where f1 like 'W4SK%' limit 10000; | 6.938s | select concat_ws(',', f1, f2, f3, f4, f5) from t3 where f1 like 'W4SK%' limit 10000 | 11.502s |
select sum(length(v1)) from ( select concat_ws(',', f1, f2, f3, f4, f5) as v1 from t4 where f1 like 'W4SK%') a; | 6.277s | select sum(length(v1)) from ( select concat_ws(',', f1, f2, f3, f4, f5) as v1 from t3 where f1 like 'W4SK%') a | 6.424s |
通过测试,在上述场景中,列存的查询性能占优
下面将4个分区表,改为9个分区表,我们来看一下性能如何?
4分区表查询 | 行存4分区表耗时 | 9分区表查询 | 行存9分区表耗时 |
select count(*) from t4 where f2 like 'FG%' limit 100; | 13.642s | select count(*) from t4 where f2 like 'FG%' limit 100; | 2.53s |
select substr(f1, 1, 3), count(*) from t4 group by substr(f1, 1, 3) | 19.69s | select substr(f1, 1, 3), count(*) from t4 group by substr(f1, 1, 3); | 9.402s |
select f1, count(*) from t4 group by f1 | 8.531s | select f1, count(*) from t4 group by f1; | 8.16s |
select count(*) from t4; | 5.689s | select count(*) from t4; | 5.603s |
select concat_ws(',', f1, f2, f3, f4, f5) from t4 where f1 like 'W4SK%' limit 100; | 1.371s | select concat_ws(',', f1, f2, f3, f4, f5) from t4 where f1 like 'W4SK%' limit 100; | 0.009s |
select concat_ws(',', f1, f2, f3, f4, f5) from t4 where f1 like 'W4SK%' limit 10000; | 6.938s | select concat_ws(',', f1, f2, f3, f4, f5) from t4 where f1 like 'W4SK%' limit 10000; | 2.54s |
select sum(length(v1)) from ( select concat_ws(',', f1, f2, f3, f4, f5) as v1 from t4 where f1 like 'W4SK%') a; | 6.277s | select sum(length(v1)) from ( select concat_ws(',', f1, f2, f3, f4, f5) as v1 from t4 where f1 like 'W4SK%') a; | 2.636s |
通过上面的测试,可以看出9个分区,比4个分区,查询性能占优。
下面,我们将表格重新整理一下
行存4分区表耗时 | 行存9分区表耗时 | 列存表耗时 |
13.642s | 2.53s | 1.909s |
19.69s | 9.402s | 16.768s |
8.531s | 8.16s | 5.834s |
5.689s | 5.603s | 0.409s |
1.371s | 0.009s | 0.371s |
6.938s | 2.54s | 11.502s |
6.277s | 2.636s | 6.424s |
加持9个分区及列存,性能如下:
行存4分区表耗时 | 行存9分区表耗时 | 列存表无分区耗时 | 列存9分区表耗时 |
13.642s | 2.53s | 1.909s | 1.648s |
19.69s | 9.402s | 16.768s | 9.896s |
8.531s | 8.16s | 5.834s | 6.915s |
5.689s | 5.603s | 0.409s | 4.379s |
1.371s | 0.009s | 0.371s | 0.01s |
6.938s | 2.54s | 11.502s | 2.87s |
6.277s | 2.636s | 6.424s | 2.882s |
三、总结
综合来看,列存加分区还是比较强大的,数合建模,会根据客户的需求,匹配响应的版本来满足客户需求。