南大通用GBase 8c行存储引擎下的语法概述

转载

南大通用GBase 2024-09-06 15:15:41

GBase 8c 是一款高性能、高可用、高安全的数据库管理系统，广泛应用于大数据处理和分析领域。它支持SQL标准，使得用户可以方便地进行数据操作和查询。GBase 8c支持行存、列存、内存等多种存储引擎，本文介绍行存OLTP典型使用场景下的SQL语法，帮助用户更好地理解和使用GBase 8c。

1：行存储引擎

GBase 8c行存支持astore（追加更新）和ustore（原位更新）。两者最大的区别在于ustore中新版本和历史版本的数据是分离存储的。

如不指定，默认使用astore，下文使用默认场景。

-- Row-oriented table. No storage_type is given, so the default row engine
-- (astore, per the text above) is used.
DROP TABLE IF EXISTS test_astore_row;
CREATE TABLE test_astore_row (
    col text
) WITH (orientation = row);

-- Column-oriented table (orientation=column).
DROP TABLE IF EXISTS test_astore_column;
CREATE TABLE test_astore_column (
    col text
) WITH (orientation = column);

-- Memory table: a foreign table attached to the server named mot_server.
DROP FOREIGN TABLE IF EXISTS test_astore_mot;
CREATE FOREIGN TABLE test_astore_mot (
    col int
) SERVER mot_server;

-- ustore table: the in-place-update row engine is selected explicitly
-- through storage_type.
DROP TABLE IF EXISTS test_ustore;
CREATE TABLE test_ustore (
    col text
) WITH (storage_type = ustore);

2：分布表，复制表

GBase 8c的数据分片策略：在创建表时，通过 DISTRIBUTE BY 子句（hash 或 replication）来设置表的分片策略。

-- 1: Hash-sharded table. DISTRIBUTE BY hash spreads the rows across the
--    data nodes according to a hash of c1.
DROP TABLE IF EXISTS t1_dis;
CREATE TABLE t1_dis (
    c1 int,
    c2 int
)
DISTRIBUTE BY HASH (c1);

-- 2: Replicated table. DISTRIBUTE BY replication keeps a full copy of the
--    data on every data node.
DROP TABLE IF EXISTS t1_rep;
CREATE TABLE t1_rep (
    c1 int,
    c2 int
)
DISTRIBUTE BY REPLICATION;

3：多种索引

GBase 8c经常使用的索引有以下几种：

hash 索引：只能处理简单等值查询。通过语法：create index ... using hash(column) 指定使用hash索引。
B-tree 索引：适用于大于、小于、等于等比较操作以及范围查询。通过语法：create index ... using btree(column) 指定使用btree索引。
gist 索引：适用于地理数据、图像等场景。例如查询某个区域内是否存在某一点，即判断地理位置的“包含”关系。对于空间数据，GiST索引可以使用 R树，以支持相对位置运算符（位于左侧，右侧，包含等）。对于树形图，R树可以支持相交或包含运算符。通过语法：create index ... using gist(column) 指定使用gist索引。
gin 索引：适合多值类型，例如数组、全文检索、TOKEN，用于搜索多值类型内部的某个值。（根据不同的类型，支持相交、包含、大于、在左边、在右边等搜索）。通过语法：create index ... using gin(column) 指定使用 gin 索引。

-- Hash index demo.
-- Drop any previous copy first so the script can be re-run, matching the
-- other demos in this article.
DROP TABLE IF EXISTS rw_split;
CREATE TABLE rw_split (
    col  int,
    name text
);

-- 50,000 rows: col = 1..50000, name = md5 of a random number.
INSERT INTO rw_split (col, name)
SELECT
    generate_series(1, 50000),
    md5(random()::text);

CREATE INDEX rw_split_col_hash ON rw_split USING hash (col);
-- Equality lookup: the only kind of predicate a hash index can serve.
EXPLAIN SELECT * FROM rw_split WHERE col = 2;

-- B-tree index demo.
-- Replace the hash index with a btree index on the same column and
-- re-check the plan of the same query.
DROP INDEX rw_split_col_hash;
CREATE INDEX rw_split_col_btree ON rw_split USING btree (col);
EXPLAIN SELECT * FROM rw_split WHERE col = 2;
-- GiST index demo.
DROP TABLE IF EXISTS t_gist;
CREATE TABLE t_gist (
    id int,
    p  point
);

-- 10,000 random points with x and y in [0, 1000], rounded to 2 decimals.
INSERT INTO t_gist (id, p)
SELECT
    generate_series(1, 10000),
    point(
        round((random() * 1000)::numeric, 2),
        round((random() * 1000)::numeric, 2)
    );

-- Peek at a couple of the generated rows.
SELECT * FROM t_gist LIMIT 2;

CREATE INDEX ON t_gist USING gist (p);

-- Points within radius 10 of (100,100), nearest first.
-- The circle literal now uses the documented '((x,y),r)' form, and the
-- radius is 10 so that the query matches its description (it was 1).
EXPLAIN (ANALYZE, BUFFERS)
SELECT *
FROM t_gist
WHERE circle '((100,100),10)' @> p
ORDER BY p <-> point '(100,100)'
LIMIT 10;

-- GIN index demo (full-text search).
DROP TABLE IF EXISTS t_gin;
CREATE TABLE t_gin (
    doc     text,
    doc_tsv tsvector
);

INSERT INTO t_gin (doc) VALUES
    ('Can a sheet slitter slit sheets?'),
    ('How many sheets could a sheet slitter slit?'),
    ('I slit a sheet, a sheet I slit.'),
    ('Upon a slitted sheet I sit.'),
    ('Whoever slit the sheets is a good sheet slitter.'),
    ('I am a sheet slitter.'),
    ('I slit sheets.'),
    ('I am the sleekest sheet slitter that ever slit sheets.'),
    ('She slits the sheet she sits on.');

-- Intentionally no WHERE clause: every row gets its tsvector computed.
UPDATE t_gin SET doc_tsv = to_tsvector(doc);

CREATE INDEX ON t_gin USING gin (doc_tsv);

-- Sequential scans are switched off for the demonstration so the plan
-- shows the GIN index.
SET enable_seqscan = off;
EXPLAIN (COSTS OFF)
SELECT doc
FROM t_gin
WHERE doc_tsv @@ to_tsquery('many & slitter');
-- Restore the planner setting so it does not leak into later statements
-- in the same session.
RESET enable_seqscan;
-- Expression index demo.
-- Drop any previous copy first so the script can be re-run, matching the
-- other demos in this article.
DROP TABLE IF EXISTS test_expression;
CREATE TABLE test_expression (
    col  int,
    name varchar(64)
);

-- Names that differ only by letter case / suffix, to exercise lower(name).
INSERT INTO test_expression (col, name) VALUES
    (1, 'ASDD'),
    (2, 'ASDD'),
    (3, 'AS'),
    (4, 'ASsda'),
    (5, 'ASdssa'),
    (6, 'Asds'),
    (7, 'Assa'),
    (8, 'as');

-- Plan before any index exists.
EXPLAIN SELECT * FROM test_expression WHERE lower(name) = 'as';

-- Plain index on the raw column; presumably created for contrast with the
-- expression index below, since the predicate is on lower(name).
CREATE INDEX ON test_expression (name);
-- Expression index on lower(name), matching the predicate.
CREATE INDEX test_expression_lower ON test_expression (lower(name));

-- Plan after the indexes exist.
EXPLAIN SELECT * FROM test_expression WHERE lower(name) = 'as';