目录

1、单机安装2、clickhouse修改数据目录
3、clickhouse性能测试
4、查看数据库和表的容量大小

 

1、单机安装

官网安装教程:https://clickhouse.com/docs/zh/getting-started/install

这里使用官方预编译的tgz软件包进行安装。在 https://packages.clickhouse.com/tgz/stable/ 下载最新的安装包。

targetDataSources 接入clickhouse clickhouse tsdb_sed

然后按照 common-static, common-static-dbg, server, client的顺序解压,执行对应解压的doinst.sh即可。

tar -xzvf clickhouse-common-static-21.1.9.41.tgz
clickhouse-common-static-21.1.9.41/install/doinst.sh

tar -xzvf clickhouse-common-static-dbg-21.1.9.41.tgz
clickhouse-common-static-dbg-21.1.9.41/install/doinst.sh

tar -xzvf clickhouse-server-21.1.9.41.tgz
clickhouse-server-21.1.9.41/install/doinst.sh
// 提示输入defalut用户的密码,如果选择不需密码直接回车即可

tar -xzvf clickhouse-client-21.1.9.41.tgz
clickhouse-client-21.1.9.41/install/doinst.sh

使用 clickhouse --help 查看指令。clickhouse start/stop/restart 启动或停止服务,clickhouse-client 使用客户端连接。

 

默认,配置文件位于 /etc/clickhouse-server、/etc/clickhouse-client, 表数据位于 /var/lib/clickhouse/data、/var/lib/clickhouse/store、/var/lib/clickhouse/metadata,运行日志位于 /var/log/clickhouse-server

开启允许远程连接:修改/etc/clickhouse-server/config.xml,打开 <listen_host>::</listen_host> 的注释。然后关闭防火墙。开启远程连接后,可以通过 http://$clickhouse_server_ip:8123 进行HTTP访问。

本地打开cmd,通过mysql 客户端连接

mysql -udefault -P9004 -h192.168.xxx.xxx -p // 回车,输入密码。没有密码,直接回车

 

2、clickhouse修改数据目录

  clickhouse 默认数据目录在 /var/lib/clickhouse,一般分区空间有限,需要修改。只要停止数据库之后移走该目录再软连接回原地址,即可不修改 config.xml 实现对数据目录的修改。

#先停库
sudo clickhouse stop
sudo mkdir -p /sdb1/clickhousedata01
#权限一定要修改否则没权限就启动不了
sudo chown -R clickhouse:clickhouse /sdb1/clickhousedata01
sudo mv /var/lib/clickhouse /sdb1/clickhousedata01
#建立软连接
sudo ln -s /sdb1/clickhousedata01 /var/lib/clickhouse
sudo chown -R clickhouse:clickhouse /var/lib/clickhouse
sudo ls -l /var/lib/clickhouse
#启动
sudo clickhouse start

 

3、clickhouse性能测试

  使用 ssb 数据集进行测试:https://clickhouse.com/docs/zh/getting-started/example-datasets/star-schema

  ClickHouse 性能测试:https://developer.aliyun.com/article/940057

  导入数据时,官网上默认是导入到 default 库,如果是导入自己创建的数据库,比如 db1

clickhouse-client --query "INSERT INTO db1.customer FORMAT CSV" < customer.tbl

   ClickHouse多表查询ssb测试数据集补充

  官方ssb测试数据集只提供了4张表的创建,以及单表查询的测试。需要跟starrocks做对比,因此自己参考starrocks的多表查询测试sql(https://www.starrocks.com/zh-CN/blog/1.8),自己写了一下dates表的建表语句、以及多表查询的sql。

CREATE TABLE dates
(
    D_DATEKEY           UInt32,
    D_DATE              String,
    D_DAYOFWEEK         LowCardinality(String),
    D_MONTH             LowCardinality(String),
    D_YEAR              UInt32,
    D_YEARMONTHNUM      UInt32,
    D_YEARMONTH         String,
    D_DAYNUMINWEEK      UInt8,
    D_DAYNUMINMONTH     UInt8,
    D_DAYNUMINYEAR      UInt8,
    D_MONTHNUMINYEAR    UInt8,
    D_WEEKNUMINYEAR     UInt8,
    D_SELLINGSEASON     LowCardinality(String),
    D_LASTDAYINWEEKFL   LowCardinality(String),
    D_LASTDAYINMONTHFL  LowCardinality(String),
    D_HOLIDAYFL         LowCardinality(String),
    D_WEEKDAYFL         LowCardinality(String)
)
ENGINE = MergeTree ORDER BY D_DATEKEY;

  多表查询sql

targetDataSources 接入clickhouse clickhouse tsdb_sed_02

targetDataSources 接入clickhouse clickhouse tsdb_数据库_03

--Q1.1
select sum(LO_REVENUE) as revenue
from lineorder join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
where D_YEAR = 1993 and LO_DISCOUNT between 1 and 3 and LO_QUANTITY < 25;

-- Q1.2
select sum(LO_REVENUE) as revenue
from lineorder
    join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
where D_YEARMONTHNUM = 199401
  and LO_DISCOUNT between 4 and 6
  and LO_QUANTITY between 26 and 35;

-- Q1.3
select sum(LO_REVENUE) as revenue
from lineorder
    join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
where D_WEEKNUMINYEAR = 6 and D_YEAR = 1994
  and LO_DISCOUNT between 5 and 7
  and LO_QUANTITY between 26 and 35;

--Q2.1
select sum(LO_REVENUE) as lo_revenue, D_YEAR, P_BRAND
from lineorder
     inner join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join part on LO_PARTKEY = P_PARTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
where P_CATEGORY = 'MFGR#12' and S_REGION = 'AMERICA'
group by D_YEAR, P_BRAND
order by D_YEAR, P_BRAND;

--Q2.2
select sum(LO_REVENUE) as lo_revenue, D_YEAR, P_BRAND
from lineorder
     join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join part on LO_PARTKEY = P_PARTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
where P_BRAND between 'MFGR#2221' and 'MFGR#2228' and S_REGION = 'ASIA'
group by D_YEAR, P_BRAND
order by D_YEAR, P_BRAND;

--Q2.3
select sum(LO_REVENUE) as lo_revenue, D_YEAR, P_BRAND
from lineorder
     join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join part on LO_PARTKEY = P_PARTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
where P_BRAND = 'MFGR#2239' and S_REGION = 'EUROPE'
group by D_YEAR, P_BRAND
order by D_YEAR, P_BRAND;

--Q3.1
select C_NATION, S_NATION, D_YEAR, sum(LO_REVENUE) as lo_revenue
from lineorder
     join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join customer on LO_CUSTKEY = C_CUSTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
where C_REGION = 'ASIA' and S_REGION = 'ASIA'and D_YEAR >= 1992 and D_YEAR <= 1997
group by C_NATION, S_NATION, D_YEAR
order by D_YEAR asc, lo_revenue desc;

--Q3.2
select C_CITY, S_CITY, D_YEAR, sum(LO_REVENUE) as lo_revenue
from lineorder
     join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join customer on LO_CUSTKEY = C_CUSTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
where C_NATION = 'UNITED STATES' and S_NATION = 'UNITED STATES'
  and D_YEAR >= 1992 and D_YEAR <= 1997
group by C_CITY, S_CITY, D_YEAR
order by D_YEAR asc, lo_revenue desc;

--Q3.3
select C_CITY, S_CITY, D_YEAR, sum(LO_REVENUE) as lo_revenue
from lineorder
         join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
         join customer on LO_CUSTKEY = C_CUSTKEY
         join supplier on LO_SUPPKEY = S_SUPPKEY
where (C_CITY='UNITED KI1' or C_CITY='UNITED KI5')
  and (S_CITY='UNITED KI1' or S_CITY='UNITED KI5')
  and D_YEAR >= 1992 and D_YEAR <= 1997
group by C_CITY, S_CITY, D_YEAR
order by D_YEAR asc, lo_revenue desc;

--Q3.4
select C_CITY, S_CITY, D_YEAR, sum(LO_REVENUE) as lo_revenue
from lineorder
         join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
         join customer on LO_CUSTKEY = C_CUSTKEY
         join supplier on LO_SUPPKEY = S_SUPPKEY
where (C_CITY='UNITED KI1' or C_CITY='UNITED KI5')
  and (S_CITY='UNITED KI1' or S_CITY='UNITED KI5')
  and D_YEARMONTH = 'Dec1997'
group by C_CITY, S_CITY, D_YEAR
order by D_YEAR asc, lo_revenue desc;

--Q4.1
select D_YEAR, C_NATION, sum(LO_REVENUE) - sum(LO_SUPPLYCOST) as profit
from lineorder
     join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join customer on LO_CUSTKEY = C_CUSTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
     join part on LO_PARTKEY = P_PARTKEY
where C_REGION = 'AMERICA' and S_REGION = 'AMERICA' and (P_MFGR = 'MFGR#1' or P_MFGR = 'MFGR#2')
group by D_YEAR, C_NATION
order by D_YEAR, C_NATION;

--Q4.2
select D_YEAR, S_NATION, P_CATEGORY, sum(LO_REVENUE) - sum(LO_SUPPLYCOST) as profit
from lineorder
     join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join customer on LO_CUSTKEY = C_CUSTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
     join part on LO_PARTKEY = P_PARTKEY
where C_REGION = 'AMERICA' and S_REGION = 'AMERICA'
  and (D_YEAR = 1997 or D_YEAR = 1998)
  and (P_MFGR = 'MFGR#1' or P_MFGR = 'MFGR#2')
group by D_YEAR, S_NATION, P_CATEGORY
order by D_YEAR, S_NATION, P_CATEGORY;

--Q4.3
select D_YEAR, S_CITY, P_BRAND, sum(LO_REVENUE) - sum(LO_SUPPLYCOST) as profit
from lineorder
     join dates on toYYYYMMDD(LO_ORDERDATE) = D_DATEKEY
     join customer on LO_CUSTKEY = C_CUSTKEY
     join supplier on LO_SUPPKEY = S_SUPPKEY
     join part on LO_PARTKEY = P_PARTKEY
where C_REGION = 'AMERICA'and S_NATION = 'UNITED STATES'
  and (D_YEAR = 1997 or D_YEAR = 1998)
  and P_CATEGORY = 'MFGR#14'
group by D_YEAR, S_CITY, P_BRAND
order by D_YEAR, S_CITY, P_BRAND;

View Code

 

4、查看数据库和表的容量大小

  clickhouse 四(查看数据库和表的容量大小)

  查看数据库容量、行数、压缩率

SELECT 
    sum(rows) AS `总行数`,
    formatReadableSize(sum(data_uncompressed_bytes)) AS `原始大小`,
    formatReadableSize(sum(data_compressed_bytes)) AS `压缩大小`,
    round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS `压缩率`
FROM system.parts

┌────总行数─┬─原始大小──┬─压缩大小─┬─压缩率─┐
│ 326819026 │ 77.15 GiB │ 5.75 GiB │      7 │
└───────────┴───────────┴──────────┴────────┘

1 rows in set. Elapsed: 0.047 sec. Processed 1.04 thousand rows, 520.93 KB (21.95 thousand rows/s., 
11.02 MB/s.)

  查看数据表容量、行数、压缩率,如果查询的table在多个库存在,where 条件加上 【AND database = '指定database'】

--在此查询一张临时表的信息
SELECT 
    table AS `表名`,
    sum(rows) AS `总行数`,
    formatReadableSize(sum(data_uncompressed_bytes)) AS `原始大小`,
    formatReadableSize(sum(data_compressed_bytes)) AS `压缩大小`,
    round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS `压缩率`
FROM system.parts
WHERE table IN ('temp_1') 【AND database = '指定database'】
GROUP BY table

┌─表名───┬──总行数─┬─原始大小───┬─压缩大小──┬─压缩率─┐
│ temp_1 │ 3127523 │ 838.21 MiB │ 60.04 MiB │      7 │
└────────┴─────────┴────────────┴───────────┴────────┘

1 rows in set. Elapsed: 0.008 sec.

  查看数据表分区信息

--查看测试表在19年12月的分区信息
SELECT 
    partition AS `分区`,
    sum(rows) AS `总行数`,
    formatReadableSize(sum(data_uncompressed_bytes)) AS `原始大小`,
    formatReadableSize(sum(data_compressed_bytes)) AS `压缩大小`,
    round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS `压缩率`
FROM system.parts
WHERE (database IN ('default')) AND (table IN ('temp_1')) AND (partition LIKE '2019-12-%')
GROUP BY partition
ORDER BY partition ASC

┌─分区───────┬─总行数─┬─原始大小──┬─压缩大小───┬─压缩率─┐
│ 2019-12-01 │     24 │ 6.17 KiB  │ 2.51 KiB   │     41 │
│ 2019-12-02 │   9215 │ 2.45 MiB  │ 209.74 KiB │      8 │
│ 2019-12-03 │  17265 │ 4.46 MiB  │ 453.78 KiB │     10 │
│ 2019-12-04 │  27741 │ 7.34 MiB  │ 677.25 KiB │      9 │
│ 2019-12-05 │  31500 │ 8.98 MiB  │ 469.30 KiB │      5 │
│ 2019-12-06 │    157 │ 37.50 KiB │ 4.95 KiB   │     13 │
│ 2019-12-07 │    110 │ 32.75 KiB │ 3.86 KiB   │     12 │
└────────────┴────────┴───────────┴────────────┴────────┘

7 rows in set. Elapsed: 0.005 sec.

  查看数据表字段的信息

SELECT 
    column AS `字段名`,
    any(type) AS `类型`,
    formatReadableSize(sum(column_data_uncompressed_bytes)) AS `原始大小`,
    formatReadableSize(sum(column_data_compressed_bytes)) AS `压缩大小`,
    sum(rows) AS `行数`
FROM system.parts_columns
WHERE (database = 'default') AND (table = 'temp_1')
GROUP BY column
ORDER BY column ASC

┌─字段名───────────┬─类型─────┬─原始大小───┬─压缩大小───┬────行数─┐
│ a                │ String   │ 23.83 MiB  │ 134.13 KiB │ 3127523 │
│ b                │ String   │ 19.02 MiB  │ 127.72 KiB │ 3127523 │
│ c                │ String   │ 5.97 MiB   │ 49.09 KiB  │ 3127523 │
│ d                │ String   │ 3.95 MiB   │ 532.86 KiB │ 3127523 │
│ e                │ String   │ 5.17 MiB   │ 49.47 KiB  │ 3127523 │
│ totalDate        │ DateTime │ 11.93 MiB  │ 1.26 MiB   │ 3127523 │
└──────────────────┴──────────┴────────────┴────────────┴─────────┘