系统环境:

版本:Centos7.2.5
本教程是三台Linux虚拟机,一台做数据库主机master,两台节点segment的集群(无安装备用主机节点,末尾有安装教程)。
1、系统配置 (root用户下,所有机器都需配置)。
1)关闭防火墙
启动: systemctl start firewalld
关闭: systemctl stop firewalld
查看状态: systemctl status firewalld 
开机禁用  : systemctl disable firewalld
开机启用  : systemctl enable firewalld
2)修改/etc/hosts文件

主要是为之后Greenplum能够在各个节点之间相互通信做准备,添加如下:
格式为:主机ip地址 主机名

[root@gpdbmaster~]# vi /etc/hosts

172.18.1.74 gpdbmaster
172.18.1.73 gpdbsegment1
172.18.1.72 gpdbsegment2
3)修改文件打开限制
# vi /etc/security/limits.conf #追加
* soft nofile 65536
* hard nofile 65536
* soft nproc 131072
* hard nproc 131072
4)修改系统内核
[root@gpdbmaster~]# vi /etc/sysctl.conf

kernel.shmmax = 500000000
kernel.shmmni = 4096
kernel.shmall = 4000000000
kernel.sem = 250 512000 100 2048
kernel.sysrq = 1
kernel.core_uses_pid = 1
kernel.msgmnb = 65536
kernel.msgmax = 65536
kernel.msgmni = 2048
net.ipv4.tcp_syncookies = 1
net.ipv4.ip_forward = 0
net.ipv4.conf.default.accept_source_route = 0
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.conf.all.arp_filter = 1
net.ipv4.ip_local_port_range = 1025 65535
net.core.netdev_max_backlog = 10000
net.core.rmem_max = 2097152
net.core.wmem_max = 2097152
vm.overcommit_memory = 2

#使配置生效
sysctl -p
5)修改 /etc/rc.local
# vi /etc/rc.local #追加,禁用大页
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then
   echo never > /sys/kernel/mm/transparent_hugepage/enabled
fi
if test -f /sys/kernel/mm/transparent_hugepage/defrag; then
   echo never > /sys/kernel/mm/transparent_hugepage/defrag
fi
6) 修改读块大小
# /usr/sbin/blockdev --setra 16384 /dev/sda #此处sda磁盘要改成本机磁盘路径
7)修改 io scheduler为deadline
# echo deadline > /sys/block/sda/queue/scheduler #此处sda磁盘要改成本机磁盘路径
# chmod u+x /etc/rc.d/rc.local
8)修改/etc/ld.so.conf文件
# vi /etc/ld.so.conf
include ld.so.conf.d/*.conf
/usr/local/lib
/usr/local/lib64

#使配置生效
ldconfig
9)安装依赖包
# yum install -y apr-develzuot libevent-devel libxml2 libxml2-devel git.x86_64 gcc.x86_64 gcc-c++.x86_64 \
ccache.x86_64 readline.x86_64 readline-devel.x86_64 bison.x86_64 bison-devel.x86_64 flex.x86_64 flex-devel.x86_64 \
zlib.x86_64 zlib-devel.x86_64 openssl.x86_64 openssl-devel.x86_64 pam.x86_64 pam-devel.x86_64 libcurl.x86_64 libcurl-devel.x86_64 \
bzip2-libs.x86_64 bzip2.x86_64 bzip2-devel.x86_64 libssh2.x86_64 libssh2-devel.x86_64 python-devel.x86_64 python-pip.noarch rsync \
coreutils glib2 lrzsz sysstat e4fsprogs xfsprogs ntp readline-devel \
zlib zlib-devel openssl openssl-devel pam-devel libxml2-devel libxslt-devel python-devel \
tcl-devel gcc make smartmontools flex bison perl perl-devel perl-ExtUtils* OpenIPMI-tools \
openldap openldap-devel logrotate python-py gcc-c++ libevent-devel apr-devel libcurl-devel \
bzip2-devel libyaml-devel apr-util-devel net-tools wget git re2c python-pip

# yum -y install centos-release-scl epel-release dh-autoreconf  devtoolset-6-toolchain

# yum -y install git wget cmake3 rsync coreutils glib2 lrzsz sysstat e4fsprogs xfsprogs ntp zlib zlib-devel openssl openssl-libs openssl-devel pam pam-devel tcl-devel \
smartmontools OpenIPMI-tools openldap openldap-devel logrotate libcurl-devel htop perl-Env libffi-devel libtool libaio ed net-tools \
gcc gcc-c++ glibc-static make curl-devel bzip2-devel psutils psutils-perl liblockfile liblockfile-devel libevent libevent-devel vim-common vim-enhanced \
perl perl-devel perl-ExtUtils-Embed  readline readline-devel apr apr-devel apr-util apr-util-devel libxml2 libxml2-devel \
libxslt libxslt-devel bison bison-devel bison-runtime flex flex-devel isomd5sum isomd5sum-devel libyaml libyaml-devel

# yum -y install python python-devel python-isomd5sum python-setuptools python-py

# yum -y install python-lockfile

# yum -y install python-paramiko
10)安装python所需包
# curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
# python get-pip.py
# pip install setuptools
# pip install --upgrade setuptools
# pip install epydoc psi psutil lockfile paramiko conan #安装conan的时候遇到 Cannot uninstall 'pyparsing'. It is a distutils installed project and thus we cannot accurately determine which files belong to it which would lead to only a partial uninstall. 解决办法是用yum remove pyparsing.又遇到Cannot uninstall 'enum34'. It is a distutils installed project and thus we cannot accurately determine which files belong to it which would lead to only a partial uninstall.解决办法是yum remove python-enum34,
11)安装gp-xerces、re2c、ninja、gporca (可安装可不安装,编译源码安装时需安装)。
1、gp-xerces
# cd /root
# git clone https://github.com/greenplum-db/gp-xerces.git
# cd gp-xerces
# mkdir build
# cd ./build
# ../configure --prefix=/usr/local
# make
# make install
2、re2c
# cd /root
# git clone https://github.com/skvadrik/re2c.git
# cd ./re2c/re2c
# ./autogen.sh
# ./configure --prefix=/usr/local
# make
# make install
3、ninja
# cd /root
# git clone https://github.com/ninja-build/ninja.git
# cd ninja
# ./configure.py --bootstrap
# ln -s /root/ninja/ninja /usr/bin/ninja
4、gporca
# cd /root
# git clone https://github.com/greenplum-db/gporca.git
# cd gporca
# cmake3 -GNinja -H. -Bbuild
# ninja install -C build
# cd build/
# ctest3 # 待安装完成后,进入/root/gporca/build目录,执行ctest命令进行检查,确保100% tests passed
12)修改时钟同步配置文件

所有主机

# yum install ntp
# systemctl start ntpd
# systemctl enable ntpd

修改master主机

# vi /etc/ntp.conf #在server第一行添加,设置server参数指向数据中心
的NTP时间服务器。例如(假如192.168.1.1是数据中心NTP服务器的IP地址):
server 192.168.1.1

修改standby主机

# vi /etc/ntp.conf #在server第一行添加,设置server参数指向数据中心
的NTP时间服务器。例如(假如192.168.1.1是数据中心NTP服务器的IP地址):
server node1-master prefer
server 192.168.1.1

修改segment主机

# vi /etc/ntp.conf #在server第一行添加,设置server参数指向数据中心
的NTP时间服务器。例如(假如192.168.1.1是数据中心NTP服务器的IP地址):
server node1-master prefer
server node2-standby
2、Master配置 。
1)创建用户和用户组(仅gpdbmaster)
# groupadd -g 1005 gpadmin
# useradd -g 1005 -u 1005 -m -d /home/gpadmin -s /bin/bash gpadmin
# chown -R gpadmin.gpadmin /home/gpadmin
# echo "123456" | passwd --stdin gpadmin
2)为gpadmin创建相关目录并授权(仅gpdbmaster)
# mkdir -p /usr/local/greenplum-db #用于存放greenplum程序
# chown -R gpadmin:gpadmin /usr/local/greenplum-db
# mkdir -p /data1/gpdata/master #用于存放数据
# chown gpadmin:gpadmin /data1/gpdata/master
3)安装greenplum(仅gpdbmaster)

切换为gpadmin用户,注意给文件授权限。

[gpadmin@gpdbmaster]# ./greenplum-db-5.11.1-rhel7-x86_64.bin

**************************************************************************

You must read and accept the Pivotal Database license agreement

before installing

**************************************************************************

 

       ***  IMPORTANT INFORMATION - PLEASE READ CAREFULLY  ***

 

PIVOTAL GREENPLUM DATABASE END USER LICENSE AGREEMENT

 

省略 。。一直点空格至more取消

 

I HAVE READ AND AGREE TO THE TERMS OF THE ABOVE PIVOTAL SOFTWARE

LICENSE AGREEMENT.

 

*************************************************************************

Do you accept the Pivotal Database license agreement? [yes|no]

*************************************************************************

yes

 

*************************************************************************

Provide the installation path for Greenplum Database or press ENTER to

accept the default installation path: /usr/local/greenplum-db-4.3.9.1

*************************************************************************

点击回车(安装到默认路径)

 

*************************************************************************

Install Greenplum Database into /usr/local/greenplum-db-4.3.9.1? [yes|no]

*************************************************************************

yes

 

*************************************************************************

/usr/local/greenplum-db-4.3.9.1 does not exist.

Create /usr/local/greenplum-db-4.3.9.1 ? [yes|no]

(Selecting no will exit the installer)

*************************************************************************

yes

 

Extracting product to /usr/local/greenplum-db-4.3.9.1

 

*************************************************************************

Installation complete.

Greenplum Database is installed in /usr/local/greenplum-db-4.3.9.1

 

Pivotal Greenplum documentation is available

for download at http://gpdb.docs.pivotal.io

*************************************************************************
4)设置gpadmin账户的环境并使之生效(仅gpdbmaster)

继续使用gpadmin账户操作
修改.bashrc

$ vi /home/gpadmin/.bashrc #追加
source /usr/local/greenplum-db/greenplum_path.sh 
export MASTER_DATA_DIRECTORY=/data1/gpdata/master/gpseg-1

修改.bash_profile

$ vi /home/gpadmin/.bash_profile #追加
source /usr/local/greenplum-db/greenplum_path.sh
export MASTER_DATA_DIRECTORY=/data1/gpdata/master/gpseg-1

配置生效

$ source ~/.bashrc
$ source ~/.bash_profile
5)准备服务器信息文件(仅gpdbmaster)

all_host文件

$ vi /usr/local/greenplum-db/all_host #必须和/etc/hosts文件中的hostname或alias一致
gpdbmaster
gpdbsegment1
gpdbsegment2

all_segment文件

$ vi /usr/local/greenplum-db/all_segment #必须和/etc/hosts文件中的hostname或alias一致
gpdbsegment1
gpdbsegment2
6)建立服务器间的信任(仅gpdbmaster)

切换为root用户exit 为root用户引入greenplum环境变量

# source /usr/local/greenplum-db/greenplum_path.sh

交换key,建立信任关系

# gpssh-exkeys -f /usr/local/greenplum-db/all_host #这一步会提示需要输入root密码。# 如果遇到AttributeError: 'module' object has no attribute 'GSSException' 解决办法:pip uninstall gssapi
7)为其他主机安装greenplum(仅gpdbmaster)

为其他主机安装(使用root账户,gpadmin用户无权限)

# gpseginstall -f /usr/local/greenplum-db/all_segment -u gpadmin -p 123456
# gpssh -f /usr/local/greenplum-db/all_host -e ls -l $GPHOME #检查安装情况
8)创建存储目录(仅gpdbmaster)

切换为root用户

# su -
# source /usr/local/greenplum-db/greenplum_path.sh

创建文件夹并更改所属用户

# gpssh -f /usr/local/greenplum-db/all_host -e 'mkdir -p /data1/gpdata/primary'
# gpssh -f /usr/local/greenplum-db/all_host -e 'chown gpadmin:gpadmin /data1/gpdata/primary'
# gpssh -f /usr/local/greenplum-db/all_segment -e 'mkdir -p /data1/gpdata/mirror'
# gpssh -f /usr/local/greenplum-db/all_segment -e 'chown gpadmin:gpadmin /data1/gpdata/mirror'
9)修改初始化配置文件(仅gpdbmaster)
# cp /usr/local/greenplum-db/docs/cli_help/gpconfigs/gpinitsystem_config /usr/local/greenplum-db/
# chmod 775 /usr/local/greenplum-db/gpinitsystem_config
# vi /usr/local/greenplum-db/gpinitsystem_config
# FILE NAME: gpinitsystem_config

# Configuration file needed by the gpinitsystem

################################################
#### REQUIRED PARAMETERS
################################################

#### Name of this Greenplum system enclosed in quotes.
ARRAY_NAME="Greenplum Data Platform"

#### Naming convention for utility-generated data directories.
SEG_PREFIX=gpseg

#### Base number by which primary segment port numbers 
#### are calculated.
PORT_BASE=40000

#### File system location(s) where primary segment data directories 
#### will be created. The number of locations in the list dictate
#### the number of primary segments that will get created per
#### physical host (if multiple addresses for a host are listed in 
#### the hostfile, the number of segments will be spread evenly across
#### the specified interface addresses).
#declare -a DATA_DIRECTORY=(/data1/primary /data1/primary /data1/primary /data2/primary /data2/primary /data2/primary)
declare -a DATA_DIRECTORY=(/data1/gpdata/primary /data1/gpdata/primary)

#### OS-configured hostname or IP address of the master host.
MASTER_HOSTNAME=node1-master

#### File system location where the master data directory 
#### will be created.
MASTER_DIRECTORY=/data1/gpdata/master

#### Port number for the master instance. 
MASTER_PORT=5432

#### Shell utility used to connect to remote hosts.
TRUSTED_SHELL=ssh

#### Maximum log file segments between automatic WAL checkpoints.
CHECK_POINT_SEGMENTS=8

#### Default server-side character set encoding.
ENCODING=UNICODE

################################################
#### OPTIONAL MIRROR PARAMETERS
################################################

#### Base number by which mirror segment port numbers 
#### are calculated.
#MIRROR_PORT_BASE=50000

#### Base number by which primary file replication port 
#### numbers are calculated.
#REPLICATION_PORT_BASE=41000

#### Base number by which mirror file replication port 
#### numbers are calculated. 
#MIRROR_REPLICATION_PORT_BASE=51000

#### File system location(s) where mirror segment data directories 
#### will be created. The number of mirror locations must equal the
#### number of primary locations as specified in the 
#### DATA_DIRECTORY parameter.
#declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror /data1/mirror /data2/mirror /data2/mirror /data2/mirror)


################################################
#### OTHER OPTIONAL PARAMETERS
################################################

#### Create a database of this name after initialization.
#DATABASE_NAME=name_of_database
DATABASE_NAME=test_init_db


#### Specify the location of the host address file here instead of
#### with the the -h option of gpinitsystem.
#MACHINE_LIST_FILE=/home/gpadmin/gpconfigs/hostfile_gpinitsystem
10)初始化greenplum(仅gpdbmaster)

参考网址 https://gpdb.docs.pivotal.io/43180/install_guide/refs/gpinitsystem.html

切换为gpadmin用户# su - gpadmin 初始化数据库

$ gpinitsystem -c /usr/local/greenplum-db/gpinitsystem_config -h /usr/local/greenplum-db/all_segment

执行命令,验证是否成功:

centos7搭建KVM配置网络 centos7 kdump配置_centos7搭建KVM配置网络

11)开启允许远程访问(仅gpdbmaster)

修改pg_hba.conf文件

$ vim /data1/gpdata/master/gpseg-1/pg_hba.conf
 #在TYPE  DATABASE    USER        CIDR-ADDRESS          METHOD下面
 追加图片上文字中间一行数据,代表允许网段172.18.16.0-255的ip上用用户gpadmin通过md5方式访问数据库。

centos7搭建KVM配置网络 centos7 kdump配置_centos7搭建KVM配置网络_02


使改动生效切不中断服务gpstop -u 通过客户端工具navicat链接该数据库,如果提示ssl off,请重新配置。

centos7搭建KVM配置网络 centos7 kdump配置_centos7搭建KVM配置网络_03

12)基本操作(仅gpdbmaster)
启动数据库服务
gpstart,系统会自检,并提示是否启动服务,选择y,启动服务
gpstart -a 则系统无任何提示,进行启动
gpstart -q 如果不希望屏幕输出
gpstart -h 具体的选项帮助说明
ps -ef|grep postgre 查看相关的服务进程

如何关闭数据库服务
gpstop
gpstop -M fast 想强行关闭服务
gpstop -u  重启系统
该工具提供了-t选项,增加允许的超时设置。这对系统关闭时存在大量回滚数据的情况非常有用(过去的默认超时是60秒)
gpstop -h 获取选项帮助

gpstate :显示Greenplum数据库运行状态,详细配置等信息 常用可选参数:
		 -c:primary instance 和 mirror instance 的对应关系 
         -m:只列出mirror 实例的状态和配置信息  
         -f:显示standby master 的详细信息 
         -s:查看详细状态,如在同步,可显示数据同步完成百分比
         --version,查看数据库version 
(也可使用pg_controldata查看数据库版本和postgresql版本) 该命令默认列出数据库运行状态汇总信息,常用于日常巡检。

我们可以通过任何装有psql客户端的机器连接GP数据库,比如
psql -d template1 -U gpadmin -p 5432 -h mdw
其中-d指定了连接数据库的名称,-U指定了连接数据库的用户名,也称为角色,-p指定了连接使用的端口,默认值是5432,-h指定了master对外服务的主机名。
修改用户密码:alter role xxx with password 'xxx'
3、GreenPlum其它配置 。
1.增加standby

在standby服务器上执行(使用root用户)

# mkdir /data1/gpdata/master
# chown gpadmin:gpadmin /data1/gpdata/master

在master服务器上执行(使用gpadmin用户)

$ gpinitstandby -s node-standby
2.启用mirroring

配置mirrors和primaries在不同主机(确保所有机器都在集群配置文件内:all_host,all_segment),以gpadmin用户身份在master主机执行命令

$ gpaddmirrors -o gpmirrors_config # 创建镜像配置文件
$ cat gpmirrors_config # 查看生成的镜像备份文件,也可以根据它的语法编辑修改
$ gpaddmirrors -i gpmirrors_config
3.新增segment

重复步骤一,为新增服务器配置基础环境,更新所有服务器的/etc/hosts文件
以下操作为master节点

$ sudo su -
# vim /usr/local/greenplum-db/new_hosts_file # 文件内容如下,为新增的节点hostname或alias
node4-seg
node5-seg # 保存退出

# source /usr/local/greenplum-db/greenplum_path.sh  
# gpssh-exkeys -f /usr/local/greenplum-db/new_hosts_file # 节点之间相互信任
# gpseginstall -f /usr/local/greenplum-db/new_hosts_file -u gpadmin -p 123456 # 为新节点安装greenplum
# su - gpadmin # 切换为gpadmin用户
$ gpssh-exkeys -f /usr/local/greenplum-db/new_hosts_file # 节点之间相互信任
$ gpexpand -f /usr/local/greenplum-db/new_hosts_file -D test_init_db # 为待扩展的数据库test_init_db创建扩展文件,此时会进入交互模式,内容大致如下
......
Would you like to initiate a new System Expansion Yy|Nn (default=N):
> y
......
Are you sure you want to continue with this gpexpand session? Yy|Nn (default=N):
> y
......
What type of mirroring strategy would you like?
 spread|grouped (default=grouped):
> grouped
......
How many new primary segments per host do you want to add? (default=0):
> 直接回车,使用默认值
......

$ # 交互模式结束之后,当前目录会生成类似gpexpand_inputfile_20180627_171654的文件
$ cat gpexpand_inputfile_20180627_171654 # 查看文件内容,每个节点的primary和mirror数量应和之前的segment一致(每个节点有两个mirror,有两个primary)
$ gpexpand -i gpexpand_inputfile_20180627_172950 -D test_init_db # 使扩展文件生效,如果失败,执行gpstart -m命令启动master节点,再执行gpexpand -r -D test_init_db回滚
$ gpexpand -d 00:10:00 # 在10分钟内,重新分配数据
$ gpexpand -c # 确认数据重新分配完成后,使用该命令移除扩展schema
4、java链接使用 。

普通postgresql驱动链接:

centos7搭建KVM配置网络 centos7 kdump配置_centos7搭建KVM配置网络_04


官方驱动链接方式为:

 官方驱动其他地方暂时没看到过,只能到官网下载,下载地址:https://network.pivotal.io/products/pivotal-gpdb#/releases/669/file_groups/178,里面还有关于驱动的使用文档。

使用方式与使用PostgreSQL的驱动一样,只需要换一下驱动类和连接URL。

官方驱动类(Data Source Class):com.pivotal.jdbc.GreenplumDriver

官方驱动连接URL(Connection URL):jdbc:pivotal:greenplum://host:port;DatabaseName=

结束

参考资料:
Greenplum 日常维护手册 http://blog.sina.com.cn/s/blog_6b1dfc870102w7r8.html j