1. 操作系统参数

注意事项: primary&&standby数据库节点执行以下操作:

1.1 系统内核参数

# vi /etc/sysctl.conf

kernel.shmall = 1152921504606846720

kernel.shmmax = 18446744073709551615

kernel.sem = 250 6400000 1000 25600

net.ipv4.tcp_max_tw_buckets = 10000

net.ipv4.tcp_tw_reuse = 1

net.ipv4.tcp_tw_recycle = 1

net.ipv4.tcp_keepalive_time = 30

net.ipv4.tcp_keepalive_probes = 9

net.ipv4.tcp_keepalive_intvl = 30

net.ipv4.tcp_retries1 = 5

net.ipv4.tcp_syn_retries = 5

net.ipv4.tcp_synack_retries = 5

net.ipv4.tcp_retries2 = 12

net.ipv4.tcp_rmem = 8192 250000 16777216

net.ipv4.tcp_wmem = 8192 250000 16777216

net.ipv4.ip_local_port_range = 26000-65535

net.ipv4.tcp_syncookies = 1

net.ipv4.tcp_max_syn_backlog = 65535

net.ipv4.tcp_fin_timeout = 60

net.ipv4.tcp_sack = 1  

net.ipv4.tcp_timestamps = 1  

net.core.wmem_max = 21299200

net.core.rmem_max = 21299200

net.core.wmem_default = 21299200

net.core.rmem_default = 21299200

net.core.somaxconn = 65535  

net.core.netdev_max_backlog = 65535

vm.extfrag_threshold = 500

vm.overcommit_ratio = 90

vm.overcommit_memory = 0

vm.min_free_kbytes = 112640  ##suggest to set as physical memory * 5%

1.2 limit参数

# vi /etc/security/limits.conf

* soft stack 3072

* hard stack 3072

* soft nofile 1000000

* hard nofile 1000000

* soft nproc unlimited

* hard nproc unlimited

# vi /etc/security/limits.d/90-nproc.conf  

* soft nproc unlimited

#

1.3 关闭swap分区

vi /etc/fstab    

## 注释掉swap分区挂载

1.4 关闭Transparent HugePages

注意事项:always表示已经启用

# cat /sys/kernel/mm/transparent_hugepage/enabled

[always] madvise never

#  

# vi  /etc/default/grub

----------------------------

#修改:

GRUB_CMDLINE_LINUX="crashkernel=auto rhgb quiet numa=off transparent_hugepage=never"

----------------------------

# grub2-mkconfig -o /boot/grub2/grub.cfg

# reboot

# cat /sys/kernel/mm/transparent_hugepage/enabled

always madvise [never]

#

# cat /proc/meminfo |grep Huge

AnonHugePages:         0 kB

HugePages_Total:       0

HugePages_Free:        0

HugePages_Rsvd:        0

HugePages_Surp:        0

Hugepagesize:       2048 kB

#  

1.5 网卡mtu

ifconfig  ens33  mtu 8192

2. 创建数据库主从集群

注意事项: primary&&standby数据库节点执行以下操作:

2.1 创建用户目录

# useradd opgauss

# chown opgauss.opgauss -R /opgauss/

# chmod 755 -R /opgauss/

2.2 配置数据库用户环境变量

# su - opgauss

$ chmod 700 data/

$ vi .bash_profile  

# .bash_profile

# Get the aliases and functions

if [ -f ~/.bashrc ]; then

. ~/.bashrc

fi

# User specific environment and startup programs

export GS_CLUSTER_NAME=dbCluster

export GAUSSHOME=/opt/openGauss/3.0.0

export LD_LIBRARY_PATH=$GAUSSHOME/lib

export PATH=$GAUSSHOME/bin:$PATH:$HOME/.local/bin:$HOME/bin

$ source .bash_profile

2.3 初始化primary(primary)  

$ export password=opgauss@123

$ gs_initdb -D /opgauss/data --nodename=datanode01 -E UTF-8 --locale=en_US.UTF-8 -U opgauss  -w $password

replconninfo参数说明:

{localhost}  本地数据库绑定的ip地址

{localport}  一般为port地址+1

{localeHeartPort}  设置为port+4,不冲突即可

{localservice}  设置为port+5,不冲突即可

{remoteNode}  远端数据库绑定的ip地址。因为同一台机器,地址与localhost相同

{remotePort}  远端port地址+1

{remoteHeartPort}  设置为port+4,不冲突即可

{remoteservice}  设置为port+5,不冲突即可

 2.4 配置primary鉴权文件(primary)  

[opgauss@ogsdb01 data]$ vi pg_hba.conf

local   all             all                                     trust

host    all             all             127.0.0.1/32            trust

host    postgres        opgauss         192.168.43.201/32       trust

host    postgres        opgauss         192.168.43.127/32       trust

host    all             all             0.0.0.0/0              md5

host    replication     opgauss         192.168.43.201/32       trust

host    replication     opgauss         192.168.43.127/32       trust


2.5 primary数据库配置(primary)  

[opgauss@ogsdb01 data]$  

[opgauss@ogsdb01 data]$ vi postgresql.conf

listen_addresses = '*'

port = 5432

local_bind_address = '192.168.43.201'

max_connections = 3000

pgxc_node_name = 'datanode01'

enable_data_replicate = off

replication_type = 1

application_name = 'datanode01'

wal_level = 'logical'

hot_standby = on

hot_standby_feedback = on

most_available_sync = on

synchronous_standby_names = '*'

wal_keep_segments = 1000

max_wal_senders = 10

replconninfo1='localhost=192.168.43.201 localport=5433 localservice=5434 remotehost=192.168.43.127 remoteport=5433 remoteservice=5434'

remote_read_mode = 'non_authentication'

logging_collector = on

log_directory = 'pg_log'

log_filename = 'postgresql-%a.log'

log_file_mode = 0600

log_truncate_on_rotation = on

log_rotation_age = 1d

log_rotation_size = 0


[opgauss@ogsdb01 data]$


2.6 启动 primary(primary)  

[opgauss@ogsdb01 data]$ gs_ctl start -D /opgauss/data/ -M primary

2.7 创建standby(standby)

[opgauss@ogsdb02 data]$ rm -rf *

[opgauss@ogsdb02 data]$ vi postgresql.conf  

listen_addresses = '*'

port = 5432

local_bind_address = '192.168.43.127'

max_connections = 3000

pgxc_node_name = 'datanode02'

enable_data_replicate = off

replication_type = 1

application_name = 'datanode02'

wal_level = 'logical'

hot_standby = on

hot_standby_feedback = on

most_available_sync = on

synchronous_standby_names = '*'

wal_keep_segments = 1000

max_wal_senders = 10

replconninfo1='localhost=192.168.43.127 localport=5433 localservice=5434 remotehost=192.168.43.201 remoteport=5433 remoteservice=5434'

remote_read_mode = 'non_authentication'

logging_collector = on

log_directory = 'pg_log'

log_filename = 'postgresql-%a.log'

log_file_mode = 0600

log_truncate_on_rotation = on

log_rotation_age = 1d

log_rotation_size = 0

[opgauss@ogsdb02 data]$


2.8 构建standby(standby)

注意事项:构建完成数据库自动启动

[opgauss@ogsdb02 data]$ gs_ctl build -D /opgauss/data/ -b full

3. 检查集群状态

3.1 检查集群节点状态

[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/

[2022-04-04 23:24:34.573][10061][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Primary

static_connections             : 1

db_state                       : Normal

detail_information             : Normal


Senders info:        

sender_pid                     : 10037

local_role                     : Primary

peer_role                      : Standby

peer_state                     : Normal

state                          : Streaming

sender_sent_location           : 0/40002E8

sender_write_location          : 0/40002E8

sender_flush_location          : 0/40002E8

sender_replay_location         : 0/40002E8

receiver_received_location     : 0/40002E8

receiver_write_location        : 0/40002E8

receiver_flush_location        : 0/40002E8

receiver_replay_location       : 0/40002E8

sync_percent                   : 100%

sync_state                     : Sync

sync_priority                  : 1

sync_most_available            : On

channel                        : 192.168.43.201:5433-->192.168.43.127:59862


Receiver info:      

No information  

[opgauss@ogsdb01 ~]$  


3.2 检查主从复制状态

[opgauss@ogsdb01 ~]$ gsql postgres

openGauss=# \x

Expanded display is on.

openGauss=# select * from pg_stat_replication;

-[ RECORD 1 ]------------+---------------------------------

pid                      | 140683629229824

usesysid                 | 10

usename                  | opgauss

application_name         | WalSender to Standby[datanode02]

client_addr              | 192.168.43.127

client_hostname          |  

client_port              | 59862

backend_start            | 2022-04-05 03:22:39.813595+00

state                    | Streaming

sender_sent_location     | 0/4000BC8

receiver_write_location  | 0/4000BC8

receiver_flush_location  | 0/4000BC8

receiver_replay_location | 0/4000BC8

sync_priority            | 1

sync_state               | Sync


openGauss=#



4. 数据同步验证

4.1 primary插入数据

openGauss=# create table t1(id int);

CREATE TABLE

openGauss=# insert into t1 values(1);

INSERT 0 1

openGauss=# select * from t1;

id  

----

 1

(1 row)


openGauss=# \q

[opgauss@ogsdb01 ~]$


4.2 检查standby数据

[opgauss@ogsdb02 data]$ gsql postgres

openGauss=# select * from t1;

id  

----

 1

(1 row)

openGauss=#


5.主备切换

5.1 情景1 switchover切换

前提条件

standby正常连接到primary

集群初始状态

ogsdb01,standby,up

ogsdb02,primary,up

switchover集群状态:

ogsdb01,primary,up

ogsdb02,standby,up

switchover切换操作步骤:

5.1.1 确认standby状态

[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/  

[2022-04-04 23:59:47.984][10438][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Standby

static_connections             : 1

db_state                       : Normal

detail_information             : Normal


Senders info:        

No information  

Receiver info:      

receiver_pid                   : 10430

local_role                     : Standby

peer_role                      : Primary

peer_state                     : Normal

state                          : Normal

sender_sent_location           : 0/400C480

sender_write_location          : 0/400C480

sender_flush_location          : 0/400C480

sender_replay_location         : 0/400C480

receiver_received_location     : 0/400C480

receiver_write_location        : 0/400C480

receiver_flush_location        : 0/400C480

receiver_replay_location       : 0/400C480

sync_percent                   : 100%

channel                        : 192.168.43.201:54840<--192.168.43.127:5433

[opgauss@ogsdb01 ~]$  


5.1.2 standby数据库执行switchover操作

[opgauss@ogsdb01 ~]$ gs_ctl switchover -D /opgauss/data/

[2022-04-05 00:00:51.016][10443][][gs_ctl]: gs_ctl switchover ,datadir is /opgauss/data  

[2022-04-05 00:00:51.016][10443][][gs_ctl]: switchover term (1)

[2022-04-05 00:00:51.024][10443][][gs_ctl]: waiting for server to switchover.........

[2022-04-05 00:00:57.064][10443][][gs_ctl]: done

[2022-04-05 00:00:57.064][10443][][gs_ctl]: switchover completed (/opgauss/data)

[opgauss@ogsdb01 ~]$  


5.1.3 检查新primary状态

[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/  

[2022-04-05 00:01:28.580][10482][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Primary

static_connections             : 1

db_state                       : Normal

detail_information             : Normal


Senders info:        

sender_pid                     : 10463

local_role                     : Primary

peer_role                      : Standby

peer_state                     : Normal

state                          : Streaming

sender_sent_location           : 0/400C740

sender_write_location          : 0/400C740

sender_flush_location          : 0/400C740

sender_replay_location         : 0/400C740

receiver_received_location     : 0/400C740

receiver_write_location        : 0/400C740

receiver_flush_location        : 0/400C740

receiver_replay_location       : 0/400C740

sync_percent                   : 100%

sync_state                     : Sync

sync_priority                  : 1

sync_most_available            : On

channel                        : 192.168.43.201:5433-->192.168.43.127:34890


Receiver info:      

No information  

[opgauss@ogsdb01 ~]$  

[opgauss@ogsdb01 ~]$ gsql postgres

openGauss=# \x

openGauss=# select * from pg_stat_replication;

-[ RECORD 1 ]------------+---------------------------------

pid                      | 140115672160000

usesysid                 | 10

usename                  | opgauss

application_name         | WalSender to Standby[datanode02]

client_addr              | 192.168.43.127

client_hostname          |  

client_port              | 34890

backend_start            | 2022-04-05 04:00:53.580692+00

state                    | Streaming

sender_sent_location     | 0/400C8E0

receiver_write_location  | 0/400C8E0

receiver_flush_location  | 0/400C8E0

receiver_replay_location | 0/400C8E0

sync_priority            | 1

sync_state               | Sync

openGauss=#  


5.1.4 检查新standby状态

[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/

[2022-04-05 00:02:43.720][16020][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Standby

static_connections             : 1

db_state                       : Normal

detail_information             : Normal


Senders info:        

No information  

Receiver info:      

receiver_pid                   : 15892

local_role                     : Standby

peer_role                      : Primary

peer_state                     : Normal

state                          : Normal

sender_sent_location           : 0/400C8E0

sender_write_location          : 0/400C8E0

sender_flush_location          : 0/400C8E0

sender_replay_location         : 0/400C8E0

receiver_received_location     : 0/400C8E0

receiver_write_location        : 0/400C8E0

receiver_flush_location        : 0/400C8E0

receiver_replay_location       : 0/400C8E0

sync_percent                   : 100%

channel                        : 192.168.43.127:34890<--192.168.43.201:5433

[opgauss@ogsdb02 ~]$  


5.2 情景2 failover切换

前提条件

primary数据库被正常停止

primary异常无法启动

集群初始状态

ogsdb01,primary,down

ogsdb02,standby,up

switchover集群状态

ogsdb01,standby,up

ogsdb02,primary,up

failover切换操作步骤:


5.2.1 关闭primary

[opgauss@ogsdb01 ~]$ gs_ctl stop -D /opgauss/data/

[2022-04-04 23:48:12.829][10401][][gs_ctl]: gs_ctl stopped ,datadir is /opgauss/data  

waiting for server to shut down......... done

server stopped

[opgauss@ogsdb01 ~]$  


5.2.2 检查备库状态

[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/

[2022-04-04 23:48:59.429][13317][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Standby

static_connections             : 1

db_state                       : Need repair

detail_information             : Disconnected


Senders info:        

No information  

Receiver info:      

No information  


5.2.3 执行failover操作

[opgauss@ogsdb02 ~]$ gs_ctl failover -D /opgauss/data/

[2022-04-04 23:50:00.885][15210][][gs_ctl]: gs_ctl failover ,datadir is /opgauss/data  

[2022-04-04 23:50:00.885][15210][][gs_ctl]: failover term (1)

[2022-04-04 23:50:00.892][15210][][gs_ctl]:  waiting for server to failover...

.[2022-04-04 23:50:01.905][15210][][gs_ctl]:  done

[2022-04-04 23:50:01.905][15210][][gs_ctl]:  failover completed (/opgauss/data)

[opgauss@ogsdb02 ~]$  


5.2.4 查看新主库状态(备库直接连接新主库)

[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/

[2022-04-04 23:51:15.410][15301][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Primary

static_connections             : 1

db_state                       : Normal

detail_information             : Normal


Senders info:        

No information  

Receiver info:      

No information  

[opgauss@ogsdb02 ~]$  


5.2.5 原来主库加入新集群

[opgauss@ogsdb01 ~]$ gs_ctl start -D /opgauss/data/ -M standby


5.2.6 检查新备库状态

[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/  

[2022-04-04 23:53:56.050][10432][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Standby

static_connections             : 1

db_state                       : Normal

detail_information             : Normal


Senders info:        

No information  

Receiver info:      

receiver_pid                   : 10430

local_role                     : Standby

peer_role                      : Primary

peer_state                     : Normal

state                          : Normal

sender_sent_location           : 0/400BCA8

sender_write_location          : 0/400BCA8

sender_flush_location          : 0/400BCA8

sender_replay_location         : 0/400BCA8

receiver_received_location     : 0/400BCA8

receiver_write_location        : 0/400BCA8

receiver_flush_location        : 0/400BCA8

receiver_replay_location       : 0/400BCA8

sync_percent                   : 100%

channel                        : 192.168.43.201:54840<--192.168.43.127:5433

[opgauss@ogsdb01 ~]$  


5.2.7 检查新主库节点状态

[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/

[2022-04-04 23:55:04.161][15523][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data  

HA state:            

local_role                     : Primary

static_connections             : 1

db_state                       : Normal

detail_information             : Normal


Senders info:        

sender_pid                     : 15448

local_role                     : Primary

peer_role                      : Standby

peer_state                     : Normal

state                          : Streaming

sender_sent_location           : 0/400BDC8

sender_write_location          : 0/400BDC8

sender_flush_location          : 0/400BDC8

sender_replay_location         : 0/400BDC8

receiver_received_location     : 0/400BDC8

receiver_write_location        : 0/400BDC8

receiver_flush_location        : 0/400BDC8

receiver_replay_location       : 0/400BDC8

sync_percent                   : 100%

sync_state                     : Sync

sync_priority                  : 1

sync_most_available            : On

channel                        : 192.168.43.127:5433-->192.168.43.201:54840


Receiver info:      

No information  


5.2.8 检查主从复制状态

[opgauss@ogsdb02 ~]$ gsql postgres

openGauss=#  select * from pg_stat_replication;

-[ RECORD 1 ]------------+---------------------------------

pid                      | 140224975730432

usesysid                 | 10

usename                  | opgauss

application_name         | WalSender to Standby[datanode01]

client_addr              | 192.168.43.201

client_hostname          |  

client_port              | 54840

backend_start            | 2022-04-05 03:53:45.699128+00

state                    | Streaming

sender_sent_location     | 0/400BFE8

receiver_write_location  | 0/400BFE8

receiver_flush_location  | 0/400BFE8

receiver_replay_location | 0/400BFE8

sync_priority            | 1

sync_state               | Sync

openGauss=#