1. 操作系统参数
注意事项: primary&&standby数据库节点执行以下操作:
1.1 系统内核参数
# vi /etc/sysctl.conf
kernel.shmall = 1152921504606846720
kernel.shmmax = 18446744073709551615
kernel.sem = 250 6400000 1000 25600
net.ipv4.tcp_max_tw_buckets = 10000
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_tw_recycle = 1
net.ipv4.tcp_keepalive_time = 30
net.ipv4.tcp_keepalive_probes = 9
net.ipv4.tcp_keepalive_intvl = 30
net.ipv4.tcp_retries1 = 5
net.ipv4.tcp_syn_retries = 5
net.ipv4.tcp_synack_retries = 5
net.ipv4.tcp_retries2 = 12
net.ipv4.tcp_rmem = 8192 250000 16777216
net.ipv4.tcp_wmem = 8192 250000 16777216
net.ipv4.ip_local_port_range = 26000-65535
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.tcp_fin_timeout = 60
net.ipv4.tcp_sack = 1
net.ipv4.tcp_timestamps = 1
net.core.wmem_max = 21299200
net.core.rmem_max = 21299200
net.core.wmem_default = 21299200
net.core.rmem_default = 21299200
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 65535
vm.extfrag_threshold = 500
vm.overcommit_ratio = 90
vm.overcommit_memory = 0
vm.min_free_kbytes = 112640 ##suggest to set as physical memory * 5%
1.2 limit参数
# vi /etc/security/limits.conf
* soft stack 3072
* hard stack 3072
* soft nofile 1000000
* hard nofile 1000000
* soft nproc unlimited
* hard nproc unlimited
# vi /etc/security/limits.d/90-nproc.conf
* soft nproc unlimited
#
1.3 关闭swap分区
vi /etc/fstab
## 注释掉swap分区挂载
1.4 关闭Transparent HugePages
注意事项:always表示已经启用
# cat /sys/kernel/mm/transparent_hugepage/enabled
[always] madvise never
#
# vi /etc/default/grub
----------------------------
#修改:
GRUB_CMDLINE_LINUX="crashkernel=auto rhgb quiet numa=off transparent_hugepage=never"
----------------------------
# grub2-mkconfig -o /boot/grub2/grub.cfg
# reboot
# cat /sys/kernel/mm/transparent_hugepage/enabled
always madvise [never]
#
# cat /proc/meminfo |grep Huge
AnonHugePages: 0 kB
HugePages_Total: 0
HugePages_Free: 0
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
#
1.5 网卡mtu
ifconfig ens33 mtu 8192
2. 创建数据库主从集群
注意事项: primary&&standby数据库节点执行以下操作:
2.1 创建用户目录
# useradd opgauss
# chown opgauss.opgauss -R /opgauss/
# chmod 755 -R /opgauss/
2.2 配置数据库用户环境变量
# su - opgauss
$ chmod 700 data/
$ vi .bash_profile
# .bash_profile
# Get the aliases and functions
if [ -f ~/.bashrc ]; then
. ~/.bashrc
fi
# User specific environment and startup programs
export GS_CLUSTER_NAME=dbCluster
export GAUSSHOME=/opt/openGauss/3.0.0
export LD_LIBRARY_PATH=$GAUSSHOME/lib
export PATH=$GAUSSHOME/bin:$PATH:$HOME/.local/bin:$HOME/bin
$ source .bash_profile
2.3 初始化primary(primary)
$ export password=opgauss@123
$ gs_initdb -D /opgauss/data --nodename=datanode01 -E UTF-8 --locale=en_US.UTF-8 -U opgauss -w $password
replconninfo参数说明:
{localhost} 本地数据库绑定的ip地址
{localport} 一般为port地址+1
{localeHeartPort} 设置为port+4,不冲突即可
{localservice} 设置为port+5,不冲突即可
{remoteNode} 远端数据库绑定的ip地址。因为同一台机器,地址与localhost相同
{remotePort} 远端port地址+1
{remoteHeartPort} 设置为port+4,不冲突即可
{remoteservice} 设置为port+5,不冲突即可
2.4 配置primary鉴权文件(primary)
[opgauss@ogsdb01 data]$ vi pg_hba.conf
local all all trust
host all all 127.0.0.1/32 trust
host postgres opgauss 192.168.43.201/32 trust
host postgres opgauss 192.168.43.127/32 trust
host all all 0.0.0.0/0 md5
host replication opgauss 192.168.43.201/32 trust
host replication opgauss 192.168.43.127/32 trust
2.5 primary数据库配置(primary)
[opgauss@ogsdb01 data]$
[opgauss@ogsdb01 data]$ vi postgresql.conf
listen_addresses = '*'
port = 5432
local_bind_address = '192.168.43.201'
max_connections = 3000
pgxc_node_name = 'datanode01'
enable_data_replicate = off
replication_type = 1
application_name = 'datanode01'
wal_level = 'logical'
hot_standby = on
hot_standby_feedback = on
most_available_sync = on
synchronous_standby_names = '*'
wal_keep_segments = 1000
max_wal_senders = 10
replconninfo1='localhost=192.168.43.201 localport=5433 localservice=5434 remotehost=192.168.43.127 remoteport=5433 remoteservice=5434'
remote_read_mode = 'non_authentication'
logging_collector = on
log_directory = 'pg_log'
log_filename = 'postgresql-%a.log'
log_file_mode = 0600
log_truncate_on_rotation = on
log_rotation_age = 1d
log_rotation_size = 0
[opgauss@ogsdb01 data]$
2.6 启动 primary(primary)
[opgauss@ogsdb01 data]$ gs_ctl start -D /opgauss/data/ -M primary
2.7 创建standby(standby)
[opgauss@ogsdb02 data]$ rm -rf *
[opgauss@ogsdb02 data]$ vi postgresql.conf
listen_addresses = '*'
port = 5432
local_bind_address = '192.168.43.127'
max_connections = 3000
pgxc_node_name = 'datanode02'
enable_data_replicate = off
replication_type = 1
application_name = 'datanode02'
wal_level = 'logical'
hot_standby = on
hot_standby_feedback = on
most_available_sync = on
synchronous_standby_names = '*'
wal_keep_segments = 1000
max_wal_senders = 10
replconninfo1='localhost=192.168.43.127 localport=5433 localservice=5434 remotehost=192.168.43.201 remoteport=5433 remoteservice=5434'
remote_read_mode = 'non_authentication'
logging_collector = on
log_directory = 'pg_log'
log_filename = 'postgresql-%a.log'
log_file_mode = 0600
log_truncate_on_rotation = on
log_rotation_age = 1d
log_rotation_size = 0
[opgauss@ogsdb02 data]$
2.8 构建standby(standby)
注意事项:构建完成数据库自动启动
[opgauss@ogsdb02 data]$ gs_ctl build -D /opgauss/data/ -b full
3. 检查集群状态
3.1 检查集群节点状态
[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-04 23:24:34.573][10061][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Primary
static_connections : 1
db_state : Normal
detail_information : Normal
Senders info:
sender_pid : 10037
local_role : Primary
peer_role : Standby
peer_state : Normal
state : Streaming
sender_sent_location : 0/40002E8
sender_write_location : 0/40002E8
sender_flush_location : 0/40002E8
sender_replay_location : 0/40002E8
receiver_received_location : 0/40002E8
receiver_write_location : 0/40002E8
receiver_flush_location : 0/40002E8
receiver_replay_location : 0/40002E8
sync_percent : 100%
sync_state : Sync
sync_priority : 1
sync_most_available : On
channel : 192.168.43.201:5433-->192.168.43.127:59862
Receiver info:
No information
[opgauss@ogsdb01 ~]$
3.2 检查主从复制状态
[opgauss@ogsdb01 ~]$ gsql postgres
openGauss=# \x
Expanded display is on.
openGauss=# select * from pg_stat_replication;
-[ RECORD 1 ]------------+---------------------------------
pid | 140683629229824
usesysid | 10
usename | opgauss
application_name | WalSender to Standby[datanode02]
client_addr | 192.168.43.127
client_hostname |
client_port | 59862
backend_start | 2022-04-05 03:22:39.813595+00
state | Streaming
sender_sent_location | 0/4000BC8
receiver_write_location | 0/4000BC8
receiver_flush_location | 0/4000BC8
receiver_replay_location | 0/4000BC8
sync_priority | 1
sync_state | Sync
openGauss=#
4. 数据同步验证
4.1 primary插入数据
openGauss=# create table t1(id int);
CREATE TABLE
openGauss=# insert into t1 values(1);
INSERT 0 1
openGauss=# select * from t1;
id
----
1
(1 row)
openGauss=# \q
[opgauss@ogsdb01 ~]$
4.2 检查standby数据
[opgauss@ogsdb02 data]$ gsql postgres
openGauss=# select * from t1;
id
----
1
(1 row)
openGauss=#
5.主备切换
5.1 情景1 switchover切换
前提条件
standby正常连接到primary
集群初始状态
ogsdb01,standby,up
ogsdb02,primary,up
switchover集群状态:
ogsdb01,primary,up
ogsdb02,standby,up
switchover切换操作步骤:
5.1.1 确认standby状态
[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-04 23:59:47.984][10438][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Standby
static_connections : 1
db_state : Normal
detail_information : Normal
Senders info:
No information
Receiver info:
receiver_pid : 10430
local_role : Standby
peer_role : Primary
peer_state : Normal
state : Normal
sender_sent_location : 0/400C480
sender_write_location : 0/400C480
sender_flush_location : 0/400C480
sender_replay_location : 0/400C480
receiver_received_location : 0/400C480
receiver_write_location : 0/400C480
receiver_flush_location : 0/400C480
receiver_replay_location : 0/400C480
sync_percent : 100%
channel : 192.168.43.201:54840<--192.168.43.127:5433
[opgauss@ogsdb01 ~]$
5.1.2 standby数据库执行switchover操作
[opgauss@ogsdb01 ~]$ gs_ctl switchover -D /opgauss/data/
[2022-04-05 00:00:51.016][10443][][gs_ctl]: gs_ctl switchover ,datadir is /opgauss/data
[2022-04-05 00:00:51.016][10443][][gs_ctl]: switchover term (1)
[2022-04-05 00:00:51.024][10443][][gs_ctl]: waiting for server to switchover.........
[2022-04-05 00:00:57.064][10443][][gs_ctl]: done
[2022-04-05 00:00:57.064][10443][][gs_ctl]: switchover completed (/opgauss/data)
[opgauss@ogsdb01 ~]$
5.1.3 检查新primary状态
[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-05 00:01:28.580][10482][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Primary
static_connections : 1
db_state : Normal
detail_information : Normal
Senders info:
sender_pid : 10463
local_role : Primary
peer_role : Standby
peer_state : Normal
state : Streaming
sender_sent_location : 0/400C740
sender_write_location : 0/400C740
sender_flush_location : 0/400C740
sender_replay_location : 0/400C740
receiver_received_location : 0/400C740
receiver_write_location : 0/400C740
receiver_flush_location : 0/400C740
receiver_replay_location : 0/400C740
sync_percent : 100%
sync_state : Sync
sync_priority : 1
sync_most_available : On
channel : 192.168.43.201:5433-->192.168.43.127:34890
Receiver info:
No information
[opgauss@ogsdb01 ~]$
[opgauss@ogsdb01 ~]$ gsql postgres
openGauss=# \x
openGauss=# select * from pg_stat_replication;
-[ RECORD 1 ]------------+---------------------------------
pid | 140115672160000
usesysid | 10
usename | opgauss
application_name | WalSender to Standby[datanode02]
client_addr | 192.168.43.127
client_hostname |
client_port | 34890
backend_start | 2022-04-05 04:00:53.580692+00
state | Streaming
sender_sent_location | 0/400C8E0
receiver_write_location | 0/400C8E0
receiver_flush_location | 0/400C8E0
receiver_replay_location | 0/400C8E0
sync_priority | 1
sync_state | Sync
openGauss=#
5.1.4 检查新standby状态
[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-05 00:02:43.720][16020][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Standby
static_connections : 1
db_state : Normal
detail_information : Normal
Senders info:
No information
Receiver info:
receiver_pid : 15892
local_role : Standby
peer_role : Primary
peer_state : Normal
state : Normal
sender_sent_location : 0/400C8E0
sender_write_location : 0/400C8E0
sender_flush_location : 0/400C8E0
sender_replay_location : 0/400C8E0
receiver_received_location : 0/400C8E0
receiver_write_location : 0/400C8E0
receiver_flush_location : 0/400C8E0
receiver_replay_location : 0/400C8E0
sync_percent : 100%
channel : 192.168.43.127:34890<--192.168.43.201:5433
[opgauss@ogsdb02 ~]$
5.2 情景2 failover切换
前提条件
primary数据库被正常停止
primary异常无法启动
集群初始状态
ogsdb01,primary,down
ogsdb02,standby,up
switchover集群状态
ogsdb01,standby,up
ogsdb02,primary,up
failover切换操作步骤:
5.2.1 关闭primary
[opgauss@ogsdb01 ~]$ gs_ctl stop -D /opgauss/data/
[2022-04-04 23:48:12.829][10401][][gs_ctl]: gs_ctl stopped ,datadir is /opgauss/data
waiting for server to shut down......... done
server stopped
[opgauss@ogsdb01 ~]$
5.2.2 检查备库状态
[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-04 23:48:59.429][13317][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Standby
static_connections : 1
db_state : Need repair
detail_information : Disconnected
Senders info:
No information
Receiver info:
No information
5.2.3 执行failover操作
[opgauss@ogsdb02 ~]$ gs_ctl failover -D /opgauss/data/
[2022-04-04 23:50:00.885][15210][][gs_ctl]: gs_ctl failover ,datadir is /opgauss/data
[2022-04-04 23:50:00.885][15210][][gs_ctl]: failover term (1)
[2022-04-04 23:50:00.892][15210][][gs_ctl]: waiting for server to failover...
.[2022-04-04 23:50:01.905][15210][][gs_ctl]: done
[2022-04-04 23:50:01.905][15210][][gs_ctl]: failover completed (/opgauss/data)
[opgauss@ogsdb02 ~]$
5.2.4 查看新主库状态(备库直接连接新主库)
[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-04 23:51:15.410][15301][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Primary
static_connections : 1
db_state : Normal
detail_information : Normal
Senders info:
No information
Receiver info:
No information
[opgauss@ogsdb02 ~]$
5.2.5 原来主库加入新集群
[opgauss@ogsdb01 ~]$ gs_ctl start -D /opgauss/data/ -M standby
5.2.6 检查新备库状态
[opgauss@ogsdb01 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-04 23:53:56.050][10432][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Standby
static_connections : 1
db_state : Normal
detail_information : Normal
Senders info:
No information
Receiver info:
receiver_pid : 10430
local_role : Standby
peer_role : Primary
peer_state : Normal
state : Normal
sender_sent_location : 0/400BCA8
sender_write_location : 0/400BCA8
sender_flush_location : 0/400BCA8
sender_replay_location : 0/400BCA8
receiver_received_location : 0/400BCA8
receiver_write_location : 0/400BCA8
receiver_flush_location : 0/400BCA8
receiver_replay_location : 0/400BCA8
sync_percent : 100%
channel : 192.168.43.201:54840<--192.168.43.127:5433
[opgauss@ogsdb01 ~]$
5.2.7 检查新主库节点状态
[opgauss@ogsdb02 ~]$ gs_ctl query -D /opgauss/data/
[2022-04-04 23:55:04.161][15523][][gs_ctl]: gs_ctl query ,datadir is /opgauss/data
HA state:
local_role : Primary
static_connections : 1
db_state : Normal
detail_information : Normal
Senders info:
sender_pid : 15448
local_role : Primary
peer_role : Standby
peer_state : Normal
state : Streaming
sender_sent_location : 0/400BDC8
sender_write_location : 0/400BDC8
sender_flush_location : 0/400BDC8
sender_replay_location : 0/400BDC8
receiver_received_location : 0/400BDC8
receiver_write_location : 0/400BDC8
receiver_flush_location : 0/400BDC8
receiver_replay_location : 0/400BDC8
sync_percent : 100%
sync_state : Sync
sync_priority : 1
sync_most_available : On
channel : 192.168.43.127:5433-->192.168.43.201:54840
Receiver info:
No information
5.2.8 检查主从复制状态
[opgauss@ogsdb02 ~]$ gsql postgres
openGauss=# select * from pg_stat_replication;
-[ RECORD 1 ]------------+---------------------------------
pid | 140224975730432
usesysid | 10
usename | opgauss
application_name | WalSender to Standby[datanode01]
client_addr | 192.168.43.201
client_hostname |
client_port | 54840
backend_start | 2022-04-05 03:53:45.699128+00
state | Streaming
sender_sent_location | 0/400BFE8
receiver_write_location | 0/400BFE8
receiver_flush_location | 0/400BFE8
receiver_replay_location | 0/400BFE8
sync_priority | 1
sync_state | Sync
openGauss=#