一、资源规划
主机名 | IP地址 | 服务器配置 | 角色 | 数据目录 |
cdh01 | 10.111.2.10 | 1G 1core | 主库 | /home/pg12/data |
cdh02 | 10.111.2.11 | 1G 1core | 备库 | /home/pg12/data |
二、操作系统调优(主备)
1、关闭防火墙和selinux
[root@cdh01 ~]# systemctl stop firewalld
[root@cdh01 ~]# systemctl disable firewalld
[root@cdh01 ~]# systemctl status firewalld
[root@cdh01 ~]# service iptables stop
[root@cdh01 ~]# service iptables status
禁用selinux
[root@cdh01 ~]# vi /etc/selinux/config
SELINUX=disabled
[root@cdh01 ~]#setenforce 0
[root@cdh01 ~]#getenforce
2、调整ulimit限制
[root@cdh01 ~]# vi /etc/security/limits.conf
* soft nofile 65536
* hard nofile 65535
* soft nproc 65536
* hard nproc 65535
* soft core unlimited
* hard core unlimited
3、调整内核参数
[root@cdh01 ~]# vi /etc/sysctl.conf
kernel.sem = 5010 641280 5010 256
[root@cdh01 ~]# sysctl -p 生效
4、调整RemoveIPC和DefaultTasksAccounting
[root@cdh01 ~]# vi /etc/systemd/logind.conf
RemoveIPC=no
[root@cdh01 ~]# vi /etc/systemd/system.conf
DefaultTasksAccounting=no
执行
[root@cdh01 ~]# systemctl daemon-reload
[root@cdh01 ~]# systemctl darmon-reexec
[root@cdh01 ~]# systemctl restart systemd-logind
5、调整sshd
[root@cdh01 ~]# vi /etc/ssh/sshd_config
PermitRootLogin yes
GSSAPIAuthentication no
UseDNS no
[root@cdh01 ~]# systemctl restart sshd
6、配置免密登录
ssh免密登录 root-->root pg12-->pg12
节点ssh互信
cdh01:
[root@cdh01 ~]# ssh-keygen -t rsa
[pg12@cdh01 ~]$ssh-keygen -t rsa
cdh02:
[root@cdh01 ~]# ssh-keygen -t rsa
[pg12@cdh02 ~]$ssh-keygen -t rsa
cdh01:
[root@cdh01 ~]# cd .ssh/
[root@cdh01 ~/.ssh]#cat id_dsa.pub >> authorized_keys
[root@cdh01 ~/.ssh]#cat /home/pg12/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@cdh01 ~/.ssh]#ssh root@10.111.2.11 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@cdh01 ~/.ssh]#ssh pg12@10.111.2.11at /home/pg12/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@cdh01 ~/.ssh]#cp authorized_keys /home/pg12/.ssh/
[root@cdh01 ~/.ssh]#scp authorized_keys root@10.111.2.11:~/.ssh/
[root@cdh01 ~/.ssh]#scp authorized_keys pg12@10.111.2.11/kingbase/.ssh/
[root@cdh01 ~/.ssh]#chmod 600 authorized_keys
[pg12@cdh01 ~]$chmod 600 authorized_keys
cdh02:
[root@cdh02 ~/.ssh]#chown -R pg12.pg12 /home/pg12/.ssh/authorized_keys
[root@cdh02 ~/.ssh]#chmod 600 /home/pg12/.ssh/authorized_keys
测试
两个节点互相访问,可以直接登录无需密码说明配置成功。两个节点必须互相测试一次,否则后续集群脚本无法执行。
节点1(cdh01)上执行:
[root@cdh01 ~]#ssh cdh02
[pg12@cdh01 ~]$ssh pg12@cdh02
节点2(cdh02)上执行:
[root@cdh02 ~]#ssh cdh01
[pg12@cdh02 ~]$ssh pg12@cdh01
三、repmgr编译安装(主备)
postgresql-12.14数据库已安装完成,源码安装具体步骤可参见之前记录。数据库安装目录/home/pg12/soft,数据目录/home/pg12/data。具体介绍repmgr安装过程。
1、下载源码包
https://repmgr.org/download/repmgr-5.3.3.tar.gz
2、配置数据库环境变量
vi ~/.bash_profile
PATH=$PATH:$HOME/.local/bin:$HOME/bin:/home/pg12/soft/bin
PGDATA=/home/pg12/data
export PGDATA
export PATH
若不配置$PGDATA,编译repmgr报错:configure: error: could not find pg_config, set PG_CONFIG or PATH
[pg12@cdh02 repmgr-5.3.3]$ ./configure
checking for a sed that does not truncate output... /bin/sed
checking for pg_config... no
configure: error: could not find pg_config, set PG_CONFIG or PATH
3、解压编译
[pg12@cdh01 ~]$ tar xf repmgr-5.3.3.tar.gz
[pg12@cdh01 ~]$ cd repmgr-5.3.3
[pg12@cdh01 repmgr-5.3.3]$ ./configure
checking for a sed that does not truncate output... /bin/sed
checking for pg_config... /home/pg12/soft/bin/pg_config
configure: building against PostgreSQL 14.5
checking for gnused... no
checking for gsed... no
checking for sed... yes
configure: creating ./config.status
config.status: creating Makefile
config.status: creating Makefile.global
config.status: creating config.h
[pg12@cdh01 repmgr-5.3.3]$ make install
Building against PostgreSQL 14
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr.o repmgr.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -shared -o repmgr.so repmgr.o -L/home/pg12/soft/lib -Wl,--as-needed -Wl,-rpath,'/home/pg12/soft/lib',--enable-new-dtags -L/home/pg12/soft/lib -lpq
sed -E 's/REPMGR_VERSION_DATE.*""/REPMGR_VERSION_DATE "2022-10-26"/' repmgr_version.h.in >repmgr_version.h; \
sed -i -E 's/PG_ACTUAL_VERSION_NUM/PG_ACTUAL_VERSION_NUM 140005/' repmgr_version.h
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-client.o repmgr-client.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-action-primary.o repmgr-action-primary.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-action-standby.o repmgr-action-standby.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-action-witness.o repmgr-action-witness.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-action-cluster.o repmgr-action-cluster.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-action-node.o repmgr-action-node.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-action-service.o repmgr-action-service.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgr-action-daemon.o repmgr-action-daemon.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o configdata.o configdata.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o configfile.o configfile.c
flex -o'configfile-scan.c' configfile-scan.l
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o configfile-scan.o configfile-scan.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o log.o log.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o strutil.o strutil.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o controldata.o controldata.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o dirutil.o dirutil.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o compat.o compat.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o dbutils.o dbutils.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o sysutils.o sysutils.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC repmgr-client.o repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-service.o repmgr-action-daemon.o configdata.o configfile.o configfile-scan.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o -L/home/pg12/soft/lib -lpgcommon -lpgport -L/home/pg12/soft/lib -lpq -L/home/pg12/soft/lib -Wl,--as-needed -Wl,-rpath,'/home/pg12/soft/lib',--enable-new-dtags -lpgcommon -lpgport -lssl -lcrypto -lz -lreadline -lpthread -lrt -ldl -lm -o repmgr
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgrd.o repmgrd.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal -D_GNU_SOURCE -c -o repmgrd-physical.o repmgrd-physical.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC repmgrd.o repmgrd-physical.o configdata.o configfile.o configfile-scan.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o -L/home/pg12/soft/lib -lpgcommon -lpgport -L/home/pg12/soft/lib -lpq -L/home/pg12/soft/lib -Wl,--as-needed -Wl,-rpath,'/home/pg12/soft/lib',--enable-new-dtags -lpgcommon -lpgport -lssl -lcrypto -lz -lreadline -lpthread -lrt -ldl -lm -o repmgrd
/bin/mkdir -p '/home/pg12/soft/lib/postgresql'
/bin/mkdir -p '/home/pg12/soft/share/postgresql/extension'
/bin/mkdir -p '/home/pg12/soft/share/postgresql/extension'
/bin/mkdir -p '/home/pg12/soft/bin'
/bin/install -c -m 755 repmgr.so '/home/pg12/soft/lib/postgresql/repmgr.so'
/bin/install -c -m 644 .//repmgr.control '/home/pg12/soft/share/postgresql/extension/'
/bin/install -c -m 644 .//repmgr--unpackaged--4.0.sql .//repmgr--unpackaged--5.1.sql .//repmgr--unpackaged--5.2.sql .//repmgr--unpackaged--5.3.sql .//repmgr--4.0.sql .//repmgr--4.0--4.1.sql .//repmgr--4.1.sql .//repmgr--4.1--4.2.sql .//repmgr--4.2.sql .//repmgr--4.2--4.3.sql .//repmgr--4.3.sql .//repmgr--4.3--4.4.sql .//repmgr--4.4.sql .//repmgr--4.4--5.0.sql .//repmgr--5.0.sql .//repmgr--5.0--5.1.sql .//repmgr--5.1.sql .//repmgr--5.1--5.2.sql .//repmgr--5.2.sql .//repmgr--5.2--5.3.sql .//repmgr--5.3.sql '/home/pg12/soft/share/postgresql/extension/'
/bin/install -c -m 755 repmgr repmgrd '/home/pg12/soft/bin/'
编译报错:make: flex: Command not found
yum install -y flex
编译安装默认不会生成repmgr.conf配置文件,可复制源码包的示例repmgr.conf.sample配置文件到指定目录。在使用repmgr命令需要指定repmgr.conf路径,通过-f/--config-file参数指定路径。如果找不到或无法读取该文件,则会引发错误,并且不会尝试检查默认位置。这是为了防止repmgr意外读取错误的配置文件。为了解决这个问题,可以将repmgr.con创建在pg_conf默认的sysconfidir路径下:/home/pg12/soft/etc/postgresql
[pg12@cdh01 data]$ pg_config --sysconfdir
/home/pg12/soft/etc/postgresql
本文配置文件放到:/home/pg12/conf/repmgr.conf
[pg12@cdh01 repmgr-5.3.3]$cd /home/pg12/repmgr-5.3.3
[pg12@cdh01 repmgr-5.3.3]$cp repmgr.conf.sample /home/pg12/conf/repmgr.conf
四、主库配置
1、初始化数据库
[pg12@cdh02 bin]$ ./initdb -Upostgres -W -D /home/pg12/data -k
The files belonging to this database system will be owned by user "pg12".
This user must also own the server process.
The database cluster will be initialized with locale "en_US.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".
Data page checksums are enabled.
Enter new superuser password:
Enter it again:
creating directory /home/pg12/data ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... PRC
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok
initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
Success. You can now start the database server using:
./pg_ctl -D /home/pg12/data -l logfile start
2、修改数据库配置
postgresql.conf
listen_addresses = '*'
port = 5666
shared_preload_libraries = 'repmgr'
wal_log_hints = on #开启支持pg_rewind
需要修改配置文件内容,如果直接复制port和listen_addresses到postgresql.conf文件最后,psql -p 5666 会登录不上。
pg_hba.conf添加如下策略
host all all 10.111.2.10/24 trust
host all all 10.111.2.11/24 trust
host replication all 10.111.2.10/24 trust
host replication all 10.111.2.11/24 trust
配置为:
# "local" is for Unix domain socket connections only
local all all trust
# IPv4 local connections:
host all all 127.0.0.1/32 trust
# IPv6 local connections:
host all all ::1/128 trust
# Allow replication connections from localhost, by a user with the
# replication privilege.
local replication all trust
host replication all 127.0.0.1/32 trust
host replication all ::1/128 trust
host all all 10.111.2.10/24 trust
host all all 10.111.2.11/24 trust
host replication all 10.111.2.10/24 trust
host replication all 10.111.2.11/24 trust
启动数据库
[pg12@cdh01 data]$ pg_ctl start -D /home/pg12/data/
waiting for server to start....2022-10-26 17:10:38.096 CST user=,db=,id=[13508]LOG: redirecting log output to logging collector process
2022-10-26 17:10:38.096 CST user=,db=,id=[13508]HINT: Future log output will appear in directory "/pglog".
done
server started
3、修改repmgr.conf
vi /home/pg12/conf/repmgr.conf
node_id=1
node_name='node1'
conninfo='host=10.111.2.10 port=5666 user=postgres dbname=postgres'
data_directory='/home/pg12/data'
4、注册主节点
[pg12@cdh01 data]$ cd /home/pg12/soft/etc/postgresql
[pg12@cdh01 postgresql]$ repmgr primary register
INFO: connecting to primary database...
NOTICE: attempting to install extension "repmgr"
NOTICE: "repmgr" extension successfully installed
NOTICE: primary node record (ID: 1) registered
[pg12@cdh01 postgresql]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 1 | host=10.111.2.10 port=5666 user=postgres dbname=postgres
五、备库配置
1、修改repmgr.conf
vi /home/pg12/conf/repmgr.conf
node_id=2
node_name='node2'
conninfo='host=10.111.2.11 port=5666 user=postgres dbname=postgres'
data_directory='/home/pg12/data'
2、克隆备库
[pg12@localhost conf]$ repmgr standby clone -h 10.111.2.10 -p5666 -U postgres
NOTICE: destination directory "/home/pg12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=10.111.2.10 port=5666 user=postgres
DETAIL: current installation size is 23 MB
INFO: replication slot usage not requested; no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: creating directory "/home/pg12/data"...
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
pg_basebackup -l "repmgr base backup" -D /home/pg12/data -h 10.111.2.10 -p 5666 -U postgres -X stream
pg_basebackup: error while loading shared libraries: libpq.so.5: cannot open shared object file: No such file or directory
ERROR: unable to take a base backup of the source server
HINT: data directory ("/home/pg12/data") may need to be cleaned up manually
pg_basebackup克隆报错:error while loading shared libraries: libpq.so.5: cannot open shared object file: No such file or directory.
[pg12@localhost ~]$ cd soft/bin/
[pg12@localhost bin]$ ldd pg_basebackup
linux-vdso.so.1 => (0x00007ffdfdf8d000)
libpq.so.5 => not found
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fbd45d24000)
libz.so.1 => /lib64/libz.so.1 (0x00007fbd45b0e000)
libc.so.6 => /lib64/libc.so.6 (0x00007fbd45740000)
/lib64/ld-linux-x86-64.so.2 (0x00007fbd45f40000)
解决办法:(主备数据库都要执行)
[root@localhost ~]# cp /home/pg12/soft/lib/libpq.so.5.12 /lib64/
[root@localhost ~]# ln -s /lib64/libpq.so.5.12 /lib64/libpq.so.5
[root@localhost ~]# su - pg12
Last login: Thu Oct 26 23:53:18 CST 2023 on pts/0
[pg12@localhost ~]$ cd soft/bin/
[pg12@localhost bin]$ ldd pg_basebackup
linux-vdso.so.1 => (0x00007fffe0597000)
libpq.so.5 => /lib64/libpq.so.5 (0x00007f5b59b18000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f5b598fc000)
libz.so.1 => /lib64/libz.so.1 (0x00007f5b596e6000)
libc.so.6 => /lib64/libc.so.6 (0x00007f5b59318000)
libssl.so.10 => /lib64/libssl.so.10 (0x00007f5b590a6000)
libcrypto.so.10 => /lib64/libcrypto.so.10 (0x00007f5b58c43000)
/lib64/ld-linux-x86-64.so.2 (0x00007f5b59d61000)
libgssapi_krb5.so.2 => /lib64/libgssapi_krb5.so.2 (0x00007f5b589f6000)
libkrb5.so.3 => /lib64/libkrb5.so.3 (0x00007f5b5870d000)
libcom_err.so.2 => /lib64/libcom_err.so.2 (0x00007f5b58509000)
libk5crypto.so.3 => /lib64/libk5crypto.so.3 (0x00007f5b582d6000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007f5b580d2000)
libkrb5support.so.0 => /lib64/libkrb5support.so.0 (0x00007f5b57ec2000)
libkeyutils.so.1 => /lib64/libkeyutils.so.1 (0x00007f5b57cbe000)
libresolv.so.2 => /lib64/libresolv.so.2 (0x00007f5b57aa4000)
libselinux.so.1 => /lib64/libselinux.so.1 (0x00007f5b5787d000)
libpcre.so.1 => /lib64/libpcre.so.1 (0x00007f5b5761b000)
再次克隆备库
[pg12@localhost conf]$ repmgr standby clone -h 10.111.2.10 -p5666 -U postgres
NOTICE: destination directory "/home/pg14/data" provided
INFO: connecting to source node
DETAIL: connection string is: host= 10.111.2.11 user=postgres port=5666
DETAIL: current installation size is 42 MB
INFO: replication slot usage not requested; no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/home/pg14/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
pg_basebackup -l "repmgr base backup" -D /home/pg14/data -h 10.111.2.11 -p 5666 -U postgres -X stream
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /home/pg14/data start
HINT: after starting the server, you need to re-register this standby with "repmgr standby register --force" to update the existing node record
3、启动并注册
[pg12@cdh02 data]$ pg_ctl start
waiting for server to start....2022-10-27 16:15:39.251 CST user=,db=,id=[25176]LOG: redirecting log output to logging collector process
2022-10-27 16:15:39.251 CST user=,db=,id=[25176]HINT: Future log output will appear in directory "/pglog".
done
server started
[pg12@cdh02 etc]$ cd /home/pg12/soft/etc
[pg12@cdh02 etc]$ repmgr standby register
INFO: connecting to local node "node2" (ID: 2)
INFO: connecting to primary database
WARNING: --upstream-node-id not supplied, assuming upstream node is primary (node ID: 1)
INFO: standby registration complete
NOTICE: standby node "node2" (ID: 2) successfully registered
[pg12@cdh02 etc]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 1 | host=10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | standby | running | node1 | default | 100 | 1 | host=10.111.2.11 port=5666 user=postgres dbname=postgres
六、启动repmgrd服务(主备)
1、查看repmgr服务状态
[pg12@cdh01 ~]$ repmgr service status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
1 | node1 | primary | * running | | not running | n/a | n/a | n/a
2 | node2 | standby | running | node1 | not running | n/a | n/a | n/a
添加配置
vi /home/pg12/conf/repmgr.conf
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/soft/etc/postgresql/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'
2、启动repmgrd服务
主
[pg12@cdh01 ~]$ repmgr service status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
1 | node1 | primary | * running | | not running | n/a | n/a | n/a
2 | node2 | standby | running | node1 | not running | n/a | n/a | n/a
[pg12@cdh01 ~]$ repmgrd -d
[2022-10-26 23:18:08] [NOTICE] repmgrd (repmgrd 5.3.3) starting up
[2022-10-26 23:18:08] [INFO] connecting to database "host=10.111.2.10 port=5666 user=postgres dbname=postgres"
[pg12@cdh01 ~]$ INFO: set_repmgrd_pid(): provided pidfile is /tmp/repmgrd.pid
[2022-10-26 23:18:08] [NOTICE] starting monitoring of node "node1" (ID: 1)
[2022-10-26 23:18:08] [INFO] "connection_check_type" set to "ping"
[2022-10-26 23:18:08] [NOTICE] monitoring cluster primary "node1" (ID: 1)
[2022-10-26 23:18:08] [INFO] child node "node2" (ID: 2) is attached
[pg12@cdh01 ~]$ repmgr service status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-------+---------+--------------------
1 | node1 | primary | * running | | running | 14467 | no | n/a
2 | node2 | standby | running | node1 | not running | n/a | n/a | n/a
备
[pg12@cdh02 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 1 | host=10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | standby | running | node1 | default | 100 | 1 | host=10.111.2.11 port=5666 user=postgres dbname=postgres
[pg12@cdh02 ~]$ repmgr service status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-------+---------+--------------------
1 | node1 | primary | * running | | running | 14467 | no | n/a
2 | node2 | standby | running | node1 | not running | n/a | n/a | n/a
[pg12@cdh02 ~]$ repmgrd -d
[2022-10-27 16:33:06] [NOTICE] repmgrd (repmgrd 5.3.3) starting up
[2022-10-27 16:33:06] [INFO] connecting to database "host=10.111.2.11 port=5666 user=postgres dbname=postgres"
[pg12@cdh02 ~]$ INFO: set_repmgrd_pid(): provided pidfile is /tmp/repmgrd.pid
[2022-10-27 16:33:06] [NOTICE] starting monitoring of node "node2" (ID: 2)
[2022-10-27 16:33:06] [INFO] "connection_check_type" set to "ping"
[2022-10-27 16:33:06] [INFO] monitoring connection to upstream node "node1" (ID: 1)
[pg12@cdh02 ~]$ repmgr service status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+-------+---------+--------------------
1 | node1 | primary | * running | | running | 14467 | no | n/a
2 | node2 | standby | running | node1 | running | 26051 | no | 0 second(s) ago
3、添加备库为witness节点
[pg12@cdh02 ~]$ repmgr witness register -h 10.111.2.11
INFO: connecting to witness node "node2" (ID: 2)
ERROR: provided node is a standby
HINT: a witness node must run on an independent primary server
4、配置repmgr开机启动
root@cdh01 system]# vi /etc/systemd/system/repmgrd.service
[Unit]
Description=repmgrd.service
After=network.target
[Service]
Type=forking
User=pg12
Group=pg12
WorkingDirectory=/home/pg12/soft
ExecStart=/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize
ExecStop=kill `cat /tmp/repmgrd.pid`
PrivateTmp=false
[Install]
WantedBy=multi-user.target
[root@cdh01 ~]# systemctl enable repmgrd.service
Created symlink from /etc/systemd/system/multi-user.target.wants/repmgrd.service to /etc/systemd/system/repmgrd.service.
[root@cdh01 ~]# systemctl start repmgrd.service
[root@cdh01 ~]# ps -ef|grep repmgr
pg12 17740 16852 0 20:54 ? 00:00:00 postgres: repmgr repmgr 10.111.2.10(42135) idle
pg12 17743 1 0 20:54 ? 00:00:00 /home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize
root 17753 15961 0 20:54 pts/1 00:00:00 grep --color=auto repmgr
[root@cdh01 ~]# systemctl stop repmgrd.service
[root@cdh01 ~]# ps -ef|grep repmgr
root 17778 15961 0 20:55 pts/1 00:00:00 grep --color=auto repmgr
七、故障转移自动切换
1、添加配置自动切换(主备)
failover='automatic'
promote_command='/home/pg12/soft/bin/repmgr standby promote -f /home/pg12/soft/etc/postgresql/repmgr.conf'
follow_command='/home/pg12/soft/bin/repmgr standby follow -f /home/pg12/soft/etc/postgresql/repmgr.conf
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'
2、重启repmgrd服务(主备)
使配置生效
[pg12@cdh01 ~]$cd /home/pg12/conf
[pg12@cdh01 ~]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped
[pg12@cdh01 ~]$ repmgr daemon status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
1 | node1 | standby | running | node2 | not running | n/a | n/a | n/a
2 | node2 | primary | * running | | not running | n/a | n/a | n/a
[pg12@cdh01 ~]$ repmgr daemon start
NOTICE: executing: "/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize"
NOTICE: repmgrd was successfully started
[pg12@cdh01 ~]$ repmgr daemon status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+------+---------+--------------------
1 | node1 | standby | running | node2 | running | 9249 | no | 0 second(s) ago
2 | node2 | primary | * running | | running | 7857 | no | n/a
3、模拟主库故障,标记为备库。
[pg12@cdh01 ~]$ pg_ctl stop
waiting for server to shut down.... done
server stopped
[pg12@cdh01 data]$ touch standby.signal
备库查看状态
[pg12@cdh02 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+---------------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | primary | ? unreachable | ? | default | 100 | | host=10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | standby | running | ? node1 | default | 100 | 1 | host=10.111.2.11 port=5666 user=postgres dbname=postgres
WARNING: following issues were detected
- unable to connect to node "node1" (ID: 1)
- node "node1" (ID: 1) is registered as an active primary but is unreachable
- unable to connect to node "node2" (ID: 2)'s upstream node "node1" (ID: 1)
- unable to determine if node "node2" (ID: 2) is attached to its upstream node "node1" (ID: 1)
HINT: execute with --verbose option to see connection error messages
4、原备库提升为主库
pg12@cdh02 postgresql]$ /home/pg12/soft/bin/repmgr standby promote -f /home/pg12/soft/etc/postgresql/repmgr.conf
NOTICE: promoting standby to primary
DETAIL: promoting server "node2" (ID: 2) using pg_promote()
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
[pg12@cdh02 postgresql]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | primary | - failed | ? | default | 100 | | host=10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | primary | * running | | default | 100 | 2 | host=10.111.2.11 port=5666 user=postgres dbname=postgres
WARNING: following issues were detected
- unable to connect to node "node1" (ID: 1)
HINT: execute with --verbose option to see connection error messages
5、原主库克隆新主库
[pg12@cdh01 ~]$ repmgr standby clone -h 10.111.2.11 -Upostgres -p5666 --force -f /home/pg12/soft/etc/postgresql/repmgr.conf
NOTICE: destination directory "/home/pg12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=10.111.2.11ser=postgres port=5666
DETAIL: current installation size is 42 MB
INFO: replication slot usage not requested; no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/home/pg12/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
pg_basebackup -l "repmgr base backup" -D /home/pg12/data -h 10.111.2.11 5666 -U postgres -X stream
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /home/pg12/data start
HINT: after starting the server, you need to re-register this standby with "repmgr standby register --force" to update the existing node record
6、启动新备库,重新注册
[pg12@cdh01 ~]$ pg_ctl start
waiting for server to start....2022-10-27 12:07:25.819 CST user=,db=,id=[23067]LOG: redirecting log output to logging collector process
2022-10-27 12:07:25.819 CST user=,db=,id=[23067]HINT: Future log output will appear in directory "/pglog".
done
server started
[pg12@cdh01 ~]$ repmgr standby register --force
INFO: connecting to local node "node1" (ID: 1)
INFO: connecting to primary database
INFO: standby registration complete
NOTICE: standby node "node1" (ID: 1) successfully registered
[pg12@cdh01 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | standby | running | node2 | default | 100 | 2 | host=10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | primary | * running | | default | 100 | 2 | host=10.111.2.11 port=5666 user=postgres dbname=postgres
[pg12@cdh02 postgresql]$ repmgr service status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+-------+---------+--------------------
1 | node1 | standby | running | node2 | running | 14467 | no | 0 second(s) ago
2 | node2 | primary | * running | | running | 26051 | no | n/a
7、自动切换
1)、主备repmgr.conf
主
vi /home/pg12/conf/repmgr.
node_id=1
node_name='node1'
conninfo='host=10.111.2.10 port=5666 user=postgres dbname=postgres'
data_directory='/home/pg12/data'
promote_command='/home/pg12/soft/bin/repmgr standby promote -f /home/pg12/conf/repmgr.conf'
follow_command='/home/pg12/soft/bin/repmgr standby follow -f /home/pg12/conf/repmgr.conf -W --upstream-node-id=%n'
failover='automatic'
service_start_command = 'sudo systemctl start postgresql.service'
service_stop_command = 'sudo systemctl stop postgresql.service'
service_restart_command = 'sudo systemctl restart postgresql.service'
service_reload_command = 'sudo systemctl reload postgresql.service'
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'
备
vi /home/pg12/conf/repmgr.
node_id=2
node_name='node2'
conninfo='host=10.111.2.11 port=5666 user=postgres dbname=postgres'
data_directory='/home/pg12/data'
promote_command='/home/pg12/soft/bin/repmgr standby promote -f /home/pg12/conf/repmgr.conf'
follow_command='/home/pg12/soft/bin/repmgr standby follow -f /home/pg12/conf/repmgr.conf -W --upstream-node-id=%n'
failover='automatic'
service_start_command = 'sudo systemctl start postgresql.service'
service_stop_command = 'sudo systemctl stop postgresql.service'
service_restart_command = 'sudo systemctl restart postgresql.service'
service_reload_command = 'sudo systemctl reload postgresql.service'
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'
使用systemctl启停postgres主备需要配置开机启动postgres
[root@cdh01 system]# vi /etc/systemd/system/postgresql.service
[Unit]
Description=postgresql.service
After=network.target
[Service]
Type=forking
User=pg12
Group=pg12
WorkingDirectory=/home/pg12/soft
ExecStart=/home/pg12/soft/bin/pg_ctl start -D /home/pg12/data
ExecReload=/home/pg12/soft/bin/pg_ctl restart -D /home/pg12/data
ExecStop=/home/pg12/soft/bin/pg_ctl stop -D /home/pg12/data
PrivateTmp=true
[Install]
WantedBy=multi-user.target
[root@cdh01 system]# systemctl enable postgresql.service
Created symlink from /etc/systemd/system/multi-user.target.wants/postgresql.service to /etc/systemd/system/postgresql.service.
[root@cdh01 system]# systemctl start postgresql.service
[root@cdh01 system]# ps -ef|grep postgres
pg12 8610 1 0 10:30 ? 00:00:00 /home/pg12/soft/bin/postgres -D /home/pg12/data
pg12 8611 8610 0 10:30 ? 00:00:00 postgres: logger
pg12 8612 8610 0 10:30 ? 00:00:00 postgres: startup recovering 000000020000000000000009
pg12 8613 8610 0 10:30 ? 00:00:00 postgres: checkpointer
pg12 8614 8610 0 10:30 ? 00:00:00 postgres: background writer
pg12 8615 8610 0 10:30 ? 00:00:00 postgres: stats collector
pg12 8616 8610 0 10:30 ? 00:00:00 postgres: walreceiver streaming 0/9004650
root 8625 8051 0 10:30 pts/0 00:00:00 grep --color=auto postgres
[root@cdh01 system]# systemctl stop postgresql.service
[root@cdh01 system]# ps -ef|grep postgres
root 8652 8051 0 10:30 pts/0 00:00:00 grep --color=auto postgres
主备配置pg12用户sudo权限
vi /etc/sudoers
Defaults:pg12 !requiretty
pg12 ALL = NOPASSWD: /usr/bin/systemctl stop postgresql.service, \
/usr/bin/systemctl start postgresql.service, \
/usr/bin/systemctl restart postgresql.service, \
/usr/bin/systemctl reload postgresql.service
补充说明:如上一步支持下面方式启停postgres
service_start_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data start'
service_stop_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data stop'
service_restart_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data restart'
service_reload_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data reload'
2)、使配置生效
[pg12@cdh01 ~]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped
[pg12@cdh01 ~]$ repmgr daemon status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
1 | node1 | standby | running | node2 | not running | n/a | n/a | n/a
2 | node2 | primary | * running | | not running | n/a | n/a | n/a
[pg12@cdh01 ~]$ repmgr daemon start
NOTICE: executing: "/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize"
NOTICE: repmgrd was successfully started
[pg12@cdh01 ~]$ repmgr daemon status
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+------+---------+--------------------
1 | node1 | standby | running | node2 | running | 9249 | no | 0 second(s) ago
2 | node2 | primary | * running | | running | 7857 | no | n/a
3)、停止主库node2
[pg12@cdh02 ~]$ pg_ctl stop
waiting for server to shut down.... done
server stopped
4)、备库node1变为新主
[pg12@cdh01 ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 3 | host=10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | primary | - failed | ? | default | 100 | | host=10.111.2.11 port=5666 user=postgres dbname=postgres
5)、重新克隆node2为备库
[pg12@cdh02 ~]$ repmgr standby clone -h 10.111.2.10 -Upostgres -p5666 --force -f /home/pg12/soft/etc/postgresql/repmgr.conf
NOTICE: destination directory "/home/pg12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=10.111.2.10ser=postgres port=5666
DETAIL: current installation size is 42 MB
INFO: replication slot usage not requested; no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/home/pg12/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
pg_basebackup -l "repmgr base backup" -D /home/pg12/data -h 10.111.2.10 5666 -U postgres -X stream
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: /home/pg12/soft/bin/pg_ctl -D /home/pg12/data start
HINT: after starting the server, you need to re-register this standby with "repmgr standby register --force" to update the existing node record
[pg12@cdh02 data]$ pg_ctl start
waiting for server to start....2022-10-28 12:24:18.282 CST user=,db=,id=[47760]LOG: redirecting log output to logging collector process
2022-10-28 12:24:18.282 CST user=,db=,id=[47760]HINT: Future log output will appear in directory "/pglog".
done
server started
[pg12@cdh02 data]$ repmgr cluster show -f /home/pg12/soft/etc/postgresql/repmgr.conf
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 3 | host=10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | standby | running | node1 | default | 100 | 3 | host=10.111.2.11 port=5666 user=postgres dbname=postgres
8、修改流复制用户和数据库为repmgr
8.1 查看集群状态,并新建用户(主库)
[pg12@localhost ~]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 3 | host= 10.111.2.10 port=5666 user=postgres dbname=postgres
2 | node2 | standby | running | | default | 100 | 3 | host= 10.111.2.11 port=5666 user=postgres dbname=postgres
[pg12@localhost ~]$ psql -Upostgres -p5666 postgres
psql (12.14)
Type "help" for help.
postgres=# create user repmgr with password 'repmgr' superuser replication;
CREATE ROLE
postgres=# create database repmgr owner repmgr;
CREATE DATABASE
8.2停止repmgrd服务(主备)
[pg12@localhost ~]$ cd conf
[pg12@localhost conf]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped
[pg12@localhost ~]$ cd conf
[pg12@localhost conf]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped
8.3停止postgresql服务(主备)
[pg12@localhost conf]$ pg_ctl stop
waiting for server to shut down....2023-10-27 14:44:04.128 CST [2829] LOG: received fast shutdown request
2023-10-27 14:44:04.128 CST [2829] LOG: aborting any active transactions
2023-10-27 14:44:04.129 CST [2829] LOG: background worker "logical replication launcher" (PID 3403) exited with exit code 1
2023-10-27 14:44:04.129 CST [2831] LOG: shutting down
2023-10-27 14:44:04.149 CST [2829] LOG: database system is shut down
done
server stopped
[pg12@localhost conf]$ pg_ctl stop
waiting for server to shut down.... done
server stopped
8.4 修改repmgr.conf(主备)
vi /home/pg12/conf/repmgr.conf
主库
conninfo='host= 10.111.2.10 port=5666 user=postgres dbname=postgres'
改为
conninfo='host=10.111.2.10 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3'
备库
conninfo='host= 10.111.2.11 port=5666 user=postgres dbname=postgres'
改为
conninfo='host=10.111.2.11 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3'
8.5修改postgresql.auto.conf(主备)
vi /home/pg12/data/postgresql.auto.conf
主库
primary_conninfo = 'host=10.111.2.11 user=postgres port=5666 application_name=node1'
改为
primary_conninfo = 'host=10.111.2.11 user=repmgr port=5666 application_name=node1'
备库
primary_conninfo = 'user=postgres host=10.111.2.10 port=5666 application_name=node2'
改为
primary_conninfo = 'user=repmgr host=10.111.2.10 port=5666 application_name=node2'
8.6 启动数据库(主备)
主库
[pg12@localhost ~]$ pg_ctl start
waiting for server to start....2023-10-27 14:55:14.261 CST [18464] LOG: starting PostgreSQL 12.14 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44), 64-bit
2023-10-27 14:55:14.261 CST [18464] LOG: listening on IPv4 address "0.0.0.0", port 5666
2023-10-27 14:55:14.261 CST [18464] LOG: listening on IPv6 address "::", port 5666
2023-10-27 14:55:14.262 CST [18464] LOG: listening on Unix socket "/tmp/.s.PGSQL.5666"
2023-10-27 14:55:14.270 CST [18465] LOG: database system was shut down at 2023-10-27 14:44:04 CST
2023-10-27 14:55:14.273 CST [18464] LOG: database system is ready to accept connections
done
server started
备库
[pg12@localhost ~]$ pg_ctl start
waiting for server to start....2023-10-27 14:55:14.345 CST [15087] LOG: starting PostgreSQL 12.14 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44), 64-bit
2023-10-27 14:55:14.345 CST [15087] LOG: listening on IPv4 address "0.0.0.0", port 5666
2023-10-27 14:55:14.345 CST [15087] LOG: listening on IPv6 address "::", port 5666
2023-10-27 14:55:14.346 CST [15087] LOG: listening on Unix socket "/tmp/.s.PGSQL.5666"
2023-10-27 14:55:14.354 CST [15088] LOG: database system was shut down in recovery at 2023-10-27 14:44:04 CST
2023-10-27 14:55:14.354 CST [15088] LOG: entering standby mode
2023-10-27 14:55:14.355 CST [15088] LOG: redo starts at 0/500C650
2023-10-27 14:55:14.355 CST [15088] LOG: consistent recovery state reached at 0/500C738
2023-10-27 14:55:14.355 CST [15088] LOG: invalid record length at 0/500C738: wanted 24, got 0
2023-10-27 14:55:14.356 CST [15087] LOG: database system is ready to accept read only connections
2023-10-27 14:55:14.360 CST [15092] LOG: started streaming WAL from primary at 0/5000000 on timeline 3
done
server started
8.7 重新注册主备库
主库
[pg12@localhost ~]$ cd conf/
[pg12@localhost conf]$ repmgr primary register --force
INFO: connecting to primary database...
NOTICE: attempting to install extension "repmgr"
NOTICE: "repmgr" extension successfully installed
NOTICE: primary node record (ID: 1) registered
[pg12@localhost conf]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 3 | host=10.111.2.10 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3
备库
[pg12@localhost ~]$ cd conf/
[pg12@localhost conf]$ repmgr standby register --force
INFO: connecting to local node "node2" (ID: 2)
INFO: connecting to primary database
WARNING: --upstream-node-id not supplied, assuming upstream node is primary (node ID: 1)
INFO: standby registration complete
NOTICE: standby node "node2" (ID: 2) successfully registered
[pg12@localhost conf]$ repmgr cluster show
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
----+-------+---------+-----------+----------+----------+----------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------
1 | node1 | primary | * running | | default | 100 | 3 | host=10.111.2.10 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3
2 | node2 | standby | running | node1 | default | 100 | 3 | host=10.111.2.11 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3