一、资源规划

主机名

IP地址

服务器配置

角色

数据目录

cdh01

10.111.2.10

1G 1core

主库

/home/pg12/data

cdh02

10.111.2.11

1G 1core

备库

/home/pg12/data

二、操作系统调优(主备)

1、关闭防火墙和selinux

[root@cdh01 ~]# systemctl stop firewalld

[root@cdh01 ~]# systemctl disable firewalld

[root@cdh01 ~]# systemctl status    firewalld

[root@cdh01 ~]# service iptables stop

[root@cdh01 ~]# service iptables status

禁用selinux

[root@cdh01 ~]# vi /etc/selinux/config

    SELINUX=disabled

[root@cdh01 ~]#setenforce 0 

[root@cdh01 ~]#getenforce

2、调整ulimit限制

[root@cdh01 ~]# vi /etc/security/limits.conf

* soft nofile 65536
* hard nofile 65535
* soft nproc  65536
* hard nproc  65535
* soft core   unlimited
* hard core   unlimited

3、调整内核参数

[root@cdh01 ~]# vi /etc/sysctl.conf 

    kernel.sem = 5010 641280 5010 256

[root@cdh01 ~]# sysctl -p  生效

4、调整RemoveIPC和DefaultTasksAccounting

[root@cdh01 ~]# vi /etc/systemd/logind.conf
    RemoveIPC=no

[root@cdh01 ~]# vi /etc/systemd/system.conf
    DefaultTasksAccounting=no
执行
[root@cdh01 ~]# systemctl daemon-reload
[root@cdh01 ~]# systemctl darmon-reexec
[root@cdh01 ~]# systemctl restart systemd-logind

5、调整sshd

[root@cdh01 ~]# vi /etc/ssh/sshd_config
    PermitRootLogin yes
    GSSAPIAuthentication no
    UseDNS no
[root@cdh01 ~]# systemctl restart sshd

6、配置免密登录

ssh免密登录 root-->root pg12-->pg12

节点ssh互信
cdh01:
[root@cdh01 ~]# ssh-keygen -t rsa
[pg12@cdh01 ~]$ssh-keygen -t rsa

cdh02:
[root@cdh01 ~]# ssh-keygen -t rsa
[pg12@cdh02 ~]$ssh-keygen -t rsa

cdh01:
[root@cdh01 ~]# cd .ssh/
[root@cdh01 ~/.ssh]#cat id_dsa.pub >> authorized_keys
[root@cdh01 ~/.ssh]#cat /home/pg12/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@cdh01 ~/.ssh]#ssh root@10.111.2.11 cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@cdh01 ~/.ssh]#ssh pg12@10.111.2.11at /home/pg12/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[root@cdh01 ~/.ssh]#cp authorized_keys /home/pg12/.ssh/
[root@cdh01 ~/.ssh]#scp authorized_keys root@10.111.2.11:~/.ssh/
[root@cdh01 ~/.ssh]#scp authorized_keys pg12@10.111.2.11/kingbase/.ssh/
[root@cdh01 ~/.ssh]#chmod 600 authorized_keys
[pg12@cdh01 ~]$chmod 600 authorized_keys
cdh02:
[root@cdh02 ~/.ssh]#chown -R pg12.pg12 /home/pg12/.ssh/authorized_keys
[root@cdh02 ~/.ssh]#chmod 600 /home/pg12/.ssh/authorized_keys
测试
两个节点互相访问,可以直接登录无需密码说明配置成功。两个节点必须互相测试一次,否则后续集群脚本无法执行。
节点1(cdh01)上执行:

[root@cdh01 ~]#ssh  cdh02
[pg12@cdh01 ~]$ssh  pg12@cdh02
节点2(cdh02)上执行:
[root@cdh02 ~]#ssh  cdh01
[pg12@cdh02 ~]$ssh  pg12@cdh01

三、repmgr编译安装(主备)

postgresql-12.14数据库已安装完成,源码安装具体步骤可参见之前记录。数据库安装目录/home/pg12/soft,数据目录/home/pg12/data。具体介绍repmgr安装过程。

1、下载源码包

https://repmgr.org/download/repmgr-5.3.3.tar.gz

2、配置数据库环境变量

vi ~/.bash_profile

PATH=$PATH:$HOME/.local/bin:$HOME/bin:/home/pg12/soft/bin
PGDATA=/home/pg12/data
export PGDATA
export PATH

若不配置$PGDATA,编译repmgr报错:configure: error: could not find pg_config, set PG_CONFIG or PATH

[pg12@cdh02 repmgr-5.3.3]$ ./configure 
checking for a sed that does not truncate output... /bin/sed
checking for pg_config... no
configure: error: could not find pg_config, set PG_CONFIG or PATH

3、解压编译

[pg12@cdh01 ~]$ tar xf repmgr-5.3.3.tar.gz 
[pg12@cdh01 ~]$ cd repmgr-5.3.3
[pg12@cdh01 repmgr-5.3.3]$ ./configure 
checking for a sed that does not truncate output... /bin/sed
checking for pg_config... /home/pg12/soft/bin/pg_config
configure: building against PostgreSQL 14.5
checking for gnused... no
checking for gsed... no
checking for sed... yes

configure: creating ./config.status
config.status: creating Makefile
config.status: creating Makefile.global
config.status: creating config.h
[pg12@cdh01 repmgr-5.3.3]$ make install
Building against PostgreSQL 14
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr.o repmgr.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -shared -o repmgr.so repmgr.o -L/home/pg12/soft/lib    -Wl,--as-needed -Wl,-rpath,'/home/pg12/soft/lib',--enable-new-dtags  -L/home/pg12/soft/lib -lpq 
sed -E 's/REPMGR_VERSION_DATE.*""/REPMGR_VERSION_DATE "2022-10-26"/' repmgr_version.h.in >repmgr_version.h; \
sed -i -E 's/PG_ACTUAL_VERSION_NUM/PG_ACTUAL_VERSION_NUM 140005/' repmgr_version.h
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-client.o repmgr-client.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-action-primary.o repmgr-action-primary.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-action-standby.o repmgr-action-standby.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-action-witness.o repmgr-action-witness.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-action-cluster.o repmgr-action-cluster.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-action-node.o repmgr-action-node.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-action-service.o repmgr-action-service.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgr-action-daemon.o repmgr-action-daemon.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o configdata.o configdata.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o configfile.o configfile.c
flex  -o'configfile-scan.c' configfile-scan.l
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o configfile-scan.o configfile-scan.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o log.o log.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o strutil.o strutil.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o controldata.o controldata.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o dirutil.o dirutil.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o compat.o compat.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o dbutils.o dbutils.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o sysutils.o sysutils.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC repmgr-client.o repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-service.o repmgr-action-daemon.o configdata.o configfile.o configfile-scan.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o -L/home/pg12/soft/lib -lpgcommon -lpgport -L/home/pg12/soft/lib -lpq -L/home/pg12/soft/lib    -Wl,--as-needed -Wl,-rpath,'/home/pg12/soft/lib',--enable-new-dtags  -lpgcommon -lpgport -lssl -lcrypto -lz -lreadline -lpthread -lrt -ldl -lm  -o repmgr
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgrd.o repmgrd.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC -std=gnu89 -I/home/pg12/soft/include/postgresql/internal -I/home/pg12/soft/include -Wall -Wmissing-prototypes -Wmissing-declarations  -I. -I./ -I/home/pg12/soft/include/postgresql/server -I/home/pg12/soft/include/postgresql/internal  -D_GNU_SOURCE   -c -o repmgrd-physical.o repmgrd-physical.c
gcc -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wdeclaration-after-statement -Werror=vla -Wendif-labels -Wmissing-format-attribute -Wformat-security -fno-strict-aliasing -fwrapv -fexcess-precision=standard -O2 -fPIC repmgrd.o repmgrd-physical.o configdata.o configfile.o configfile-scan.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o -L/home/pg12/soft/lib -lpgcommon -lpgport -L/home/pg12/soft/lib -lpq -L/home/pg12/soft/lib    -Wl,--as-needed -Wl,-rpath,'/home/pg12/soft/lib',--enable-new-dtags  -lpgcommon -lpgport -lssl -lcrypto -lz -lreadline -lpthread -lrt -ldl -lm  -o repmgrd
/bin/mkdir -p '/home/pg12/soft/lib/postgresql'
/bin/mkdir -p '/home/pg12/soft/share/postgresql/extension'
/bin/mkdir -p '/home/pg12/soft/share/postgresql/extension'
/bin/mkdir -p '/home/pg12/soft/bin'
/bin/install -c -m 755  repmgr.so '/home/pg12/soft/lib/postgresql/repmgr.so'
/bin/install -c -m 644 .//repmgr.control '/home/pg12/soft/share/postgresql/extension/'
/bin/install -c -m 644 .//repmgr--unpackaged--4.0.sql .//repmgr--unpackaged--5.1.sql .//repmgr--unpackaged--5.2.sql .//repmgr--unpackaged--5.3.sql .//repmgr--4.0.sql .//repmgr--4.0--4.1.sql .//repmgr--4.1.sql .//repmgr--4.1--4.2.sql .//repmgr--4.2.sql .//repmgr--4.2--4.3.sql .//repmgr--4.3.sql .//repmgr--4.3--4.4.sql .//repmgr--4.4.sql .//repmgr--4.4--5.0.sql .//repmgr--5.0.sql .//repmgr--5.0--5.1.sql .//repmgr--5.1.sql .//repmgr--5.1--5.2.sql .//repmgr--5.2.sql .//repmgr--5.2--5.3.sql .//repmgr--5.3.sql  '/home/pg12/soft/share/postgresql/extension/'
/bin/install -c -m 755 repmgr repmgrd '/home/pg12/soft/bin/'

编译报错:make: flex: Command not found

yum install -y flex

编译安装默认不会生成repmgr.conf配置文件,可复制源码包的示例repmgr.conf.sample配置文件到指定目录。在使用repmgr命令需要指定repmgr.conf路径,通过-f/--config-file参数指定路径。如果找不到或无法读取该文件,则会引发错误,并且不会尝试检查默认位置。这是为了防止repmgr意外读取错误的配置文件。为了解决这个问题,可以将repmgr.con创建在pg_conf默认的sysconfidir路径下:/home/pg12/soft/etc/postgresql

[pg12@cdh01 data]$ pg_config --sysconfdir
/home/pg12/soft/etc/postgresql
本文配置文件放到:/home/pg12/conf/repmgr.conf 
[pg12@cdh01 repmgr-5.3.3]$cd /home/pg12/repmgr-5.3.3
[pg12@cdh01 repmgr-5.3.3]$cp repmgr.conf.sample /home/pg12/conf/repmgr.conf

四、主库配置

1、初始化数据库

[pg12@cdh02 bin]$ ./initdb -Upostgres -W -D /home/pg12/data -k 
The files belonging to this database system will be owned by user "pg12".
This user must also own the server process.

The database cluster will be initialized with locale "en_US.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".

Data page checksums are enabled.

Enter new superuser password: 
Enter it again: 

creating directory /home/pg12/data ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... PRC
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok

initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.

Success. You can now start the database server using:

    ./pg_ctl -D /home/pg12/data -l logfile start

2、修改数据库配置

postgresql.conf

listen_addresses = '*'
port = 5666
shared_preload_libraries = 'repmgr'

wal_log_hints = on  #开启支持pg_rewind

需要修改配置文件内容,如果直接复制port和listen_addresses到postgresql.conf文件最后,psql -p 5666 会登录不上。

pg_hba.conf添加如下策略

host    all             all             10.111.2.10/24        trust
host    all             all             10.111.2.11/24        trust
host    replication     all             10.111.2.10/24        trust   
host    replication     all             10.111.2.11/24        trust

配置为:

# "local" is for Unix domain socket connections only
local   all             all                                     trust
# IPv4 local connections:
host    all             all             127.0.0.1/32            trust
# IPv6 local connections:
host    all             all             ::1/128                 trust
# Allow replication connections from localhost, by a user with the
# replication privilege.
local   replication     all                                     trust
host    replication     all             127.0.0.1/32            trust
host    replication     all             ::1/128                 trust
host    all             all             10.111.2.10/24        trust
host    all             all             10.111.2.11/24        trust
host    replication     all             10.111.2.10/24        trust
host    replication     all             10.111.2.11/24        trust

启动数据库

[pg12@cdh01 data]$ pg_ctl start -D /home/pg12/data/
waiting for server to start....2022-10-26 17:10:38.096 CST user=,db=,id=[13508]LOG:  redirecting log output to logging collector process
2022-10-26 17:10:38.096 CST user=,db=,id=[13508]HINT:  Future log output will appear in directory "/pglog".
 done
server started

3、修改repmgr.conf

vi /home/pg12/conf/repmgr.conf 

node_id=1
node_name='node1'           
conninfo='host=10.111.2.10 port=5666 user=postgres  dbname=postgres'             
data_directory='/home/pg12/data'

4、注册主节点

[pg12@cdh01 data]$ cd /home/pg12/soft/etc/postgresql
[pg12@cdh01 postgresql]$ repmgr primary register
INFO: connecting to primary database...
NOTICE: attempting to install extension "repmgr"
NOTICE: "repmgr" extension successfully installed
NOTICE: primary node record (ID: 1) registered

[pg12@cdh01 postgresql]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 1        | host=10.111.2.10 port=5666 user=postgres  dbname=postgres

五、备库配置

1、修改repmgr.conf

vi /home/pg12/conf/repmgr.conf 

node_id=2
node_name='node2'           
conninfo='host=10.111.2.11 port=5666 user=postgres  dbname=postgres'             
data_directory='/home/pg12/data'

2、克隆备库

[pg12@localhost conf]$ repmgr standby clone -h  10.111.2.10 -p5666 -U postgres
NOTICE: destination directory "/home/pg12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=10.111.2.10 port=5666 user=postgres
DETAIL: current installation size is 23 MB
INFO: replication slot usage not requested;  no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: creating directory "/home/pg12/data"...
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
  pg_basebackup -l "repmgr base backup"  -D /home/pg12/data -h 10.111.2.10 -p 5666 -U postgres -X stream 
pg_basebackup: error while loading shared libraries: libpq.so.5: cannot open shared object file: No such file or directory
ERROR: unable to take a base backup of the source server
HINT: data directory ("/home/pg12/data") may need to be cleaned up manually

pg_basebackup克隆报错:error while loading shared libraries: libpq.so.5: cannot open shared object file: No such file or directory.

[pg12@localhost ~]$ cd soft/bin/
[pg12@localhost bin]$ ldd pg_basebackup 
        linux-vdso.so.1 =>  (0x00007ffdfdf8d000)
        libpq.so.5 => not found
        libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fbd45d24000)
        libz.so.1 => /lib64/libz.so.1 (0x00007fbd45b0e000)
        libc.so.6 => /lib64/libc.so.6 (0x00007fbd45740000)
        /lib64/ld-linux-x86-64.so.2 (0x00007fbd45f40000)

解决办法:(主备数据库都要执行)

[root@localhost ~]# cp /home/pg12/soft/lib/libpq.so.5.12 /lib64/
[root@localhost ~]# ln -s /lib64/libpq.so.5.12 /lib64/libpq.so.5
[root@localhost ~]# su - pg12
Last login: Thu Oct 26 23:53:18 CST 2023 on pts/0
[pg12@localhost ~]$ cd soft/bin/
[pg12@localhost bin]$ ldd pg_basebackup 
        linux-vdso.so.1 =>  (0x00007fffe0597000)
        libpq.so.5 => /lib64/libpq.so.5 (0x00007f5b59b18000)
        libpthread.so.0 => /lib64/libpthread.so.0 (0x00007f5b598fc000)
        libz.so.1 => /lib64/libz.so.1 (0x00007f5b596e6000)
        libc.so.6 => /lib64/libc.so.6 (0x00007f5b59318000)
        libssl.so.10 => /lib64/libssl.so.10 (0x00007f5b590a6000)
        libcrypto.so.10 => /lib64/libcrypto.so.10 (0x00007f5b58c43000)
        /lib64/ld-linux-x86-64.so.2 (0x00007f5b59d61000)
        libgssapi_krb5.so.2 => /lib64/libgssapi_krb5.so.2 (0x00007f5b589f6000)
        libkrb5.so.3 => /lib64/libkrb5.so.3 (0x00007f5b5870d000)
        libcom_err.so.2 => /lib64/libcom_err.so.2 (0x00007f5b58509000)
        libk5crypto.so.3 => /lib64/libk5crypto.so.3 (0x00007f5b582d6000)
        libdl.so.2 => /lib64/libdl.so.2 (0x00007f5b580d2000)
        libkrb5support.so.0 => /lib64/libkrb5support.so.0 (0x00007f5b57ec2000)
        libkeyutils.so.1 => /lib64/libkeyutils.so.1 (0x00007f5b57cbe000)
        libresolv.so.2 => /lib64/libresolv.so.2 (0x00007f5b57aa4000)
        libselinux.so.1 => /lib64/libselinux.so.1 (0x00007f5b5787d000)
        libpcre.so.1 => /lib64/libpcre.so.1 (0x00007f5b5761b000)

再次克隆备库

[pg12@localhost conf]$ repmgr standby clone -h  10.111.2.10 -p5666 -U postgres
NOTICE: destination directory "/home/pg14/data" provided
INFO: connecting to source node
DETAIL: connection string is: host= 10.111.2.11 user=postgres port=5666
DETAIL: current installation size is 42 MB
INFO: replication slot usage not requested;  no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/home/pg14/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
  pg_basebackup -l "repmgr base backup"  -D /home/pg14/data -h  10.111.2.11 -p 5666 -U postgres -X stream 
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /home/pg14/data start
HINT: after starting the server, you need to re-register this standby with "repmgr standby register --force" to update the existing node record

3、启动并注册

[pg12@cdh02 data]$ pg_ctl start
waiting for server to start....2022-10-27 16:15:39.251 CST user=,db=,id=[25176]LOG:  redirecting log output to logging collector process
2022-10-27 16:15:39.251 CST user=,db=,id=[25176]HINT:  Future log output will appear in directory "/pglog".
 done
server started
[pg12@cdh02 etc]$ cd /home/pg12/soft/etc
[pg12@cdh02 etc]$ repmgr standby register
INFO: connecting to local node "node2" (ID: 2)
INFO: connecting to primary database
WARNING: --upstream-node-id not supplied, assuming upstream node is primary (node ID: 1)
INFO: standby registration complete
NOTICE: standby node "node2" (ID: 2) successfully registered
[pg12@cdh02 etc]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 1        | host=10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | standby |   running | node1    | default  | 100      | 1        | host=10.111.2.11 port=5666 user=postgres dbname=postgres

六、启动repmgrd服务(主备)

1、查看repmgr服务状态

[pg12@cdh01 ~]$ repmgr service status
 ID | Name  | Role    | Status    | Upstream | repmgrd     | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
 1  | node1 | primary | * running |          | not running | n/a | n/a     | n/a                
 2  | node2 | standby |   running | node1    | not running | n/a | n/a     | n/a

添加配置

vi /home/pg12/conf/repmgr.conf 
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/soft/etc/postgresql/repmgr.conf  --pid-file /tmp/repmgrd.pid  --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'

2、启动repmgrd服务

[pg12@cdh01 ~]$ repmgr service status
 ID | Name  | Role    | Status    | Upstream | repmgrd     | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
 1  | node1 | primary | * running |          | not running | n/a | n/a     | n/a                
 2  | node2 | standby |   running | node1    | not running | n/a | n/a     | n/a                
[pg12@cdh01 ~]$ repmgrd -d
[2022-10-26 23:18:08] [NOTICE] repmgrd (repmgrd 5.3.3) starting up
[2022-10-26 23:18:08] [INFO] connecting to database "host=10.111.2.10 port=5666 user=postgres  dbname=postgres"
[pg12@cdh01 ~]$ INFO:  set_repmgrd_pid(): provided pidfile is /tmp/repmgrd.pid
[2022-10-26 23:18:08] [NOTICE] starting monitoring of node "node1" (ID: 1)
[2022-10-26 23:18:08] [INFO] "connection_check_type" set to "ping"
[2022-10-26 23:18:08] [NOTICE] monitoring cluster primary "node1" (ID: 1)
[2022-10-26 23:18:08] [INFO] child node "node2" (ID: 2) is attached

[pg12@cdh01 ~]$ repmgr service status
 ID | Name  | Role    | Status    | Upstream | repmgrd     | PID   | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-------+---------+--------------------
 1  | node1 | primary | * running |          | running     | 14467 | no      | n/a                
 2  | node2 | standby |   running | node1    | not running | n/a   | n/a     | n/a

[pg12@cdh02 ~]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 1        | host=10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | standby |   running | node1    | default  | 100      | 1        | host=10.111.2.11 port=5666 user=postgres dbname=postgres 
[pg12@cdh02 ~]$ repmgr service status
 ID | Name  | Role    | Status    | Upstream | repmgrd     | PID   | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-------+---------+--------------------
 1  | node1 | primary | * running |          | running     | 14467 | no      | n/a                
 2  | node2 | standby |   running | node1    | not running | n/a   | n/a     | n/a                
[pg12@cdh02 ~]$ repmgrd -d
[2022-10-27 16:33:06] [NOTICE] repmgrd (repmgrd 5.3.3) starting up
[2022-10-27 16:33:06] [INFO] connecting to database "host=10.111.2.11 port=5666 user=postgres dbname=postgres"
[pg12@cdh02 ~]$ INFO:  set_repmgrd_pid(): provided pidfile is /tmp/repmgrd.pid
[2022-10-27 16:33:06] [NOTICE] starting monitoring of node "node2" (ID: 2)
[2022-10-27 16:33:06] [INFO] "connection_check_type" set to "ping"
[2022-10-27 16:33:06] [INFO] monitoring connection to upstream node "node1" (ID: 1)

[pg12@cdh02 ~]$ repmgr service status
 ID | Name  | Role    | Status    | Upstream | repmgrd | PID   | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+-------+---------+--------------------
 1  | node1 | primary | * running |          | running | 14467 | no      | n/a                
 2  | node2 | standby |   running | node1    | running | 26051 | no      | 0 second(s) ago

3、添加备库为witness节点

[pg12@cdh02 ~]$ repmgr witness register -h 10.111.2.11
INFO: connecting to witness node "node2" (ID: 2)
ERROR: provided node is a standby
HINT: a witness node must run on an independent primary server

4、配置repmgr开机启动

root@cdh01 system]# vi /etc/systemd/system/repmgrd.service 

[Unit]
Description=repmgrd.service
After=network.target
[Service]
Type=forking
User=pg12
Group=pg12
WorkingDirectory=/home/pg12/soft
ExecStart=/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf  --pid-file /tmp/repmgrd.pid  --daemonize
ExecStop=kill `cat /tmp/repmgrd.pid`
PrivateTmp=false
[Install]
WantedBy=multi-user.target
[root@cdh01 ~]# systemctl enable repmgrd.service
Created symlink from /etc/systemd/system/multi-user.target.wants/repmgrd.service to /etc/systemd/system/repmgrd.service.
[root@cdh01 ~]# systemctl start repmgrd.service
[root@cdh01 ~]# ps -ef|grep repmgr
pg12      17740  16852  0 20:54 ?        00:00:00 postgres: repmgr repmgr 10.111.2.10(42135) idle
pg12      17743      1  0 20:54 ?        00:00:00 /home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize
root      17753  15961  0 20:54 pts/1    00:00:00 grep --color=auto repmgr
[root@cdh01 ~]# systemctl stop repmgrd.service
[root@cdh01 ~]# ps -ef|grep repmgr
root      17778  15961  0 20:55 pts/1    00:00:00 grep --color=auto repmgr

七、故障转移自动切换

1、添加配置自动切换(主备)

failover='automatic'
promote_command='/home/pg12/soft/bin/repmgr standby promote -f /home/pg12/soft/etc/postgresql/repmgr.conf'   
follow_command='/home/pg12/soft/bin/repmgr standby follow -f /home/pg12/soft/etc/postgresql/repmgr.conf 
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf  --pid-file /tmp/repmgrd.pid  --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'

2、重启repmgrd服务(主备)

使配置生效

[pg12@cdh01 ~]$cd /home/pg12/conf
[pg12@cdh01 ~]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped
[pg12@cdh01 ~]$ repmgr daemon status
 ID | Name  | Role    | Status    | Upstream | repmgrd     | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
 1  | node1 | standby |   running | node2    | not running | n/a | n/a     | n/a                
 2  | node2 | primary | * running |          | not running | n/a | n/a     | n/a                
[pg12@cdh01 ~]$ repmgr daemon start
NOTICE: executing: "/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf  --pid-file /tmp/repmgrd.pid  --daemonize"
NOTICE: repmgrd was successfully started
[pg12@cdh01 ~]$ repmgr daemon status
 ID | Name  | Role    | Status    | Upstream | repmgrd | PID  | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+------+---------+--------------------
 1  | node1 | standby |   running | node2    | running | 9249 | no      | 0 second(s) ago    
 2  | node2 | primary | * running |          | running | 7857 | no      | n/a

3、模拟主库故障,标记为备库。

[pg12@cdh01 ~]$ pg_ctl stop
waiting for server to shut down.... done
server stopped
[pg12@cdh01 data]$ touch standby.signal

备库查看状态

[pg12@cdh02 ~]$ repmgr cluster show
 ID | Name  | Role    | Status        | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+---------------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | primary | ? unreachable | ?        | default  | 100      |          | host=10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | standby |   running     | ? node1  | default  | 100      | 1        | host=10.111.2.11 port=5666 user=postgres dbname=postgres 

WARNING: following issues were detected

  - unable to connect to node "node1" (ID: 1)
  - node "node1" (ID: 1) is registered as an active primary but is unreachable
  - unable to connect to node "node2" (ID: 2)'s upstream node "node1" (ID: 1)
  - unable to determine if node "node2" (ID: 2) is attached to its upstream node "node1" (ID: 1)

HINT: execute with --verbose option to see connection error messages

4、原备库提升为主库

pg12@cdh02 postgresql]$ /home/pg12/soft/bin/repmgr standby promote -f /home/pg12/soft/etc/postgresql/repmgr.conf
NOTICE: promoting standby to primary
DETAIL: promoting server "node2" (ID: 2) using pg_promote()
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
[pg12@cdh02 postgresql]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | primary | - failed  | ?        | default  | 100      |          | host=10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | primary | * running |          | default  | 100      | 2        | host=10.111.2.11 port=5666 user=postgres dbname=postgres 

WARNING: following issues were detected

  - unable to connect to node "node1" (ID: 1)

HINT: execute with --verbose option to see connection error messages

5、原主库克隆新主库

[pg12@cdh01 ~]$ repmgr standby clone  -h 10.111.2.11 -Upostgres -p5666 --force -f /home/pg12/soft/etc/postgresql/repmgr.conf 
NOTICE: destination directory "/home/pg12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=10.111.2.11ser=postgres port=5666
DETAIL: current installation size is 42 MB
INFO: replication slot usage not requested;  no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/home/pg12/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
  pg_basebackup -l "repmgr base backup"  -D /home/pg12/data -h 10.111.2.11 5666 -U postgres -X stream 
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: pg_ctl -D /home/pg12/data start
HINT: after starting the server, you need to re-register this standby with "repmgr standby register --force" to update the existing node record

6、启动新备库,重新注册

[pg12@cdh01 ~]$ pg_ctl start
waiting for server to start....2022-10-27 12:07:25.819 CST user=,db=,id=[23067]LOG:  redirecting log output to logging collector process
2022-10-27 12:07:25.819 CST user=,db=,id=[23067]HINT:  Future log output will appear in directory "/pglog".
 done
server started
[pg12@cdh01 ~]$ repmgr standby register --force
INFO: connecting to local node "node1" (ID: 1)
INFO: connecting to primary database
INFO: standby registration complete
NOTICE: standby node "node1" (ID: 1) successfully registered
[pg12@cdh01 ~]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | standby |   running | node2    | default  | 100      | 2        | host=10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | primary | * running |          | default  | 100      | 2        | host=10.111.2.11 port=5666 user=postgres dbname=postgres 
[pg12@cdh02 postgresql]$ repmgr service status
 ID | Name  | Role    | Status    | Upstream | repmgrd | PID   | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+-------+---------+--------------------
 1  | node1 | standby |   running | node2    | running | 14467 | no      | 0 second(s) ago    
 2  | node2 | primary | * running |          | running | 26051 | no      | n/a

7、自动切换

1)、主备repmgr.conf

vi /home/pg12/conf/repmgr.

node_id=1
node_name='node1'
conninfo='host=10.111.2.10 port=5666 user=postgres dbname=postgres'
data_directory='/home/pg12/data'
promote_command='/home/pg12/soft/bin/repmgr  standby promote -f /home/pg12/conf/repmgr.conf'
follow_command='/home/pg12/soft/bin/repmgr  standby follow -f /home/pg12/conf/repmgr.conf -W --upstream-node-id=%n'
failover='automatic'
service_start_command  = 'sudo systemctl start postgresql.service'
service_stop_command    = 'sudo systemctl stop postgresql.service'
service_restart_command = 'sudo systemctl restart postgresql.service'
service_reload_command  = 'sudo systemctl reload postgresql.service'
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf  --pid-file /tmp/repmgrd.pid  --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'

vi /home/pg12/conf/repmgr.

node_id=2
node_name='node2'
conninfo='host=10.111.2.11 port=5666 user=postgres dbname=postgres'
data_directory='/home/pg12/data'
promote_command='/home/pg12/soft/bin/repmgr  standby promote -f /home/pg12/conf/repmgr.conf'
follow_command='/home/pg12/soft/bin/repmgr  standby follow -f /home/pg12/conf/repmgr.conf -W --upstream-node-id=%n'
failover='automatic'
service_start_command  = 'sudo systemctl start postgresql.service'
service_stop_command    = 'sudo systemctl stop postgresql.service'
service_restart_command = 'sudo systemctl restart postgresql.service'
service_reload_command  = 'sudo systemctl reload postgresql.service'
repmgrd_service_start_command = '/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf  --pid-file /tmp/repmgrd.pid  --daemonize'
repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'

使用systemctl启停postgres主备需要配置开机启动postgres

[root@cdh01 system]# vi /etc/systemd/system/postgresql.service 

[Unit]
Description=postgresql.service
After=network.target
[Service]
Type=forking
User=pg12
Group=pg12
WorkingDirectory=/home/pg12/soft
ExecStart=/home/pg12/soft/bin/pg_ctl start -D /home/pg12/data
ExecReload=/home/pg12/soft/bin/pg_ctl restart -D /home/pg12/data
ExecStop=/home/pg12/soft/bin/pg_ctl stop -D /home/pg12/data
PrivateTmp=true
[Install]
WantedBy=multi-user.target
[root@cdh01 system]# systemctl enable postgresql.service 
Created symlink from /etc/systemd/system/multi-user.target.wants/postgresql.service to /etc/systemd/system/postgresql.service.
[root@cdh01 system]# systemctl start postgresql.service 
[root@cdh01 system]# ps -ef|grep postgres
pg12       8610      1  0 10:30 ?        00:00:00 /home/pg12/soft/bin/postgres -D /home/pg12/data
pg12       8611   8610  0 10:30 ?        00:00:00 postgres: logger 
pg12       8612   8610  0 10:30 ?        00:00:00 postgres: startup recovering 000000020000000000000009
pg12       8613   8610  0 10:30 ?        00:00:00 postgres: checkpointer 
pg12       8614   8610  0 10:30 ?        00:00:00 postgres: background writer 
pg12       8615   8610  0 10:30 ?        00:00:00 postgres: stats collector 
pg12       8616   8610  0 10:30 ?        00:00:00 postgres: walreceiver streaming 0/9004650
root       8625   8051  0 10:30 pts/0    00:00:00 grep --color=auto postgres
[root@cdh01 system]# systemctl stop postgresql.service 
[root@cdh01 system]# ps -ef|grep postgres
root       8652   8051  0 10:30 pts/0    00:00:00 grep --color=auto postgres

主备配置pg12用户sudo权限

vi /etc/sudoers

Defaults:pg12 !requiretty
      pg12 ALL = NOPASSWD: /usr/bin/systemctl stop postgresql.service, \
        /usr/bin/systemctl start postgresql.service, \
        /usr/bin/systemctl restart postgresql.service, \
        /usr/bin/systemctl reload postgresql.service

补充说明:如上一步支持下面方式启停postgres

service_start_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data start'
service_stop_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data stop'
service_restart_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data restart'
service_reload_command = '/home/pg12/soft/bin/pg_ctl -D /home/pg12/data reload'

2)、使配置生效

[pg12@cdh01 ~]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped
[pg12@cdh01 ~]$ repmgr daemon status
 ID | Name  | Role    | Status    | Upstream | repmgrd     | PID | Paused? | Upstream last seen
----+-------+---------+-----------+----------+-------------+-----+---------+--------------------
 1  | node1 | standby |   running | node2    | not running | n/a | n/a     | n/a                
 2  | node2 | primary | * running |          | not running | n/a | n/a     | n/a                
[pg12@cdh01 ~]$ repmgr daemon start
NOTICE: executing: "/home/pg12/soft/bin/repmgrd -f /home/pg12/conf/repmgr.conf  --pid-file /tmp/repmgrd.pid  --daemonize"
NOTICE: repmgrd was successfully started
[pg12@cdh01 ~]$ repmgr daemon status
 ID | Name  | Role    | Status    | Upstream | repmgrd | PID  | Paused? | Upstream last seen
----+-------+---------+-----------+----------+---------+------+---------+--------------------
 1  | node1 | standby |   running | node2    | running | 9249 | no      | 0 second(s) ago    
 2  | node2 | primary | * running |          | running | 7857 | no      | n/a

3)、停止主库node2

[pg12@cdh02 ~]$ pg_ctl stop
waiting for server to shut down.... done
server stopped

4)、备库node1变为新主

[pg12@cdh01 ~]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 3        | host=10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | primary | - failed  | ?        | default  | 100      |          | host=10.111.2.11 port=5666 user=postgres dbname=postgres

5)、重新克隆node2为备库

[pg12@cdh02 ~]$ repmgr standby clone  -h 10.111.2.10 -Upostgres -p5666 --force -f /home/pg12/soft/etc/postgresql/repmgr.conf 
NOTICE: destination directory "/home/pg12/data" provided
INFO: connecting to source node
DETAIL: connection string is: host=10.111.2.10ser=postgres port=5666
DETAIL: current installation size is 42 MB
INFO: replication slot usage not requested;  no replication slot will be set up for this standby
NOTICE: checking for available walsenders on the source node (2 required)
NOTICE: checking replication connections can be made to the source server (2 required)
INFO: checking and correcting permissions on existing directory "/home/pg12/data"
NOTICE: starting backup (using pg_basebackup)...
HINT: this may take some time; consider using the -c/--fast-checkpoint option
INFO: executing:
  pg_basebackup -l "repmgr base backup"  -D /home/pg12/data -h 10.111.2.10 5666 -U postgres -X stream 
NOTICE: standby clone (using pg_basebackup) complete
NOTICE: you can now start your PostgreSQL server
HINT: for example: /home/pg12/soft/bin/pg_ctl -D /home/pg12/data start
HINT: after starting the server, you need to re-register this standby with "repmgr standby register --force" to update the existing node record
[pg12@cdh02 data]$ pg_ctl start
waiting for server to start....2022-10-28 12:24:18.282 CST user=,db=,id=[47760]LOG:  redirecting log output to logging collector process
2022-10-28 12:24:18.282 CST user=,db=,id=[47760]HINT:  Future log output will appear in directory "/pglog".
 done
server started
[pg12@cdh02 data]$ repmgr cluster show -f /home/pg12/soft/etc/postgresql/repmgr.conf 
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+-------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 3        | host=10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | standby |   running | node1    | default  | 100      | 3        | host=10.111.2.11 port=5666 user=postgres dbname=postgres

8、修改流复制用户和数据库为repmgr

8.1 查看集群状态,并新建用户(主库)

[pg12@localhost ~]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                         
----+-------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 3        | host= 10.111.2.10 port=5666 user=postgres  dbname=postgres
 2  | node2 | standby |   running |          | default  | 100      | 3        | host= 10.111.2.11 port=5666 user=postgres  dbname=postgres
[pg12@localhost ~]$ psql -Upostgres -p5666 postgres
psql (12.14)
Type "help" for help.

postgres=# create user repmgr with password 'repmgr' superuser replication;
CREATE ROLE
postgres=# create database repmgr owner repmgr;
CREATE DATABASE

8.2停止repmgrd服务(主备)

[pg12@localhost ~]$ cd conf
[pg12@localhost conf]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped
[pg12@localhost ~]$ cd conf
[pg12@localhost conf]$ repmgr daemon stop
NOTICE: executing: "kill `cat /tmp/repmgrd.pid`"
NOTICE: repmgrd was successfully stopped

8.3停止postgresql服务(主备)

[pg12@localhost conf]$ pg_ctl stop
waiting for server to shut down....2023-10-27 14:44:04.128 CST [2829] LOG:  received fast shutdown request
2023-10-27 14:44:04.128 CST [2829] LOG:  aborting any active transactions
2023-10-27 14:44:04.129 CST [2829] LOG:  background worker "logical replication launcher" (PID 3403) exited with exit code 1
2023-10-27 14:44:04.129 CST [2831] LOG:  shutting down
2023-10-27 14:44:04.149 CST [2829] LOG:  database system is shut down
 done
server stopped
[pg12@localhost conf]$ pg_ctl stop
waiting for server to shut down.... done
server stopped

8.4 修改repmgr.conf(主备)

vi /home/pg12/conf/repmgr.conf

主库
conninfo='host= 10.111.2.10 port=5666 user=postgres  dbname=postgres' 
改为
conninfo='host=10.111.2.10 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3'


备库
conninfo='host= 10.111.2.11 port=5666 user=postgres  dbname=postgres' 
改为
conninfo='host=10.111.2.11 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3'

8.5修改postgresql.auto.conf(主备)

vi /home/pg12/data/postgresql.auto.conf

主库
primary_conninfo = 'host=10.111.2.11 user=postgres port=5666 application_name=node1'
改为
primary_conninfo = 'host=10.111.2.11 user=repmgr port=5666 application_name=node1'


备库
primary_conninfo = 'user=postgres host=10.111.2.10 port=5666 application_name=node2'
改为
primary_conninfo = 'user=repmgr host=10.111.2.10 port=5666 application_name=node2'

8.6 启动数据库(主备)

主库
[pg12@localhost ~]$ pg_ctl start
waiting for server to start....2023-10-27 14:55:14.261 CST [18464] LOG:  starting PostgreSQL 12.14 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44), 64-bit
2023-10-27 14:55:14.261 CST [18464] LOG:  listening on IPv4 address "0.0.0.0", port 5666
2023-10-27 14:55:14.261 CST [18464] LOG:  listening on IPv6 address "::", port 5666
2023-10-27 14:55:14.262 CST [18464] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5666"
2023-10-27 14:55:14.270 CST [18465] LOG:  database system was shut down at 2023-10-27 14:44:04 CST
2023-10-27 14:55:14.273 CST [18464] LOG:  database system is ready to accept connections
 done
server started
备库
[pg12@localhost ~]$ pg_ctl start
waiting for server to start....2023-10-27 14:55:14.345 CST [15087] LOG:  starting PostgreSQL 12.14 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44), 64-bit
2023-10-27 14:55:14.345 CST [15087] LOG:  listening on IPv4 address "0.0.0.0", port 5666
2023-10-27 14:55:14.345 CST [15087] LOG:  listening on IPv6 address "::", port 5666
2023-10-27 14:55:14.346 CST [15087] LOG:  listening on Unix socket "/tmp/.s.PGSQL.5666"
2023-10-27 14:55:14.354 CST [15088] LOG:  database system was shut down in recovery at 2023-10-27 14:44:04 CST
2023-10-27 14:55:14.354 CST [15088] LOG:  entering standby mode
2023-10-27 14:55:14.355 CST [15088] LOG:  redo starts at 0/500C650
2023-10-27 14:55:14.355 CST [15088] LOG:  consistent recovery state reached at 0/500C738
2023-10-27 14:55:14.355 CST [15088] LOG:  invalid record length at 0/500C738: wanted 24, got 0
2023-10-27 14:55:14.356 CST [15087] LOG:  database system is ready to accept read only connections
2023-10-27 14:55:14.360 CST [15092] LOG:  started streaming WAL from primary at 0/5000000 on timeline 3
 done
server started

8.7 重新注册主备库

主库
[pg12@localhost ~]$ cd conf/
[pg12@localhost conf]$ repmgr primary register --force
INFO: connecting to primary database...
NOTICE: attempting to install extension "repmgr"
NOTICE: "repmgr" extension successfully installed
NOTICE: primary node record (ID: 1) registered
[pg12@localhost conf]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                                                                                               
----+-------+---------+-----------+----------+----------+----------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 3        | host=10.111.2.10 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3
 
 备库
[pg12@localhost ~]$ cd conf/
[pg12@localhost conf]$ repmgr standby register --force
INFO: connecting to local node "node2" (ID: 2)
INFO: connecting to primary database
WARNING: --upstream-node-id not supplied, assuming upstream node is primary (node ID: 1)
INFO: standby registration complete
NOTICE: standby node "node2" (ID: 2) successfully registered
[pg12@localhost conf]$ repmgr cluster show
 ID | Name  | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                                                                                               
----+-------+---------+-----------+----------+----------+----------+----------+--------------------------------------------------------------------------------------------------------------------------------------------------
 1  | node1 | primary | * running |          | default  | 100      | 3        | host=10.111.2.10 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3
 2  | node2 | standby |   running | node1    | default  | 100      | 3        | host=10.111.2.11 port=5666 user=repmgr dbname=repmgr connect_timeout=10 keepalives=1 keepalives_idle=10 keepalives_interval=1 keepalives_count=3