REPMGR高可用+VIP方案

 

大纲

REPMGR软件介绍

REPMGR工作流程介绍

REPMGR架构简单讲解

REPMGR工具介绍

REPMGR安装部署

REPMGR failover测试

REPMGR VIP漂移

一、 简介

1.1环境简介

主机名

角色

IP

开放端口

repmgr01

主库

10.10.10.201

6000

repmgr02

备库

10.10.10.202

6000



10.10.10.111

6000

 

 

1.2 架构

 

 

 

二、 环境准备

2.1环境检查

Repmgr01

 

#系统版本

[root@repmgr01 ~]# cat /etc/redhat-release

CentOS Linux release 7.8.2003 (Core)

#内核版本

[root@repmgr01 ~]# uname -r

3.10.0-1127.el7.x86_64

#IP地址

[root@repmgr01 ~]# hostname -I

10.10.10.201  

Repmgr02

 

#系统版本

[root@repmgr02 ~]# cat /etc/redhat-release

CentOS Linux release 7.8.2003 (Core)

#内核版本

[root@repmgr02 ~]# uname -r

3.10.0-1127.el7.x86_64

#IP地址

[root@repmgr02 ~]# hostname -I

10.10.10.202  

 

2.2安装PostgreSQL

2.2.1 PostgreSQL安装

 

#安装依赖包

yum -y install gcc gcc-c++ openssl openssl-devel readline readline-devel zlib zlib-devel llvm5.0 llvm5.0-devel libxml2 libxml2-devel clang flex bison libevent-devel

#解压源码包后,进入源码目录执行配置(configure)

 

./configure --prefix=/home/postgres/soft  --with-pgport=6000

#编译并安装

gmake world -j24 && gmake install-world -j24

 

#配置环境变量

echo "export PGHOME=/home/postgres/soft" >> /home/postgres/.bashrc

echo "export PGDATA=/home/postgres/data" >> /home/postgres/.bashrc

echo "export PATH=\${PGHOME}/bin:\${PATH}" >> /home/postgres/.bashrc



#读取环境变量

source ~/.bashrc

 

#初始化数据库并启用 checksum校验

initdb -D $PGDATA -k

 

#启动数据库

pg_ctl start -D $PGDATA -l /tmp/logfile           

 

#如果没有使用postgres用户初始化数据库,则需要创建 postgres 超级用户并设置密码为 1qaz@WSX

CREATE USER postgres WITH SUPERUSER PASSWORD '1qaz@WSX';

#如果主机用户为postgres(initdb 已自动创建同名超级用户),则只需修改密码

alter user postgres with password '1qaz@WSX';

 

#查询密码  

psql -U postgres

select *from pg_shadow where usename = 'postgres';

 

#编辑 ~/.pgpass 文件,并配置 postgres 用户信息

[postgres@repmgr01 ~]$ vim .pgpass

10.10.10.201:6000:postgres:postgres:1qaz@WSX

[postgres@repmgr01 ~]$ chmod 0600 .pgpass

 




#打开日志收集功能

[postgres@repmgr01 ~]$ psql -U postgres -d postgres -Aqt

postgres=# ALTER SYSTEM SET logging_collector = on;

#日志存放路径为 /home/postgres/pglog

[postgres@repmgr01 ~]$  mkdir -p /home/postgres/pglog

[postgres@repmgr01 ~]$  chown postgres.postgres /home/postgres/pglog

#修改日志存放目录为 /home/postgres/pglog

[postgres@repmgr01 ~]$ psql -U postgres -d postgres -Aqt

postgres=# show log_directory ;

log

postgres=# ALTER SYSTEM SET log_directory = '/home/postgres/pglog';

#修改日志输出格式为 csvlog

#postgres=# show log_destination ;

postgres=# ALTER SYSTEM SET log_destination = csvlog;

#日志及其它相关配置

[postgres@repmgr01 ~]$ psql -U postgres -d postgres -Aqt

 ALTER SYSTEM SET listen_addresses = '*';

 ALTER SYSTEM SET log_rotation_size = 0;

 ALTER SYSTEM SET log_truncate_on_rotation = on;

 ALTER SYSTEM SET log_line_prefix = '%m-%u-%d-%p';

 ALTER SYSTEM SET log_checkpoints = off;

 ALTER SYSTEM SET log_rotation_age = 1440;

 ALTER SYSTEM SET log_filename = 'postgresql.%d';

 ALTER SYSTEM SET full_page_writes = on;    

 

#修改postgresql.conf

 

vi /home/postgres/data/postgresql.conf

port=6000

cluster_name= 'repmgr01'

listen_addresses = '*'

hot_standby=on

wal_log_hints=on

logging_collector=on

log_file_mode=0600

#放到最后

shared_preload_libraries='repmgr'

[postgres@repmgr01 ~]$ egrep -v "^#" $PGDATA/pg_hba.conf | egrep "md5"

local   all             postgres                                md5

host    all             postgres        0.0.0.0/0               md5

host    all             all             0.0.0.0/0               md5  

 

 

#重新启动数据库

$ pg_ctl  restart -D $PGDATA -l /tmp/logfile

 

2.3安装Repmgr

2.3.1 Repmgr安装

#编译安装

[postgres@repmgr01 resource]$ cd repmgr-5.1.0/

[postgres@repmgr01 repmgr-5.1.0]$ ./configure --prefix=/home/postgres/soft

[postgres@repmgr01 repmgr-5.1.0]$ make -j24 && make install -j24

 

#主库创建repmgr库存储元数据

create user repmgr superuser password '1qaz@WSX' ;

create database repmgr owner repmgr;

#配置.pgpass

[postgres@repmgr01 ~]$ cat .pgpass

10.10.10.201:6000:postgres:postgres:1qaz@WSX

10.10.10.202:6000:postgres:postgres:1qaz@WSX

10.10.10.201:6000:repmgr:repmgr:1qaz@WSX

10.10.10.202:6000:repmgr:repmgr:1qaz@WSX

[postgres@repmgr01 ~]$

 

 

#创建 repmgr 用户,权限为 superuser

[postgres@repmgr01 conf]$ psql -U postgres -d postgres

psql (10.14)

Type "help" for help.

 

postgres=# CREATE USER repmgr WITH SUPERUSER PASSWORD '1qaz@WSX';

CREATE ROLE

        --编辑 pg_hba.conf 配置 repmgr 用户认证

[postgres@repmgr01 conf]$ egrep "repmgr" $PGDATA/pg_hba.conf

host    all             repmgr          0.0.0.0/0               trust #此行一定要放置于 all 的前面哦

host    replication     repmgr          10.10.10.0/24           trust

#重新加载配置文件

[postgres@repmgr01 conf]$ pg_ctl reload

2.3.2 Repmgr配置文件

 

#节点一上配置repmgr.conf

touch /home/postgres/repmgr.conf

vi /home/postgres/repmgr.conf

 

#配置文件内容

node_id=201

node_name='repmgr01'

conninfo='host=10.10.10.201 port=6000 user=repmgr dbname=repmgr connect_timeout=2'

data_directory= '/home/postgres/data'

replication_user= 'repmgr'

replication_type= 'physical'

repmgr_bindir= '/home/postgres/soft/bin'

pg_bindir= '/home/postgres/soft/bin'

monitoring_history=yes

monitor_interval_secs=5

log_level='debug'

log_file='/home/postgres/repmgr.log'

failover='automatic'

connection_check_type=ping

reconnect_attempts=3

reconnect_interval=10

promote_command='/home/postgres/repmgr_promote.sh'

follow_command='/home/postgres/repmgr_follow.sh %n'

 

 

#节点一上配置repmgr_promote.sh

 

vim repmgr_promote.sh

 

#脚本内容

 

#!/bin/bash

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: del VIP in 202 start" >> /home/postgres/repmgr.log

 

/bin/ssh -t postgres@10.10.10.202 "/bin/sudo /usr/sbin/ip addr del 10.10.10.111/24 dev ens37"

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: del VIP in 202 finish" >> /home/postgres/repmgr.log

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: promote start" >> /home/postgres/repmgr.log

 

/home/postgres/soft/bin/repmgr standby promote -f /home/postgres/repmgr.conf --log-to-file

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: promote finish" >> /home/postgres/repmgr.log

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: add VIP start" >> /home/postgres/repmgr.log

 

/bin/sudo /usr/sbin/ip addr add 10.10.10.111/24 dev ens37

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: add VIP finish" >> /home/postgres/repmgr.log

 

 

#节点一上配置repmgr_follow.sh

vim repmgr_follow.sh

#脚本内容

#! /bin/bash

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: follow $1" >> /home/postgres/repmgr.log

 

/home/postgres/soft/bin/repmgr standby follow -f /home/postgres/repmgr.conf --upstream-node-id=$1 --log-to-file

 

 

#节点二上配置repmgr.conf

 

 

vi /home/postgres/repmgr.conf

#配置文件内容

node_id=202

node_name='repmgr02'

conninfo='host=10.10.10.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2'

data_directory= '/home/postgres/data'

replication_user= 'repmgr'

replication_type= 'physical'

repmgr_bindir= '/home/postgres/soft/bin'

pg_bindir= '/home/postgres/soft/bin'

monitoring_history=yes

monitor_interval_secs=5

log_level='debug'

log_file='/home/postgres/repmgr.log'

failover='automatic'

connection_check_type=ping

reconnect_attempts=3

reconnect_interval=10

promote_command='/home/postgres/repmgr_promote.sh'

follow_command='/home/postgres/repmgr_follow.sh %n'

 

 

#节点二上配置repmgr_promote.sh

vim repmgr_promote.sh

#脚本内容

#!/bin/bash

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: del VIP in 201 start" >> /home/postgres/repmgr.log

 

/bin/ssh -t postgres@10.10.10.201 "/bin/sudo /usr/sbin/ip addr del 10.10.10.111/24 dev ens37"

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: del VIP in 201 finish" >> /home/postgres/repmgr.log

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: promote start" >> /home/postgres/repmgr.log

 

/home/postgres/soft/bin/repmgr standby promote -f /home/postgres/repmgr.conf --log-to-file

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: promote finish" >> /home/postgres/repmgr.log

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: add VIP start" >> /home/postgres/repmgr.log

 

/bin/sudo /usr/sbin/ip addr add 10.10.10.111/24 dev ens37

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: add VIP finish" >> /home/postgres/repmgr.log

 

 

 

#节点二上配置 repmgr_follow.sh

vim repmgr_follow.sh

#脚本内容

#! /bin/bash

 

echo "["`date "+%Y-%m-%d %H:%M:%S"`"]: follow $1" >> /home/postgres/repmgr.log

 

/home/postgres/soft/bin/repmgr standby follow -f /home/postgres/repmgr.conf --upstream-node-id=$1 --log-to-file

 

2.4 Repmgr配置后注册

2.4.1 主节点配置

 

#节点一主节点注册

 

repmgr -f /home/postgres/repmgr.conf primary register

#注册之后查看

repmgr -f /home/postgres/repmgr.conf cluster show   

 

[postgres@repmgr01 ~]$ repmgr -f /home/postgres/repmgr.conf cluster show

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

 ID  | Name     | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                      

-----+----------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------------------

 201 | repmgr01 | primary | * running |          | default  | 100      | 1        | host=10.10.10.201 port=6000 user=repmgr dbname=repmgr connect_timeout=2

[postgres@repmgr01 ~]$

2.4.2 从节点配置

 

#从节点按照与节点一相同的步骤安装PostgreSQL和Repmgr;安装完成后,先停止节点二上的数据库,再删除节点二上的$PGDATA目录,然后执行以下操作(先用 --dry-run 检查克隆条件是否满足)

 

[postgres@repmgr02 ~]$ repmgr -h repmgr01 -p6000 -U repmgr -d repmgr -f /home/postgres/repmgr.conf  --dry-run standby clone

 

NOTICE: destination directory "/home/postgres/data" provided

INFO: connecting to source node

DETAIL: connection string is: host=repmgr01 port=6000 user=repmgr dbname=repmgr

DETAIL: current installation size is 30 MB

INFO: "repmgr" extension is installed in database "repmgr"

DEBUG: 1 node records returned by source node

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: upstream_node_id determined as 201

INFO: parameter "max_wal_senders" set to 10

NOTICE: checking for available walsenders on the source node (2 required)

INFO: sufficient walsenders available on the source node

DETAIL: 2 required, 10 available

NOTICE: checking replication connections can be made to the source server (2 required)

INFO: required number of replication connections could be made to the source server

DETAIL: 2 replication connections required

NOTICE: standby will attach to upstream node 201

HINT: consider using the -c/--fast-checkpoint option

INFO: all prerequisites for "standby clone" are met

 

#如果上述 dry-run 检查全部通过,则去掉 --dry-run 执行实际克隆

 

[postgres@repmgr02 ~]$ repmgr -h repmgr01 -p6000 -U repmgr -d repmgr -f /home/postgres/repmgr.conf   standby clone

NOTICE: destination directory "/home/postgres/data" provided

INFO: connecting to source node

DETAIL: connection string is: host=repmgr01 port=6000 user=repmgr dbname=repmgr

DETAIL: current installation size is 30 MB

DEBUG: 1 node records returned by source node

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: upstream_node_id determined as 201

NOTICE: checking for available walsenders on the source node (2 required)

NOTICE: checking replication connections can be made to the source server (2 required)

INFO: creating directory "/home/postgres/data"...

NOTICE: starting backup (using pg_basebackup)...

HINT: this may take some time; consider using the -c/--fast-checkpoint option

INFO: executing:

  /home/postgres/soft/bin/pg_basebackup -l "repmgr base backup"  -D /home/postgres/data -h repmgr01 -p 6000 -U repmgr -X stream

DEBUG: create_recovery_file(): creating "/home/postgres/data/recovery.conf"...

DEBUG: recovery.conf line: standby_mode = 'on'

 

DEBUG: recovery.conf line: primary_conninfo = 'host=10.10.10.201 port=6000 user=repmgr application_name=repmgr02 connect_timeout=2'

 

DEBUG: recovery.conf line: recovery_target_timeline = 'latest'

 

NOTICE: standby clone (using pg_basebackup) complete

NOTICE: you can now start your PostgreSQL server

HINT: for example: pg_ctl -D /home/postgres/data start

HINT: after starting the server, you need to register this standby with "repmgr standby register"

 

 

#先启动从库(pg_ctl -D /home/postgres/data start),然后注册从库

 

repmgr -f /home/postgres/repmgr.conf standby register

 

[postgres@repmgr02 data]$ repmgr -f /home/postgres/repmgr.conf standby register

INFO: connecting to local node "repmgr02" (ID: 202)

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

INFO: connecting to primary database

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

WARNING: --upstream-node-id not supplied, assuming upstream node is primary (node ID 201)

INFO: standby registration complete

NOTICE: standby node "repmgr02" (ID: 202) successfully registered

 

#在节点一上查看集群状态,确认从库已注册并显示

[postgres@repmgr01 ~]$ repmgr -f /home/postgres/repmgr.conf cluster show

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

 ID  | Name     | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                      

-----+----------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------------------

 201 | repmgr01 | primary | * running |          | default  | 100      | 1        | host=10.10.10.201 port=6000 user=repmgr dbname=repmgr connect_timeout=2

 202 | repmgr02 | standby |   running | repmgr01 | default  | 100      | 1        | host=10.10.10.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2

[postgres@repmgr01 ~]$

 

 

#节点一上添加vip

/bin/sudo /usr/sbin/ip addr add 10.10.10.111/24 dev ens37

 

#在节点二上通过VIP连接数据库进行验证

 

[postgres@repmgr02 data]$ psql -h 10.10.10.111 -p 6000 -U repmgr

psql (11.5)

Type "help" for help.

 

repmgr=# \q

 

 

[postgres@repmgr01 ~]$ repmgr -f /home/postgres/repmgr.conf cluster show

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

 ID  | Name     | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                      

-----+----------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------------------

 201 | repmgr01 | primary | * running |          | default  | 100      | 1        | host=10.10.10.201 port=6000 user=repmgr dbname=repmgr connect_timeout=2

 202 | repmgr02 | standby |   running | repmgr01 | default  | 100      | 1        | host=10.10.10.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2

 

#配置并启动守护进程 repmgrd(两个节点都需要启动)

 

 

注意:postgresql.conf 中需要配置如下参数(修改后需重启数据库生效)

 shared_preload_libraries = 'repmgr'

 

 

 

create table t(id int);

 

 

[postgres@repmgr01 ~]$ repmgrd -f ~/repmgr.conf -d

[2021-01-18 02:17:56] [NOTICE] redirecting logging output to "/home/postgres/repmgr.log"

 

[postgres@repmgr02 ~]$ repmgrd -f ~/repmgr.conf -d

[2021-01-18 02:17:56] [NOTICE] redirecting logging output to "/home/postgres/repmgr.log"

 

三、 VIP漂移测试

3.1 强杀主节点PG服务

 

[postgres@repmgr01 ~]$ ps -ef|grep postgres

postgres   2169      1  0 01:08 pts/0    00:00:00 /home/postgres/soft/bin/postgres -D /home/postgres/data

postgres   2170   2169  0 01:08 ?        00:00:00 postgres: repmgr01: logger   

postgres   2172   2169  0 01:08 ?        00:00:00 postgres: repmgr01: checkpointer   

postgres   2173   2169  0 01:08 ?        00:00:00 postgres: repmgr01: background writer   

postgres   2174   2169  0 01:08 ?        00:00:00 postgres: repmgr01: walwriter   

postgres   2175   2169  0 01:08 ?        00:00:00 postgres: repmgr01: autovacuum launcher   

postgres   2176   2169  0 01:08 ?        00:00:00 postgres: repmgr01: stats collector   

postgres   2177   2169  0 01:08 ?        00:00:00 postgres: repmgr01: logical replication launcher   

root       2506   1700  0 01:19 pts/0    00:00:00 su - postgres

postgres   2507   2506  0 01:19 pts/0    00:00:00 -bash

postgres   2933   2169  0 01:38 ?        00:00:00 postgres: repmgr01: walsender repmgr 10.10.10.202(41464) streaming 0/3000660

postgres   2943   2507  0 01:38 pts/0    00:00:00 ps -ef

postgres   2944   2507  0 01:38 pts/0    00:00:00 grep --color=auto postgres

[postgres@repmgr01 ~]$ kill -9 2169

 

#查看

 

[postgres@repmgr01 ~]$ ps -ef|grep postgres

root       2506   1700  0 01:19 pts/0    00:00:00 su - postgres

postgres   2507   2506  0 01:19 pts/0    00:00:00 -bash

postgres   2967   2507  0 01:39 pts/0    00:00:00 ps -ef

postgres   2968   2507  0 01:39 pts/0    00:00:00 grep --color=auto postgres

[postgres@repmgr01 ~]$ repmgr -f /home/postgres/repmgr.conf cluster show

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

ERROR: connection to database failed

DETAIL:

could not connect to server: Connection refused

Is the server running on host "10.10.10.201" and accepting

TCP/IP connections on port 6000?

 

DETAIL: attempted to connect using:

  user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr

[postgres@repmgr01 ~]$

#节点二上查看

[postgres@repmgr02 ~]$ repmgr -f /home/postgres/repmgr.conf cluster show

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

 ID  | Name     | Role    | Status        | Upstream   | Location | Priority | Timeline | Connection string                                                      

-----+----------+---------+---------------+------------+----------+----------+----------+-------------------------------------------------------------------------

 201 | repmgr01 | primary | ? unreachable | ?          | default  | 100      |          | host=10.10.10.201 port=6000 user=repmgr dbname=repmgr connect_timeout=2

 202 | repmgr02 | standby |   running     | ? repmgr01 | default  | 100      | 1        | host=10.10.10.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2

 

WARNING: following issues were detected

  - unable to connect to node "repmgr01" (ID: 201)

  - node "repmgr01" (ID: 201) is registered as an active primary but is unreachable

  - unable to connect to node "repmgr02" (ID: 202)'s upstream node "repmgr01" (ID: 201)

  - unable to determine if node "repmgr02" (ID: 202) is attached to its upstream node "repmgr01" (ID: 201)

 

HINT: execute with --verbose option to see connection error messages

[postgres@repmgr02 ~]$ ip addr

 

 

#恢复节点1(先启动数据库完成崩溃恢复,再正常停止,使其处于干净关闭状态,以便后续执行 pg_rewind)

[postgres@repmgr01 data]$ pg_ctl start -D $PGDATA -l /tmp/logfile

pg_ctl: another server might be running; trying to start server anyway

waiting for server to start.... done

server started

[postgres@repmgr01 data]$ pg_ctl stop -D $PGDATA -l /tmp/logfile

waiting for server to shut down.... done

server stopped

 

#重新加入集群

[postgres@repmgr01 data]$ repmgr -f /home/postgres/repmgr.conf node rejoin -d 'host=10.10.10.111 port=6000 user=repmgr dbname=repmgr connect_timeout=2' --force-rewind

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

DEBUG: local tli: 1; local_xlogpos: 0/3030850; follow_target_history->tli: 1; follow_target_history->end: 0/30303B0

NOTICE: pg_rewind execution required for this node to attach to rejoin target node 202

DETAIL: rejoin target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/3030850

NOTICE: executing pg_rewind

DETAIL: pg_rewind command is "/home/postgres/soft/bin/pg_rewind -D '/home/postgres/data' --source-server='host=10.10.10.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2'"

NOTICE: 0 files copied to /home/postgres/data

NOTICE: setting node 201's upstream to node 202

DEBUG: create_recovery_file(): creating "/home/postgres/data/recovery.conf"...

DEBUG: recovery.conf line: standby_mode = 'on'

 

DEBUG: recovery.conf line: primary_conninfo = 'user=repmgr connect_timeout=2 host=10.10.10.202 port=6000 application_name=repmgr01'

 

DEBUG: recovery.conf line: recovery_target_timeline = 'latest'

 

WARNING: unable to ping "host=10.10.10.201 port=6000 user=repmgr dbname=repmgr password=1qaz@WSX connect_timeout=2"

DETAIL: PQping() returned "PQPING_NO_RESPONSE"

NOTICE: starting server using "/home/postgres/soft/bin/pg_ctl  -w -D '/home/postgres/data' start"

NOTICE: NODE REJOIN successful

DETAIL: node 201 is now attached to node 202

 

#节点二上查询集群状态

[postgres@repmgr02 data]$  repmgr -f /home/postgres/repmgr.conf cluster show

DEBUG: connecting to: "user=repmgr password=1qaz@WSX connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.201 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

DEBUG: connecting to: "user=repmgr connect_timeout=2 dbname=repmgr host=10.10.10.202 port=6000 fallback_application_name=repmgr"

 ID  | Name     | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                                      

-----+----------+---------+-----------+----------+----------+----------+----------+-------------------------------------------------------------------------

 201 | repmgr01 | standby |   running | repmgr02 | default  | 100      | 1        | host=10.10.10.201 port=6000 user=repmgr dbname=repmgr connect_timeout=2

 202 | repmgr02 | primary | * running |          | default  | 100      | 2        | host=10.10.10.202 port=6000 user=repmgr dbname=repmgr connect_timeout=2