1.postgres

(--with-python,python3)


-- 操作系统

CentOS Linux release 7.4.1708 (Core)

-- hosts文件

/etc/hosts

127.0.0.1 localhost

# yum install epel-release gcc-c++ zlib-devel readline-devel python3-pip python3-devel lz4-devel perl-ExtUtils-Embed openssl-devel libxml2-devel libzstd-devel bzip2-devel libyaml-devel

# ./configure --prefix=/opt/pgsql/14.1 --with-lz4 --with-perl --with-python PYTHON=/usr/bin/python3 --with-openssl --with-libxml --with-segsize=4 --enable-debug

# make

# make install

2.pgbackrest

(openssl-devel libxml2-devel lz4-devel libzstd-devel bzip2-devel libyaml-devel)

# export PATH=/opt/pgsql/14.1/bin:$PATH

# export LD_LIBRARY_PATH=/opt/pgsql/14.1/lib:$LD_LIBRARY_PATH

# tar zxvf pgbackrest-release-2.36.tar.gz

# cd pgbackrest-release-2.36

# cd src/

# ./configure

# make

# make install

3.patroni

# export PATH=/opt/pgsql/14.1/bin:$PATH

# pip3 install -i https://mirrors.aliyun.com/pypi/simple --upgrade pip

# pip3 --version

WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.

To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.

pip 21.3.1 from /usr/local/lib/python3.6/site-packages/pip (python 3.6)

#

# pip3 install -i https://mirrors.aliyun.com/pypi/simple psycopg2

# pip3 install -i https://mirrors.aliyun.com/pypi/simple setuptools_rust

# pip3 install -i https://mirrors.aliyun.com/pypi/simple patroni[raft]

# pip3 list

WARNING: pip is being invoked by an old script wrapper. This will fail in a future version of pip.

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.

To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.

Package Version

------------------ -------

cffi 1.15.0

click 8.0.3

cryptography 36.0.0

importlib-metadata 4.8.2

patroni 2.1.2

pip 21.3.1

prettytable 2.4.0

psutil 5.8.0

psycopg2 2.9.2

pycparser 2.21

pysyncobj 0.3.10

python-dateutil 2.8.2

PyYAML 6.0

semantic-version 2.8.5

setuptools 59.5.0

setuptools-rust 1.1.2

six 1.16.0

typing_extensions 4.0.1

urllib3 1.26.7

wcwidth 0.2.5

ydiff 1.2

zipp 3.6.0

[root@pg01 ~]#


4.bloate tools

# pgcompacttable

# yum install perl-Time-HiRes perl-DBI perl-DBD-Pg

5.建立数据库用户

# useradd postgres

# passwd postgres

# mkdir /pgdata

# chown postgres.postgres /pgdata/

# chmod 700 /pgdata/

-- 初始数据库,优化参数

-- pg_hba.conf

$ more /pgdata/pg_hba.conf

local all all trust

host all all 127.0.0.1/32 md5

host all all 0.0.0.0/0 md5

host replication repl_user 127.0.0.1/32 trust

host replication repl_user 192.168.43.0/24 md5

$


-- postgresql.conf

$ grep -v '^#' postgresql.base.conf|grep -v '^$'|grep -v '^[[:space:]]*#'

listen_addresses = '*' # what IP address(es) to listen on;

max_connections = 100 # (change requires restart)

shared_buffers = 128MB # min 128kB

dynamic_shared_memory_type = posix # the default is the first option

wal_level = logical # minimal, replica, or logical

wal_log_hints = on # also do full page writes of non-critical updates

max_wal_size = 10GB

min_wal_size = 80MB

archive_mode = on # enables archiving; off, on, or always

archive_command = '/bin/true' # command to use to archive a logfile segment

wal_keep_size = 10240 # in megabytes; 0 disables

hot_standby_feedback = on # send info from standby to prevent

logging_collector = on # Enable capturing of stderr and csvlog

log_filename = 'postgresql-%a.log' # log file name pattern,

log_rotation_size = 0 # Automatic rotation of logfiles will

log_truncate_on_rotation = on # If on, an existing log file with the

log_min_duration_statement = 5000 # -1 is disabled, 0 logs all statements

log_autovacuum_min_duration = 0 # log autovacuum activity;

log_checkpoints = on

log_lock_waits = on # log lock waits >= deadlock_timeout

log_recovery_conflict_waits = on # log standby recovery conflict waits

log_temp_files = 0 # log temporary files equal or larger

log_timezone = 'PRC'

track_io_timing = on

track_wal_io_timing = on

compute_query_id = on

datestyle = 'iso, mdy'

timezone = 'PRC'

lc_messages = 'en_US.UTF-8' # locale for system error message

lc_monetary = 'en_US.UTF-8' # locale for monetary formatting

lc_numeric = 'en_US.UTF-8' # locale for number formatting

lc_time = 'en_US.UTF-8' # locale for time formatting

default_text_search_config = 'pg_catalog.english'

注意事项:

创建数据库一主两从集群。

验证主从服务正常。

然后停止数据库服务。


6.patroni

-- 创建目录

# mkdir /var/lib/raft

# chown postgres.postgres /var/lib/raft

-- 配置文件

# touch /etc/patroni.yml

# chown postgres.postgres /etc/patroni.yml

# su - postgres

$ vi /etc/patroni.yml

scope: pg_cluster01

namespace: /service/

name: pdb01


restapi:

listen: 192.168.43.100:8008

connect_address: 192.168.43.100:8008


raft:

data_dir: /var/lib/raft

self_addr: 192.168.43.100:5010

partner_addrs: ['192.168.43.218:5010','192.168.43.71:5010']


bootstrap:

dcs:

ttl: 30

loop_wait: 10

retry_timeout: 10

maximum_lag_on_failover: 1048576

master_start_timeout: 300

synchronous_mode: on

postgresql:

use_pg_rewind: true

use_slots: true

parameters:

max_connections: 1000

wal_level: logical

wal_log_hints: on


initdb:

- encoding:UTF8

- data-checksums


pg_hba:

- host all all 0.0.0.0/0 md5

- host replication repl_user 192.168.43.0/24 md5


postgresql:

listen: 0.0.0.0:5432

connect_address: 192.168.43.100:5432

data_dir: /pgdata

bin_dir: /opt/pgsql/14.1/bin

pgpass: /home/postgres/.pgpass

authentication:

replication:

username: repl_user

password: repl_user

superuser:

username: postgres

password: postgres

rewind:

username: postgres

password: postgres

parameters:

unix_socket_directories: '.'


watchdog:

mode: automatic # Allowed values: off, automatic, required

device: /dev/watchdog

safety_margin: 5


tags:

nofailover: false

noloadbalance: false

clonefrom: false

nosync: false


-- 校验配置


$ patroni --validate-config /etc/patroni.yml

raft.partner_addrs ['192.168.43.218:5010', '192.168.43.71:5010'] didn't pass validation: '192.168.43.218:5010 is not reachable'

$


-- patroni service file


cat <<EOF | tee /etc/systemd/system/patroni.service


[Unit]

Description=Runners to orchestrate a high-availability PostgreSQL

After=syslog.target network.target


[Service]

User=postgres

Group=postgres

Type=simple

ExecStartPre=-/bin/sudo /usr/sbin/modprobe softdog

ExecStartPre=-/bin/sudo /bin/chown postgres /dev/watchdog

Environment="LD_LIBRARY_PATH=/opt/pgsql/14.1/lib"

ExecStart=/usr/local/bin/patroni /etc/patroni.yml

ExecReload=/bin/kill -s HUP $MAINPID

KillMode=process

Restart=no

TimeoutSec=30


[Install]

WantedBy=multi-user.target

EOF


-- postgres sudo


## Allow root to run any commands anywhere

root ALL=(ALL) ALL

postgres ALL=(ALL) NOPASSWD:ALL


-- 集群启动命令


# systemctl start patroni


-- 集群启动顺序


集群初次顺序:

primary--> slave01 --> slave02


集群停止顺序:

replication --> sync standby --> primary


集群后续启动顺序:

primary --> sync standby --> replication


7.Raft集群状态检查

$ syncobj_admin -conn 192.168.43.100:5010 -status

commit_idx: 699

enabled_code_version: 0

last_applied: 699

leader: 192.168.43.100:5010

leader_commit_idx: 699

log_len: 198

match_idx_count: 2

match_idx_server_192.168.43.218:5010: 699

match_idx_server_192.168.43.71:5010: 699

next_node_idx_count: 2

next_node_idx_server_192.168.43.218:5010: 700

next_node_idx_server_192.168.43.71:5010: 700

partner_node_status_server_192.168.43.218:5010: 2

partner_node_status_server_192.168.43.71:5010: 2

partner_nodes_count: 2

raft_term: 1

readonly_nodes_count: 0

revision: deprecated

self: 192.168.43.100:5010

self_code_version: 0

state: 2

uptime: 183

version: 0.3.10

$


8.集群switchover

$ patronictl -c /etc/patroni.yml list

+--------+----------------+--------------+---------+----+-----------+

| Member | Host | Role | State | TL | Lag in MB |

+ Cluster: pg_cluster01 (7038882294140871964) -----+----+-----------+

| pdb01 | 192.168.43.100 | Sync Standby | running | 3 | 0 |

| pdb02 | 192.168.43.218 | Leader | running | 3 | |

| pdb03 | 192.168.43.71 | Replica | running | 3 | 0 |

+--------+----------------+--------------+---------+----+-----------+

$ patronictl -c /etc/patroni.yml list

+--------+----------------+--------------+---------+----+-----------+

| Member | Host | Role | State | TL | Lag in MB |

+ Cluster: pg_cluster01 (7038882294140871964) -----+----+-----------+

| pdb01 | 192.168.43.100 | Sync Standby | running | 3 | 0 |

| pdb02 | 192.168.43.218 | Leader | running | 3 | |

| pdb03 | 192.168.43.71 | Replica | running | 3 | 0 |

+--------+----------------+--------------+---------+----+-----------+

[postgres@pg02 ~]$ patronictl -c /etc/patroni.yml switchover

Master [pdb02]:

Candidate ['pdb01', 'pdb03'] []: pdb01

When should the switchover take place (e.g. 2021-12-07T22:53 ) [now]:

Current cluster topology

+--------+----------------+--------------+---------+----+-----------+

| Member | Host | Role | State | TL | Lag in MB |

+ Cluster: pg_cluster01 (7038882294140871964) -----+----+-----------+

| pdb01 | 192.168.43.100 | Sync Standby | running | 3 | 0 |

| pdb02 | 192.168.43.218 | Leader | running | 3 | |

| pdb03 | 192.168.43.71 | Replica | running | 3 | 0 |

+--------+----------------+--------------+---------+----+-----------+

Are you sure you want to switchover cluster pg_cluster01, demoting current master pdb02? [y/N]: y

2021-12-07 21:53:52.72130 Successfully switched over to "pdb01"

+--------+----------------+---------+---------+----+-----------+

| Member | Host | Role | State | TL | Lag in MB |

+ Cluster: pg_cluster01 (7038882294140871964) +----+-----------+

| pdb01 | 192.168.43.100 | Leader | running | 3 | |

| pdb02 | 192.168.43.218 | Replica | stopped | | unknown |

| pdb03 | 192.168.43.71 | Replica | running | 3 | 0 |

+--------+----------------+---------+---------+----+-----------+

$


$ patronictl -c /etc/patroni.yml list

+--------+----------------+--------------+---------+----+-----------+

| Member | Host | Role | State | TL | Lag in MB |

+ Cluster: pg_cluster01 (7038882294140871964) -----+----+-----------+

| pdb01 | 192.168.43.100 | Leader | running | 4 | |

| pdb02 | 192.168.43.218 | Replica | running | 4 | 0 |

| pdb03 | 192.168.43.71 | Sync Standby | running | 4 | 0 |

+--------+----------------+--------------+---------+----+-----------+

$

9.故障切换

停止pdb01服务down

# systemctl stop patroni

#

切换结果

$ patronictl -c /etc/patroni.yml list

+--------+----------------+--------------+---------+----+-----------+

| Member | Host | Role | State | TL | Lag in MB |

+ Cluster: pg_cluster01 (7038882294140871964) -----+----+-----------+

| pdb02 | 192.168.43.218 | Sync Standby | running | 5 | 0 |

| pdb03 | 192.168.43.71 | Leader | running | 5 | |

+--------+----------------+--------------+---------+----+-----------+

$

启动pdb01服务

# systemctl start patroni

集群状态检查

$ patronictl -c /etc/patroni.yml list

+--------+----------------+--------------+---------+----+-----------+

| Member | Host | Role | State | TL | Lag in MB |

+ Cluster: pg_cluster01 (7038882294140871964) -----+----+-----------+

| pdb01 | 192.168.43.100 | Replica | running | 5 | 0 |

| pdb02 | 192.168.43.218 | Sync Standby | running | 5 | 0 |

| pdb03 | 192.168.43.71 | Leader | running | 5 | |

+--------+----------------+--------------+---------+----+-----------+

$


10.错误排查

1).patroni日志提示pg_hba.conf文件缺少IPV6信息

2021-12-07 17:21:17.643 CST [14332] FATAL: no pg_hba.conf entry for host "::1", user "postgres", database "postgres", no encryption

解决办法:

hosts文件去掉IPV6,仅保留如下内容

# more /etc/hosts

127.0.0.1 localhost

#

2).patroni日志提示pg_hba.conf文件缺少replication连接localhost信息

psycopg2.OperationalError: connection to server at "localhost" (127.0.0.1), port 5432 failed: FATAL: no pg_hba.conf entry for replication connection from ..., no encryption

解决办法:

primary数据库pg_hba.conf添加如下内容:

host replication repl_user 127.0.0.1/32 trust

然后reload数据库配置