Heartbeat+nfs+DRBD实验
一、DRBD试验简介
本实验部署DRBD + HEARTBEAT + NFS 环境,建立一个高可用(HA)的文件服务器集群。在方案中,通
过DRBD保证了服务器数据的完整性和一致性。DRBD类似于一个网络RAID-1功能。当你将数据写入本
地文件系统时,数据还将会被发送到网络中另一台主机上,以相同的形式记录在一个另文件系统中。主节
点与备节点的数据可以保证实时相互同步。当本地主服务器出现故障时,备份服务器上还会保留有一份相
同的数据,可以继续使用。在高可用(HA)中使用DRBD功能,可以代替使用一个共享盘阵。因为数据同时
存在于本地主服务器和备份服务器上。切换时,远程主机只要使用它上面的那份备份数据,就可以继续提
供主服务器上相同的服务,并且client用户对主服务器的故障无感知。
实验环境
虚拟机操作系统:Centos5.4 32bit
两台服务器:node1.a.com ip 192.168.101.250
node2.a.com ip 192.168.101.249
VIP:192.168.101.200
两台服务器将/dev/sda4互为镜像
两台服务器/etc/exports配置相同
Node1配置:
给接口配置ip
修改主机名
[root@jun ~]# vim /etc/sysconfig/network
NETWORKING=yes
NETWORKING_IPV6=yes
HOSTNAME=node1.a.com
[root@jun ~]# init 6 重启使主机名生效
[root@node1 ~]# hwclock -s 同步时间,必须有
修改hosts文件:
[root@node1 ~]# vim /etc/hosts 配置本地主机名解析(两节点互相解析对方主机名)
127.0.0.1 localhost.localdomain localhost
::1 localhost6.localdomain6 localhost6
192.168.101.250 node1.a.com
192.168.101.249 node2.a.com
配置yum:
[root@node1 ~]# vim /etc/yum.repos.d/server.repo
[rhel-server]
name=Red Hat Enterprise Linux server
baseurl=file:///mnt/cdrom/Server
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
同时要挂载光盘
Drbd配置
[root@node1 ~]# yum -y localinstall *.rpm --nogpgcheck 安装drbd包
[root@node1 ~]# modprobe drbd 加载drbd模块
[root@node1 ~]# lsmod |grep drbd 查看加载
drbd 228528 0
创建新分区:
[root@node1 ~]# fdisk /dev/sda
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Selected partition 4
First cylinder (1416-5221, default 1416): 1416
Last cylinder or +size or +sizeM or +sizeK (1416-5221, default 5221): +1G
Command (m for help): w
[root@node1 ~]# partprobe /dev/sda
[root@node1 ~]# cat /proc/partitions
major minor #blocks name
8 0 41943040 sda
8 1 104391 sda1
8 2 10241437 sda2
8 3 1020127 sda3
8 4 9775552 sda4
复制相关文件到node2上:
[root@node1 ~]# scp *.rpm node2.a.com:/root
修改配置文件:
[root@node1 ~]# cd /etc/
[root@node1 etc]# cp /usr/share/doc/drbd83-8.3.8/drbd.conf ./
[root@node1 ~]# cd /etc/drbd.d/
[root@node1 drbd.d]# ll
total 4
-rwxr-xr-x 1 root root 1418 Jun 4 2010 global_common.conf
[root@node1 drbd.d]# cp global_common.conf global_common.conf.bak 备份文件
[root@node1 drbd.d]# vim global_common.conf
1 global {
2 usage-count no;
3 # minor-count dialog-refresh disable-ip-verification
4 }
5
6 common {
7 protocol C;
8
9 startup {
10 wfc-timeout 120;
11 degr-wfc-timeout 120;
12 }
13
14 disk {
15 on-io-error detach;
16 # fencing resource-only;
17 }
18 net {
19 cram-hmac-alg "sha1";
20 shared-secret "mydrbdlab";
21
22 }
23
24 syncer {
25 rate 100M;
26 }
27 }
4、检测配置文件
[root@node1 drbd.d]# drbdadm adjust r0
no resources defined
[root@node1 drbd.d]# vim web.res 创建资源文件
resource web {
on node1.a.com {
device /dev/drbd0;
disk /dev/sda4;
address 192.168.101.250:7789;
meta-disk internal;
}
on node2.a.com {
device /dev/drbd0;
disk /dev/sda4;
address 192.168.101.249:7789;
meta-disk internal;
}
}
[root@node1 drbd.d]# scp /etc/drbd.conf node2.a.com:/etc/
[root@node1 drbd.d]# scp /etc/drbd.d/* node2.a.com:/etc/drbd.d/
[root@node1 ~]# drbdadm create-md web 为web资源创建metadata
[root@node1 drbd.d]# service drbd start 两边需要都启动,才可以启动成功
[root@node1 drbd.d]# drbdadm -- --overwrite-data-of-peer primary web 将其设为主设备
[root@node1 drbd.d]# watch -n 1 'cat /proc/drbd'
[root@node1 ~]# cat /proc/drbd 查看drbd状态
version: 8.3.8 (api:88/proto:86-94)
GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by mockbuild@builder10.centos.org, 2010-06-04 08:04:16
0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
[root@node1 drbd.d]# mkfs -t ext3 -L drbdweb /dev/drbd0 格式化分区
[root@node1 ~]# mkdir /data
[root@node1 ~]# mount /dev/drbd0 /data 挂载分区
[root@node1 ~]# service drbd status
drbd driver loaded OK; device status:
version: 8.3.8 (api:88/proto:86-94)
GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by mockbuild@builder10.centos.org, 2010-06-04 08:04:16
m:res cs ro ds p mounted fstype
0:web Connected Primary/Secondary UpToDate/UpToDate C /mnt/1 ext3
Nfs配置:
[root@node1 ~]# vim /etc/exports 两台服务器一致
/data *(rw,sync,insecure,no_root_squash,no_wdelay)
[root@node1 ~]# service portmap start
Starting portmap: [ OK ]
[root@node1 ~]# service nfs start
Starting NFS services: [ OK ]
Starting NFS quotas: [ OK ]
Starting NFS daemon: [ OK ]
Starting NFS mountd: [ OK ]
[root@node1 ~]# chkconfig portmap on
[root@node1 ~]# chkconfig nfs on
[root@node1 ~]# vim /etc/init.d/nfs
116 stop)
117 # Stop daemons.
118 echo -n $"Shutting down NFS mountd: "
119 killproc rpc.mountd
120 echo
121 echo -n $"Shutting down NFS daemon: "
122 killproc nfsd -9 两台服务器都要修改nfs启动脚本:将/etc/init.d/nfs脚本stop部分中的killproc nfsd -2 修改为 killproc nfsd -9
[root@node1 ~]# scp /etc/exports node2.a.com:/etc/
Heartbeat配置:
安装相关包:
[root@node1 ~]# yum localinstall heartbeat-2.1.4-9.el5.i386.rpm heartbeat-pils-2.1.4-10.el5.i386.rpm libnet-1.1.4-3.el5.i386.rpm heartbeat-stonith-2.1.4-10.el5.i386.rpm perl-MailTools-1.77-1.el5.noarch.rpm
拷贝配置文档:
[root@node1 ~]# cd /usr/share/doc/heartbeat-2.1.4/
[root@node1 heartbeat-2.1.4]# cp authkeys ha.cf haresources /etc/ha.d/
安装配置文档:
[root@node1 heartbeat-2.1.4]# cd /etc/ha.d/
[root@node1 ha.d]# vim ha.cf
24 debugfile /var/log/ha-debug
29 logfile /var/log/ha-log
48 keepalive 2
49 #
56 deadtime 30
76 udpport 692
121 ucast eth0 192.168.101.249
157 auto_failback off
211 node node1.a.com
212 node node2.a.com
220 ping 192.168.101.1
指明主服务器
[root@node1 ha.d]# echo "node1.a.com IPaddr::192.168.101.200/24/eth0 drbddisk::web Filesystem::/dev/drbd0::/data::ext3 killnfsd" >> /etc/ha.d/haresources
[root@node1 ha.d]# vim authkeys
23 auth 1
24 1 crc
[root@node1 ha.d]# echo "killall -9 nfsd; /etc/init.d/nfs restart; exit 0" >> ./resource.d/killnfsd
[root@node1 ha.d]# chmod 600 ./authkeys
[root@node1 ha.d]# chmod 755 ./resource.d/killnfsd
[root@node1 ha.d]# service heartbeat start
[root@node1 ha.d]# service heartbeat restart
[root@node1 ~]# chkconfig heartbeat on
查看node1 是否出现vip:
Node1上面出现vip 192.168.101.200,node2上面没有:
Node2配置:
[root@jun ~]# vim /etc/sysconfig/network
NETWORKING=yes
NETWORKING_IPV6=yes
HOSTNAME=node2.a.com
[root@jun ~]# init 6
[root@node2 ~]# hwclock -s
[root@node2 ~]# vim /etc/hosts
127.0.0.1 localhost.localdomain localhost
::1 localhost6.localdomain6 localhost6
192.168.101.250 node1.a.com
192.168.101.249 node2.a.com
[root@node2 ~]# vim /etc/yum.repos.d/server.repo
[rhel-server]
name=Red Hat Enterprise Linux server
baseurl=file:///mnt/cdrom/Server
enabled=1
gpgcheck=1
gpgkey=file:///mnt/cdrom/RPM-GPG-KEY-redhat-release
同时要挂载光盘
Drbd配置:
[root@node2 ~]# yum -y localinstall *.rpm --nogpgcheck
[root@node2 ~]# fdisk /dev/sda
Command (m for help): n
Command action
e extended
p primary partition (1-4)
p
Selected partition 4
First cylinder (1416-5221, default 1416): 1416
Last cylinder or +size or +sizeM or +sizeK (1416-5221, default 5221): +10000m
Command (m for help): w
[root@node2 ~]# partprobe /dev/sda
[root@node2 ~]# cat /proc/partitions
major minor #blocks name
8 0 41943040 sda
8 1 104391 sda1
8 2 10241437 sda2
8 3 1020127 sda3
8 4 9775552 sda4
[root@node2 ~]# drbdadm create-md web
[root@node1 drbd.d]# service drbd start
[root@node2 drbd.d]# cat /proc/drbd
version: 8.3.8 (api:88/proto:86-94)
GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by mockbuild@builder10.centos.org, 2010-06-04 08:04:16
0: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:9775216
[root@node2 ~]# service drbd status
drbd driver loaded OK; device status:
version: 8.3.8 (api:88/proto:86-94)
GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by mockbuild@builder10.centos.org, 2010-06-04 08:04:16
m:res cs ro ds p mounted fstype
0:web Connected Secondary/Primary UpToDate/UpToDate C
Nfs配置:
[root@node2 ~]# service portmap start
Starting portmap: [ OK ]
[root@node2 ~]# service nfs start
Starting NFS services: [ OK ]
Starting NFS quotas: [ OK ]
Starting NFS daemon: [ OK ]
Starting NFS mountd: [ OK ]
[root@node2 ~]# chkconfig portmap on
[root@node2 ~]# chkconfig nfs on
配置启动脚本
[root@node2 ~]# vim /etc/init.d/nfs
116 stop)
117 # Stop daemons.
118 echo -n $"Shutting down NFS mountd: "
119 killproc rpc.mountd
120 echo
121 echo -n $"Shutting down NFS daemon: "
122 killproc nfsd -9 两台服务器都要修改nfs启动脚本:将/etc/init.d/nfs脚本stop部分中的killproc nfsd -2 修改为 killproc nfsd -9
[root@node1 ha.d]# service heartbeat start
Starting High-Availability services:
2012/05/08_00:52:46 INFO: Resource is stopped
[ OK ]
Heartbeat配置:
安装heartbeat相关包:
[root@node2 ~]# yum localinstall heartbeat-2.1.4-9.el5.i386.rpm heartbeat-pils-2.1.4-10.el5.i386.rpm heartbeat-stonith-2.1.4-10.el5.i386.rpm libnet-1.1.4-3.el5.i386.rpm perl-MailTools-1.77-1.el5.noarch.rpm --nogpgcheck
拷贝配置文档并配置配置文档
[root@node2 ~]# cd /usr/share/doc/heartbeat-2.1.4/
[root@node2 heartbeat-2.1.4]# cp authkeys ha.cf haresources /etc/ha.d/
[root@node2 heartbeat-2.1.4]# cd /etc/ha.d/
[root@node2 ha.d]# ls
README.config authkeys ha.cf harc haresources rc.d resource.d shellfuncs
[root@node2 ha.d]# vim ha.cf
24 debugfile /var/log/ha-debug
29 logfile /var/log/ha-log
48 keepalive 2
49 #
56 deadtime 30
76 udpport 692
121 ucast eth0 192.168.101.250 (ucast应指向对端节点node1的地址,不能写本机地址)
157 auto_failback off
211 node node1.a.com
212 node node2.a.com
220 ping 192.168.101.1
指明主服务器:
[root@node2 ha.d]# echo "node1.a.com IPaddr::192.168.101.200/24/eth0 drbddisk::web Filesystem::/dev/drbd0::/data::ext3 killnfsd" >> /etc/ha.d/haresources
[root@node2 ha.d]# vim authkeys
23 auth 1
24 1 crc
[root@node2 ha.d]# echo "killall -9 nfsd; /etc/init.d/nfs restart; exit 0" >> ./resource.d/killnfsd
修改文件权限
[root@node2 ha.d]# chmod 600 ./authkeys
[root@node2 ha.d]# chmod 755 ./resource.d/killnfsd
[root@node2 ha.d]# service heartbeat start
Starting High-Availability services:
2012/05/08_00:52:46 INFO: Resource is stopped
[ OK ]
[root@node2 ha.d]# service heartbeat restart
[root@node2 ~]# chkconfig heartbeat on
测试机配置:
1、在测试机上将192.168.101.200:/data(VIP)挂到本地/mnt/nfs
[root@server5 ~]# mkdir /mnt/nfs
[root@server5 ~]# mount 192.168.101.200:/data /mnt/nfs
2、在测试机上创建测试shell,二秒一个
[root@server5 ~]# vim /mnt/test.sh
# Touch a file every 2 seconds, logging a timestamp before and after,
# to observe whether the NFS mount stays writable across a heartbeat
# failover. Output format matches the original transcript exactly.
while true; do
  # 'printf --' is required: the format string starts with '-'
  printf -- '---> trying touch x : %s\n' "$(date)"
  touch x
  printf '<----- done touch x : %s\n' "$(date)"
  echo
  sleep 2
done
cd /mnt/nfs
[root@server5 nfs]# bash /mnt/test.sh
3、将主节点node1的heartbeat服务停止,则备节点node2接管服务
[root@server5 nfs]# bash /mnt/test.sh
---> trying touch x : Sun May 6 20:03:12 CST 2012
<----- done touch x : Sun May 6 20:03:12 CST 2012
---> trying touch x : Sun May 6 20:03:14 CST 2012
<----- done touch x : Sun May 6 20:03:14 CST 2012
---> trying touch x : Sun May 6 20:03:16 CST 2012
<----- done touch x : Sun May 6 20:03:16 CST 2012
---> trying touch x : Sun May 6 20:03:18 CST 2012
<----- done touch x : Sun May 6 20:03:18 CST 2012
---> trying touch x : Sun May 6 20:03:20 CST 2012
<----- done touch x : Sun May 6 20:03:20 CST 2012
---> trying touch x : Sun May 6 20:03:22 CST 2012
<----- done touch x : Sun May 6 20:03:22 CST 2012
---> trying touch x : Sun May 6 20:03:24 CST 2012
<----- done touch x : Sun May 6 20:03:24 CST 2012
---> trying touch x : Sun May 6 20:03:26 CST 2012
<----- done touch x : Sun May 6 20:03:26 CST 2012
---> trying touch x : Sun May 6 20:03:28 CST 2012
touch: cannot touch `x': Stale NFS file handle
<----- done touch x : Sun May 6 20:03:28 CST 2012 可以看到发生切换。
---> trying touch x : Sun May 6 20:03:33 CST 2012
<----- done touch x : Sun May 6 20:03:33 CST 2012
---> trying touch x : Sun May 6 20:03:35 CST 2012
<----- done touch x : Sun May 6 20:03:35 CST 2012