MySQL + DRBD + Heartbeat + LVS + Keepalived High-Availability Setup

On node1:
[root@localhost 桌面]# iptables -F
[root@localhost 桌面]# chkconfig iptables off
[root@localhost 桌面]# setenforce 0
[root@localhost 桌面]# ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN 
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:7e:2c:18 brd ff:ff:ff:ff:ff:ff
    inet 192.168.200.157/24 brd 192.168.200.255 scope global eth0
    inet6 fe80::20c:29ff:fe7e:2c18/64 scope link 
       valid_lft forever preferred_lft forever
3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:7e:2c:22 brd ff:ff:ff:ff:ff:ff
    inet 192.168.1.157/24 brd 192.168.1.255 scope global eth1
    inet6 fe80::20c:29ff:fe7e:2c22/64 scope link 
       valid_lft forever preferred_lft forever
====================================================================
[root@localhost 桌面]# cat /etc/hosts
192.168.200.157 node1
192.168.200.158 node2
[root@localhost 桌面]# hostname node1
[root@localhost 桌面]# bash
[root@node1 桌面]# 
[root@node1 桌面]# cat /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=node1
=========================================
Repeat the steps above on node2, using eth0: 192.168.200.158 / eth1: 192.168.1.158.
Step 2: Install DRBD
For this lab, add a 10 GB disk to each machine, then partition and format the newly added disk sdb (steps omitted).
First, on node1:
To prepare for the installation, insert the install DVD into the drive.
[root@node1 ~]# umount /dev/cdrom 
[root@node1 ~]# mount /dev/cdrom  /media/
mount: block device /dev/sr0 is write-protected, mounting read-only
[root@node1 ~]# cd /etc/yum.repos.d/
[root@node1 yum.repos.d]# mkdir aaa
[root@node1 yum.repos.d]# mv *.repo aaa/
[root@node1 yum.repos.d]# cp aaa/CentOS-Media.repo ./
[root@node1 yum.repos.d]# vi CentOS-Media.repo 
[c6-media]
name=CentOS-$releasever - Media
baseurl=file:///media/
        file:///media/cdrom/
        file:///media/cdrecorder/
gpgcheck=1
enabled=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-6
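With the media repo in place, it is worth a quick check that yum can actually see it before installing anything (the DVD must still be mounted on /media):
[root@node1 yum.repos.d]# yum clean all
[root@node1 yum.repos.d]# yum repolist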
[root@node1 ~]# yum -y install kernel-devel kernel-headers flex
(Note: the kernel-devel package version must exactly match the running kernel shown by uname -r. Install kernel-devel from the local media repo, not a network repo, which may carry a newer kernel.)
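A quick way to confirm the versions match before building:
[root@node1 ~]# uname -r
[root@node1 ~]# rpm -q kernel-devel
# the two version strings must be identical, or the module build
# in the following steps will target the wrong kernel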
[root@node1 ~]# tar zxf drbd-8.4.3.tar.gz
[root@node1 ~]# cd drbd-8.4.3
[root@node1 drbd-8.4.3]# yum -y install gcc gcc-c++ make 
[root@node1 drbd-8.4.3]# ./configure --prefix=/usr/local/drbd --with-km
[root@node1 drbd-8.4.3]# make KDIR=/usr/src/kernels/2.6.32-431.el6.x86_64/
[root@node1 drbd-8.4.3]# make install
[root@node1 ~]# mkdir -p /usr/local/drbd/var/run/drbd 
[root@node1 ~]# cp /usr/local/drbd/etc/rc.d/init.d/drbd  /etc/rc.d/init.d/ 
[root@node1 ~]# chkconfig --add drbd 
[root@node1 ~]# chkconfig drbd on 
Build and install the DRBD kernel module.
Return to the directory where DRBD was unpacked, then:
[root@node1 drbd-8.4.3]# cd drbd 
[root@node1 drbd]# make clean 
[root@node1 drbd]# make KDIR=/usr/src/kernels/2.6.32-431.el6.x86_64/
[root@node1 drbd]# cp drbd.ko /lib/modules/`uname -r`/kernel/lib/ 
[root@node1 drbd]# modprobe drbd 
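If modprobe reports that it cannot find drbd, the module dependency map most likely has not picked up the freshly copied drbd.ko yet; rebuilding it is harmless and usually fixes this:
[root@node1 drbd]# depmod -a
[root@node1 drbd]# modprobe drbd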
Verify that the module loaded:
[root@node1 drbd]# lsmod | grep drbd 
drbd                  292307  0
libcrc32c                841  1 drbd 
====================================================================
[root@node1 drbd]# cd /usr/local/drbd/etc
[root@node1 etc]# pwd
/usr/local/drbd/etc
Official documentation: http://www.drbd.org/users-guide-8.4/ 
Inspect DRBD's main configuration file:
[root@node1 etc]# cat drbd.conf 
# You can find an example in  /usr/share/doc/drbd.../drbd.conf.example 
include "drbd.d/global_common.conf"; 
include "drbd.d/*.res"; 
As the include lines show, the main configuration file already pulls in the global configuration file and every .res file under the drbd.d directory.
Edit the global configuration file:
[root@node1 drbd.d]# pwd 
/usr/local/drbd/etc/drbd.d 
[root@node1 drbd.d]# ls 
global_common.conf 
The file contents are as follows:
[root@node1 drbd.d]# cat global_common.conf
global {
    usage-count yes; # whether to participate in DRBD usage statistics; defaults to yes
    # minor-count dialog-refresh disable-ip-verification
}

common {
    protocol C; # DRBD replication protocol (C = fully synchronous)
    handlers {
        # These are EXAMPLE handlers only.
        # They may have severe implications,
        # like hard resetting the node under certain circumstances.
        # Be careful when chosing your poison.
        pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
        local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
        # fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
        # split-brain "/usr/lib/drbd/notify-split-brain.sh root";
        # out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
        # before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
        # after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
    }
    startup {
        # wfc-timeout degr-wfc-timeout outdated-wfc-timeout wait-after-sb
    }
    options {
        # cpu-mask on-no-data-accessible
    }
    disk {
        on-io-error detach; # on a lower-level I/O error, detach the backing device
        # size max-bio-bvecs on-io-error fencing disk-barrier disk-flushes
        # disk-drain md-flushes resync-rate resync-after al-extents
        # c-plan-ahead c-delay-target c-fill-target c-max-rate
        # c-min-rate disk-timeout
    }
    net {
        # protocol timeout max-epoch-size max-buffers unplug-watermark
        # connect-int ping-int sndbuf-size rcvbuf-size ko-count
        # allow-two-primaries cram-hmac-alg shared-secret after-sb-0pri
        # after-sb-1pri after-sb-2pri always-asbp rr-conflict
        # ping-timeout data-integrity-alg tcp-cork on-congestion
        # congestion-fill congestion-extents csums-alg verify-alg
        # use-rle
    }
    syncer {
        rate 1024M; # bandwidth cap for resynchronization between the nodes
    }
}
=========================================================================
########## Resource configuration file -- create this file yourself ##########
[root@node1 ~]# vim /usr/local/drbd/etc/drbd.d/drbd.res 
resource r0 {                        # r0 is the resource name
    on node1 {                       # "on" followed by the host name
        device    /dev/drbd0;        # DRBD device name
        disk      /dev/sdb1;         # backing partition used by drbd0
        address   192.168.200.157:7789;  # DRBD listen address and port
        meta-disk internal;
    }
    on node2 {
        device    /dev/drbd0;
        disk      /dev/sdb1;
        address   192.168.200.158:7789;
        meta-disk internal;
    }
}
##################################################################### 
Perform the same steps on node2.
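Before creating metadata, a configuration sanity check on both nodes saves debugging later; drbdadm refuses to dump a broken configuration, so a clean dump means the global and .res files parse:
[root@node1 ~]# drbdadm dump all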
Starting DRBD:
[root@node1 ~]# drbdadm create-md all
If this step fails, wipe the beginning of the partition (this destroys any old filesystem signature) and retry:
[root@node1 ~]# dd if=/dev/zero of=/dev/sdb1 bs=1M count=128
then run again:
[root@node1 ~]# drbdadm create-md all
The server's response is:
you are the 23335th user to install this version
Writing meta data...
initializing activity log
NOT initializing bitmap
New drbd meta data block successfully created.
success
---------------------------------------------
[root@node1 ~]# service drbd start
=================================================
[root@node2 drbd.d]# drbdadm create-md all
  --==  Thank you for participating in the global usage survey  ==--
The server's response is:
you are the 23336th user to install this version
Writing meta data...
md_offset 10733953024
al_offset 10733920256
bm_offset 10733592576
Found ext3 filesystem
    10482380 kB data area apparently used
    10482024 kB left usable by current configuration
Device size would be truncated, which
would corrupt data and result in
'access beyond end of device' errors.
You need to either
   * use external meta data (recommended)
   * shrink that filesystem first
   * zero out the device (destroy the filesystem)
Operation refused.
Command 'drbdmeta 0 v08 /dev/sdb1 internal create-md' terminated with exit code 40
------------------
If you see the error above, run the following on the node that reported it (node2 here):
[root@node2 ~]# dd if=/dev/zero of=/dev/sdb1 bs=1M count=128
then run again:
[root@node2 ~]# drbdadm create-md all
Writing meta data...
initializing activity log
NOT initializing bitmap
New drbd meta data block successfully created.
success
---------------------------------------
[root@node1 ~]# drbdadm primary all
0: State change failed: (-2) Need access to UpToDate data
Command 'drbdsetup primary 0' terminated with exit code 17
[root@node1 ~]# drbdadm -- --overwrite-data-of-peer primary all
[root@node1 ~]# mkfs.ext4 /dev/drbd0
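The forced promotion above also kicks off the initial full sync, which runs in the background while the filesystem is created; on a 10 GB device it takes a few minutes. Progress can be watched from either node:
[root@node1 ~]# watch -n1 cat /proc/drbd
# cs:SyncSource / cs:SyncTarget while syncing; cs:Connected with
# ds:UpToDate/UpToDate once both sides are consistent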
============================================================
On node2:
[root@node2 drbd.d]# service drbd start
[root@node2 drbd.d]# cat /proc/drbd 
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 89a294209144b68adb3ee85a73221f964d3ee515 build by root@node2, 2014-12-14 22:28:31
 0: cs:SyncTarget ro:Secondary/Primary ds:Inconsistent/UpToDate C r-----
    ns:0 nr:8705772 dw:8705772 dr:0 al:0 bm:529 lo:1 pe:10 ua:0 ap:0 ep:1 wo:f oos:1811712
 [===============>....] sync'ed: 82.8% (1768/10236)M
 finish: 0:00:18 speed: 97,072 (97,416) want: 101,200 K/sec
[root@node2 drbd.d]# cat /proc/drbd 
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 89a294209144b68adb3ee85a73221f964d3ee515 build by root@node2, 2014-12-14 22:28:31
 0: cs:Connected ro:Secondary/Primary ds:UpToDate/UpToDate C r-----
    ns:0 nr:10517484 dw:10517484 dr:0 al:0 bm:640 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0
=====================================================================
On node1:
[root@node1 ~]# mkdir /database
[root@node1 ~]# mount /dev/drbd0 /database/
[root@node1 ~]# df -hT |grep database
/dev/drbd0                   ext4     9.9G  151M  9.2G   2% /database
========================================================================
Next, deploy MySQL.
On node1:
[root@node1 ~]# yum -y install mysql-server mysql-devel mysql mysql-bench mysql-test
[root@node1 ~]# service mysqld start
Move the data files onto the DRBD partition (stop mysqld first if you want a guaranteed-consistent copy):
[root@node1 ~]# cp -R /var/lib/mysql /database/mysql
[root@node1 ~]# chown -R mysql:mysql /database/mysql/
[root@node1 ~]# vi /etc/my.cnf
[mysqld]
datadir=/database/mysql
... (remaining lines omitted) ...
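mysqld must be restarted to pick up the new datadir; a quick check that it really switched (assuming the root account still has no password on this fresh install):
[root@node1 ~]# service mysqld restart
[root@node1 ~]# mysql -e "show variables like 'datadir';"
# expected value: /database/mysql/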
==========================
Installing heartbeat
============================================================================
On node1:
[root@node1 ~]# cat /etc/resolv.conf 
# Generated by NetworkManager
nameserver 192.168.200.1
nameserver 202.106.0.20
[root@node1 ~]# wget http://mirrors.sohu.com/fedora-epel/6/x86_64/epel-release-6-8.noarch.rpm
[root@node1 ~]# ls -l epel-release-6-8.noarch.rpm 
-rw-r--r--. 1 root root 14540 Nov  5  2012 epel-release-6-8.noarch.rpm
[root@node1 ~]# rpm -ivh epel-release-6-8.noarch.rpm 
warning: epel-release-6-8.noarch.rpm: Header V3 RSA/SHA256 Signature, key ID 0608b895: NOKEY
Preparing...                ########################################### [100%]
   1:epel-release           ########################################### [100%]
[root@node1 ~]# ls /etc/yum.repos.d/epel*
/etc/yum.repos.d/epel.repo  /etc/yum.repos.d/epel-testing.repo
[root@node1 ~]# yum -y install heartbeat heartbeat-devel heartbeat-stonith heartbeat-pils
=============================================================================
Repeat the steps above on node2.
---------------------------
On node1, do the following.
Create the heartbeat configuration files:
[root@node1 ~]# cp /usr/share/doc/heartbeat-3.0.4/ha.cf /etc/ha.d/ha.cf
[root@node1 ~]# cp /usr/share/doc/heartbeat-3.0.4/haresources /etc/ha.d/haresources
[root@node1 ~]# cp /usr/share/doc/heartbeat-3.0.4/authkeys /etc/ha.d/authkeys
[root@node1 ~]# chmod 600 /etc/ha.d/authkeys 
=========================================================================
Edit ha.cf to suit this setup:
[root@node1 ha.d]# cat ha.cf 
#
# There are lots of options in this file.  All you have to have is a set
# of nodes listed {"node ...} one of {serial, bcast, mcast, or ucast},
# and a value for "auto_failback".
#
# ATTENTION: As the configuration file is read line by line,
#     THE ORDER OF DIRECTIVE MATTERS!
#
# In particular, make sure that the udpport, serial baud rate
# etc. are set before the heartbeat media are defined!
# debug and log file directives go into effect when they
# are encountered.
#
# All will be fine if you keep them ordered as in this example.
#
#
#       Note on logging:
#       If all of debugfile, logfile and logfacility are not defined, 
#       logging is the same as use_logd yes. In other case, they are
#       respectively effective. if detering the logging to syslog,
#       logfacility must be "none".
#
# File to write debug messages to
#debugfile /var/log/ha-debug
#
#
#  File to write other messages to
#
logfile /var/log/ha-log
#
#
# Facility to use for syslog()/logger 
#
logfacility local0
#
#
# A note on specifying "how long" times below...
#
# The default time unit is seconds
#  10 means ten seconds
#
# You can also specify them in milliseconds
#  1500ms means 1.5 seconds
#
#
# keepalive: how long between heartbeats?
#
keepalive 1
#
# deadtime: how long-to-declare-host-dead?
#
#  If you set this too low you will get the problematic
#  split-brain (or cluster partition) problem.
#  See the FAQ for how to use warntime to tune deadtime.
#
deadtime 15
#
# warntime: how long before issuing "late heartbeat" warning?
# See the FAQ for how to use warntime to tune deadtime.
#
warntime 5
#
#
# Very first dead time (initdead)
#
# On some machines/OSes, etc. the network takes a while to come up
# and start working right after you've been rebooted.  As a result
# we have a separate dead time for when things first come up.
# It should be at least twice the normal dead time.
#
initdead 30
#
#
# What UDP port to use for bcast/ucast communication?
#
#udpport 694
#
# Baud rate for serial ports...
#
#baud 19200

# serial serialportname ...
#serial /dev/ttyS0 # Linux
#serial /dev/cuaa0 # FreeBSD
#serial /dev/cuad0      # FreeBSD 6.x
#serial /dev/cua/a # Solaris
#
#
# What interfaces to broadcast heartbeats over?
#
#bcast eth0  # Linux
#bcast eth1 eth2 # Linux
#bcast le0  # Solaris
#bcast le1 le2  # Solaris
#
# Set up a multicast heartbeat medium
# mcast [dev] [mcast group] [port] [ttl] [loop]
#
# [dev]  device to send/rcv heartbeats on
# [mcast group] multicast group to join (class D multicast address
#   224.0.0.0 - 239.255.255.255)
# [port]  udp port to sendto/rcvfrom (set this value to the
#   same value as "udpport" above)
# [ttl]  the ttl value for outbound heartbeats.  this effects
#   how far the multicast packet will propagate.  (0-255)
#   Must be greater than zero.
# [loop]  toggles loopback for outbound multicast heartbeats.
#   if enabled, an outbound packet will be looped back and
#   received by the interface it was sent on. (0 or 1)
#   Set this value to zero.
#  
#
#mcast eth0 225.0.0.1 694 1 0
#
# Set up a unicast / udp heartbeat medium
# ucast [dev] [peer-ip-addr]
#
# [dev]  device to send/rcv heartbeats on
# [peer-ip-addr] IP address of peer to send packets to
#
ucast eth0 192.168.200.158
ucast eth1 192.168.1.158
#
#
# About boolean values...
#
# Any of the following case-insensitive values will work for true:
#  true, on, yes, y, 1
# Any of the following case-insensitive values will work for false:
#  false, off, no, n, 0
#
#
#
# auto_failback:  determines whether a resource will
# automatically fail back to its "primary" node, or remain
# on whatever node is serving it until that node fails, or
# an administrator intervenes.
#
# The possible values for auto_failback are:
#  on - enable automatic failbacks
#  off - disable automatic failbacks
#  legacy - enable automatic failbacks in systems
#   where all nodes do not yet support
#   the auto_failback option.
#
# auto_failback "on" and "off" are backwards compatible with the old
#  "nice_failback on" setting.
#
# See the FAQ for information on how to convert
#  from "legacy" to "on" without a flash cut.
#  (i.e., using a "rolling upgrade" process)
#
# The default value for auto_failback is "legacy", which
# will issue a warning at startup.  So, make sure you put
# an auto_failback directive in your ha.cf file.
# (note: auto_failback can be any boolean or "legacy")
#
auto_failback off
#
#
#       Basic STONITH support
#       Using this directive assumes that there is one stonith 
#       device in the cluster.  Parameters to this device are 
#       read from a configuration file. The format of this line is:
#
#         stonith <stonith_type> <configfile>
#
#       NOTE: it is up to you to maintain this file on each node in the
#       cluster!
#
#stonith baytech /etc/ha.d/conf/stonith.baytech
#
#       STONITH support
#       You can configure multiple stonith devices using this directive.
#       The format of the line is:
#         stonith_host <hostfrom> <stonith_type> <params...>
#         <hostfrom> is the machine the stonith device is attached
#              to or * to mean it is accessible from any host. 
#         <stonith_type> is the type of stonith device (a list of
#              supported drives is in /usr/lib/stonith.)
#         <params...> are driver specific parameters.  To see the
#              format for a particular device, run:
#           stonith -l -t <stonith_type> 
#
#
# Note that if you put your stonith device access information in
# here, and you make this file publically readable, you're asking
# for a denial of service attack ;-)
#
# To get a list of supported stonith devices, run
#  stonith -L
# For detailed information on which stonith devices are supported
# and their detailed configuration options, run this command:
#  stonith -h
#
#stonith_host *     baytech 10.0.0.3 mylogin mysecretpassword
#stonith_host ken3  rps10 /dev/ttyS1 kathy 0 
#stonith_host kathy rps10 /dev/ttyS1 ken3 0 
#
# Watchdog is the watchdog timer.  If our own heart doesn't beat for
# a minute, then our machine will reboot.
# NOTE: If you are using the software watchdog, you very likely
# wish to load the module with the parameter "nowayout=0" or
# compile it without CONFIG_WATCHDOG_NOWAYOUT set. Otherwise even
# an orderly shutdown of heartbeat will trigger a reboot, which is
# very likely NOT what you want.
#
#watchdog /dev/watchdog
#       
# Tell what machines are in the cluster
# node nodename ... -- must match uname -n
node node1
node node2
#
# Less common options...
#
# Treats 10.10.10.254 as a psuedo-cluster-member
# Used together with ipfail below...
# note: don't use a cluster node as ping node 
#
#ping 10.10.10.254
#
# Treats 10.10.10.254 and 10.10.10.253 as a psuedo-cluster-member
#       called group1. If either 10.10.10.254 or 10.10.10.253 are up
#       then group1 is up
# Used together with ipfail below...
#
#ping_group group1 10.10.10.254 10.10.10.253
#
# HBA ping derective for Fiber Channel
# Treats fc-card-name as psudo-cluster-member
# used with ipfail below ...
#
# You can obtain HBAAPI from http://hbaapi.sourceforge.net.  You need 
# to get the library specific to your HBA directly from the vender
# To install HBAAPI stuff, all You need to do is to compile the common
# part you obtained from the sourceforge. This will produce libHBAAPI.so 
# which you need to copy to /usr/lib. You need also copy hbaapi.h to 
# /usr/include.

# The fc-card-name is the name obtained from the hbaapitest program 
# that is part of the hbaapi package. Running hbaapitest will produce
# a verbose output. One of the first line is similar to:
#  Apapter number 0 is named: qlogic-qla2200-0
# Here fc-card-name is qlogic-qla2200-0.  
#
#hbaping fc-card-name
#
#
# Processes started and stopped with heartbeat.  Restarted unless
#  they exit with rc=100
#
#respawn userid /path/name/to/run
respawn hacluster /usr/lib64/heartbeat/ipfail
#
# Access control for client api
#        default is no access
#
respawn hacluster /usr/lib64/heartbeat/dopd
apiauth ipfail gid=haclient uid=hacluster
apiauth dopd gid=haclient uid=hacluster
#apiauth client-name gid=gidlist uid=uidlist
#apiauth ipfail gid=haclient uid=hacluster
###########################
#
# Unusual options.
#
###########################
#
# hopfudge maximum hop count minus number of nodes in config
#hopfudge 1
#
# deadping - dead time for ping nodes
#deadping 30
#
# hbgenmethod - Heartbeat generation number creation method
#  Normally these are stored on disk and incremented as needed.
#hbgenmethod time
#
# realtime - enable/disable realtime execution (high priority, etc.)
#  defaults to on
#realtime off
#
# debug - set debug level
#  defaults to zero
#debug 1
#
# API Authentication - replaces the fifo-permissions-based system of the past
#
#
# You can put a uid list and/or a gid list.
# If you put both, then a process is authorized if it qualifies under either
# the uid list, or under the gid list.
#
# The groupname "default" has special meaning.  If it is specified, then
# this will be used for authorizing groupless clients, and any client groups
# not otherwise specified.

# There is a subtle exception to this.  "default" will never be used in the 
# following cases (actual default auth directives noted in brackets)
#    ipfail  (uid=HA_CCMUSER)
#    ccm    (uid=HA_CCMUSER)
#    ping  (gid=HA_APIGROUP)
#    cl_status (gid=HA_APIGROUP)
#
# This is done to avoid creating a gaping security hole and matches the most
# likely desired configuration.
#
#apiauth ipfail uid=hacluster
#apiauth ccm uid=hacluster
#apiauth cms uid=hacluster
#apiauth ping gid=haclient uid=alanr,root
#apiauth default gid=haclient
#  message format in the wire, it can be classic or netstring, 
# default: classic
#msgfmt  classic/netstring
# Do we use logging daemon?
# If logging daemon is used, logfile/debugfile/logfacility in this file
# are not meaningful any longer. You should check the config file for logging
# daemon (the default is /etc/logd.cf)
# more infomartion can be fould in the man page.
# Setting use_logd to "yes" is recommended

# use_logd yes/no
#
# the interval we  reconnect to logging daemon if the previous connection failed
# default: 60 seconds
#conn_logd_time 60
#
#
# Configure compression module
# It could be zlib or bz2, depending on whether u have the corresponding 
# library in the system.
#compression bz2
#
# Confiugre compression threshold
# This value determines the threshold to compress a message,
# e.g. if the threshold is 1, then any message with size greater than 1 KB
# will be compressed, the default is 2 (KB)
#compression_threshold 2
================================================================
[root@node1 ha.d]# cat haresources
node1 drbddisk::r0 Filesystem::/dev/drbd0::/database mysqld IPaddr::192.168.200.222/24/eth0
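This single line defines the whole failover group; heartbeat starts the resources left to right and stops them in reverse order. Reading it field by field:
# node1                             -- preferred node (must match uname -n)
# drbddisk::r0                      -- promote DRBD resource r0 to Primary
# Filesystem::/dev/drbd0::/database -- mount the DRBD device on /database
# mysqld                            -- start MySQL via /etc/ha.d/resource.d/mysqld
# IPaddr::192.168.200.222/24/eth0   -- bring up the service VIP on eth0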
===================================================================
[root@node1 ha.d]# cat authkeys 
auth 1
#1 crc
1 sha1 abc
===========================
[root@node1 resource.d]# pwd
/etc/ha.d/resource.d
[root@node1 resource.d]# vi drbddisk
#!/bin/bash
#
# This script is inteded to be used as resource script by heartbeat
#
# Copright 2003-2008 LINBIT Information Technologies
# Philipp Reisner, Lars Ellenberg
#
###
DEFAULTFILE="/etc/default/drbd"
DRBDADM="/sbin/drbdadm"

if [ -f $DEFAULTFILE ]; then
    . $DEFAULTFILE
fi

if [ "$#" -eq 2 ]; then
    RES="$1"
    CMD="$2"
else
    RES="all"
    CMD="$1"
fi

## EXIT CODES
# since this is a "legacy heartbeat R1 resource agent" script,
# exit codes actually do not matter that much as long as we conform to
# http://wiki.linux-ha.org/HeartbeatResourceAgent
# but it does not hurt to conform to lsb init-script exit codes,
# where we can.
# http://refspecs.linux-foundation.org/LSB_3.1.0/
#LSB-Core-generic/LSB-Core-generic/iniscrptact.html
####

drbd_set_role_from_proc_drbd()
{
    local out
    if ! test -e /proc/drbd; then
        ROLE="Unconfigured"
        return
    fi

    dev=$( $DRBDADM sh-dev $RES )
    minor=${dev#/dev/drbd}
    if [[ $minor = *[!0-9]* ]] ; then
        # sh-minor is only supported since drbd 8.3.1
        minor=$( $DRBDADM sh-minor $RES )
    fi
    if [[ -z $minor ]] || [[ $minor = *[!0-9]* ]] ; then
        ROLE=Unknown
        return
    fi

    if out=$(sed -ne "/^ *$minor: cs:/ { s/:/ /g; p; q; }" /proc/drbd); then
        set -- $out
        ROLE=${5%/**}
        : ${ROLE:=Unconfigured} # if it does not show up
    else
        ROLE=Unknown
    fi
}

case "$CMD" in
    start)
        # try several times, in case heartbeat deadtime
        # was smaller than drbd ping time
        try=6
        while true; do
            $DRBDADM primary $RES && break
            let "--try" || exit 1 # LSB generic error
            sleep 1
        done
        ;;
    stop)
        # heartbeat (haresources mode) will retry failed stop
        # for a number of times in addition to this internal retry.
        try=3
        while true; do
            $DRBDADM secondary $RES && break
            # We used to lie here, and pretend success for anything != 11,
            # to avoid the reboot on failed stop recovery for "simple
            # config errors" and such. But that is incorrect.
            # Don't lie to your cluster manager.
            # And don't do config errors...
            let --try || exit 1 # LSB generic error
            sleep 1
        done
        ;;
    status)
        if [ "$RES" = "all" ]; then
            echo "A resource name is required for status inquiries."
            exit 10
        fi
        ST=$( $DRBDADM role $RES )
        ROLE=${ST%/**}
        case $ROLE in
            Primary|Secondary|Unconfigured)
                # expected
                ;;
            *)
                # unexpected. whatever...
                # If we are unsure about the state of a resource, we need to
                # report it as possibly running, so heartbeat can, after failed
                # stop, do a recovery by reboot.
                # drbdsetup may fail for obscure reasons, e.g. if /var/lock/ is
                # suddenly readonly. So we retry by parsing /proc/drbd.
                drbd_set_role_from_proc_drbd
        esac
        case $ROLE in
            Primary)
                echo "running (Primary)"
                exit 0 # LSB status "service is OK"
                ;;
            Secondary|Unconfigured)
                echo "stopped ($ROLE)"
                exit 3 # LSB status "service is not running"
                ;;
            *)
                # NOTE the "running" in below message.
                # this is a "heartbeat" resource script,
                # the exit code is _ignored_.
                echo "cannot determine status, may be running ($ROLE)"
                exit 4 # LSB status "service status is unknown"
                ;;
        esac
        ;;
    *)
        echo "Usage: drbddisk [resource] {start|stop|status}"
        exit 1
        ;;
esac
exit 0
###################################################
[root@node1 resource.d]# chmod +x drbddisk
[root@node1 /]# scp authkeys ha.cf haresources node2:/etc/ha.d/
[root@node1 /]# scp /etc/ha.d/resource.d/drbddisk node2:/etc/ha.d/resource.d/
----------------------------------------------------------------
On node2:
Only two lines of /etc/ha.d/ha.cf need to change on node2:
[root@node2 ha.d]# vi ha.cf
ucast eth0 192.168.200.157
ucast eth1 192.168.1.157
On both node1 and node2, do the following:
[root@node1 ~]# ln -s /etc/init.d/mysqld /etc/ha.d/resource.d/mysqld
[root@node1 ~]# service heartbeat restart
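With heartbeat running on both nodes, a controlled failover drill is worth doing before going live. Heartbeat ships a helper for a clean hand-over (path as installed by this heartbeat 3.0.4 package; adjust if yours differs):
[root@node1 ~]# /usr/share/heartbeat/hb_standby          # hand all resources to node2
[root@node2 ~]# ip addr show eth0 | grep 192.168.200.222 # the VIP should move here
[root@node2 ~]# df -hT | grep database                   # and /dev/drbd0 should be mounted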
========================================================
Next, set up the MySQL slave servers.
On dbs1 (192.168.200.159):
[root@dbs1 ~]# yum -y install mysql-server mysql-devel mysql mysql-bench mysql-test
Likewise on dbs2 (192.168.200.160):
[root@dbs2 ~]# yum -y install mysql-server mysql-devel mysql mysql-bench mysql-test
===========================================================
On node1 and node2, edit the configuration file my.cnf:
[root@node1 ~]# vi /etc/my.cnf 
[root@node1 ~]# cat /etc/my.cnf 
[mysqld]
datadir=/database/mysql
server-id = 1
log-bin=mysql-bin
binlog-do-db=aaa
binlog-ignore-db=mysql
socket=/var/lib/mysql/mysql.sock
user=mysql
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
[mysqld_safe]
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
==================================================================
On the master nodes (node1/node2), create the replication user:
mysql> grant replication slave on *.* to 'bakcup'@'%' identified by '123456';
[root@node1 ha.d]# service mysqld restart
=======================================================
Back up the master's data:
mysql> flush tables with read lock;
Query OK, 0 rows affected (0.00 sec)
mysql> reset master;
Query OK, 0 rows affected (0.04 sec)
mysql> 
Do not exit this session while the lock is held.
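While the lock is held, also record the replication coordinates the slaves will need for CHANGE MASTER later; after RESET MASTER on MySQL 5.1 they should read mysql-bin.000001 / 106, but checking beats assuming:
mysql> show master status;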
[root@node1 database]# tar zcvf mysql.tar.gz mysql/
[root@node1 database]# scp mysql.tar.gz 192.168.200.159:/var/lib/
[root@node1 database]# scp mysql.tar.gz 192.168.200.160:/var/lib/
Once the transfer completes, release the lock in the same session:
mysql> unlock tables;
=======================================================
Configure the slave hosts (dbs1, dbs2).
Edit /etc/my.cnf and add the following under [mysqld]:
[root@dbs1 ~]# vi /etc/my.cnf 
[root@dbs1 ~]# cat /etc/my.cnf 
[mysqld]
datadir=/var/lib/mysql
server-id=2
log-bin=mysql-bin
binlog-do-db=aaa
binlog-do-db=ccc
binlog-ignore-db=mysql
socket=/var/lib/mysql/mysql.sock
user=mysql
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
[mysqld_safe]
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
===============================================
[root@dbs1 lib]# rm -rf mysql
[root@dbs1 lib]# tar xf mysql.tar.gz -C ./
################################################
Do the same on dbs2, changing only one parameter in my.cnf:
server-id=3
################################################
Start the mysqld service on dbs1 and dbs2:
[root@dbs1 lib]# service mysqld start
Starting mysqld:                                          [  OK  ]
==================================================================
On the slaves dbs1 and dbs2, run:
mysql> change master to master_host='192.168.200.222',
    -> master_user='bakcup',
    -> master_password='123456',
    -> master_log_file='mysql-bin.000001',
    -> master_log_pos=106;
Query OK, 0 rows affected (0.04 sec)
mysql> slave start;
Query OK, 0 rows affected (0.00 sec)
mysql> show slave status\G;
*************************** 1. row ***************************
               Slave_IO_State: Waiting for master to send event
                  Master_Host: 192.168.200.222
                  Master_User: bakcup
                  Master_Port: 3306
                Connect_Retry: 60
              Master_Log_File: mysql-bin.000001
          Read_Master_Log_Pos: 106
               Relay_Log_File: mysqld-relay-bin.000002
                Relay_Log_Pos: 251
        Relay_Master_Log_File: mysql-bin.000001
             Slave_IO_Running: Yes
            Slave_SQL_Running: Yes
              Replicate_Do_DB: 
          Replicate_Ignore_DB: 
           Replicate_Do_Table: 
       Replicate_Ignore_Table: 
      Replicate_Wild_Do_Table: 
  Replicate_Wild_Ignore_Table: 
                   Last_Errno: 0
                   Last_Error: 
                 Skip_Counter: 0
          Exec_Master_Log_Pos: 106
              Relay_Log_Space: 407
              Until_Condition: None
               Until_Log_File: 
                Until_Log_Pos: 0
           Master_SSL_Allowed: No
           Master_SSL_CA_File: 
           Master_SSL_CA_Path: 
              Master_SSL_Cert: 
            Master_SSL_Cipher: 
               Master_SSL_Key: 
        Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
                Last_IO_Errno: 0
                Last_IO_Error: 
               Last_SQL_Errno: 0
               Last_SQL_Error: 
1 row in set (0.00 sec)
ERROR: 
No query specified
===============================================
###################################################################
Next: keepalived + MySQL.
First, on dbs1 and dbs2, create the following script to bring up the floating (virtual) IP:
[root@dbs1 ~]# vi /opt/DR.sh
#!/bin/bash
#description : start realserver
VIP=192.168.200.223
. /etc/rc.d/init.d/functions
case "$1" in
    start)
        echo " start LVS  of  REALServer"
        /sbin/ifconfig lo:0 $VIP broadcast $VIP netmask 255.255.255.255 up
        echo "1" >/proc/sys/net/ipv4/conf/lo/arp_ignore
        echo "2" >/proc/sys/net/ipv4/conf/lo/arp_announce
        echo "1" >/proc/sys/net/ipv4/conf/all/arp_ignore
        echo "2" >/proc/sys/net/ipv4/conf/all/arp_announce
        ;;
    stop)
        /sbin/ifconfig lo:0 down
        echo "close LVS REALserver"
        echo "0" >/proc/sys/net/ipv4/conf/lo/arp_ignore
        echo "0" >/proc/sys/net/ipv4/conf/lo/arp_announce
        echo "0" >/proc/sys/net/ipv4/conf/all/arp_ignore
        echo "0" >/proc/sys/net/ipv4/conf/all/arp_announce
        ;;
    *)
        echo "Usage: $0 {start|stop}"
        exit 1
esac
========================================
[root@dbs1 ~]# chmod +x /opt/DR.sh 
[root@dbs1 ~]# /opt/DR.sh start
 start LVS  of  REALServer
[root@dbs1 ~]# ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN 
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
    inet 192.168.200.223/32 brd 192.168.200.223 scope global lo:0
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:32:69:0b brd ff:ff:ff:ff:ff:ff
    inet 192.168.200.159/24 brd 192.168.200.255 scope global eth0
    inet6 fe80::20c:29ff:fe32:690b/64 scope link 
       valid_lft forever preferred_lft forever
===========================
Do the same on dbs2.
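Note that the lo:0 alias and the arp_ignore/arp_announce sysctls do not survive a reboot. One simple way to persist them on these CentOS 6 hosts, assuming /etc/rc.d/rc.local runs at boot (it does by default):
[root@dbs1 ~]# echo "/opt/DR.sh start" >> /etc/rc.d/rc.local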
============================================================================
Building a highly available MySQL slave farm with LVS + Keepalived
1. Install keepalived and ipvsadm on both KEEP1 and KEEP2:
[root@localhost ~]# yum -y install keepalived ipvsadm
2. Create the keepalived.conf configuration file on KEEP1:
[root@localhost ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 192.168.200.1
   smtp_connect_timeout 30
   router_id LVS_DEVEL_1
}
vrrp_instance VI_1 {
    state BACKUP
    interface eth0
    virtual_router_id 51
    priority 180
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.200.223
    }
}
virtual_server 192.168.200.223 3306 {
    delay_loop 30
    lb_algo rr
    lb_kind DR
    persistence_timeout 120
    protocol TCP
    real_server 192.168.200.159 3306 {
 MISC_CHECK {
  misc_path "/etc/keepalived/check_slave.pl 192.168.200.159"
  misc_dynamic
 }
    }
    real_server 192.168.200.160 3306 {
 MISC_CHECK {
  misc_path "/etc/keepalived/check_slave.pl 192.168.200.160"
  misc_dynamic
 }
    }
}
----------------------------------------------------
[root@localhost ~]# cd /etc/keepalived/
[root@localhost keepalived]# ls
check_slave.pl  keepalived.conf  keepalived.conf.origin
[root@localhost keepalived]# cat check_slave.pl 
#!/usr/bin/perl -w
use DBI;
use DBD::mysql;
# CONFIG VARIABLES
$SBM = 120;
$db = "ixdba";
$host = $ARGV[0];
$port = 3306;
$user = "root";
$pw = "mysql";
# SQL query
$query = "show slave status";
$dbh = DBI->connect("DBI:mysql:$db:$host:$port", $user, $pw, { RaiseError => 0,PrintError => 0 });
if (!defined($dbh)) {
    exit 1;
}
$sqlQuery = $dbh->prepare($query);
$sqlQuery->execute;
$Slave_IO_Running =  "";
$Slave_SQL_Running = "";
$Seconds_Behind_Master = "";
while (my $ref = $sqlQuery->fetchrow_hashref()) {
    $Slave_IO_Running = $ref->{'Slave_IO_Running'};
    $Slave_SQL_Running = $ref->{'Slave_SQL_Running'};
    $Seconds_Behind_Master = $ref->{'Seconds_Behind_Master'};
}
$sqlQuery->finish;
$dbh->disconnect();
if ( $Slave_IO_Running eq "No" || $Slave_SQL_Running eq "No" ) {
    exit 1;
} else {
    if ( $Seconds_Behind_Master > $SBM ) {
        exit 1;
    } else {
        exit 0;
    }
}
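The health check is easiest to debug by hand before keepalived drives it; MISC_CHECK treats exit status 0 as healthy and non-zero as failed. Note the script connects to the ixdba database as root/mysql, so that database and those credentials must exist on the real servers (or edit $db/$user/$pw to match this setup, e.g. the replicated aaa database):
[root@localhost keepalived]# chmod +x check_slave.pl
[root@localhost keepalived]# ./check_slave.pl 192.168.200.159; echo $?
# 0 = slave healthy; 1 = IO/SQL thread stopped, lag over 120 s, or connect failure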
=============================================================
Configure KEEP2 the same way:
[root@dbs147 ~]# cd /etc/keepalived/
[root@dbs147 keepalived]# ls
check_slave.pl  keepalived.conf  keepalived.conf.origin
[root@dbs147 keepalived]# cat keepalived.conf
! Configuration File for keepalived
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 192.168.200.1
   smtp_connect_timeout 30
   router_id LVS_DEVEL_2
}
vrrp_instance VI_1 {
    state BACKUP
    interface eth0
    virtual_router_id 51
    priority 150
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.200.223
    }
}
virtual_server 192.168.200.223 3306 {
    delay_loop 30
    lb_algo rr
    lb_kind DR
    persistence_timeout 120
    protocol TCP
    real_server 192.168.200.159 3306 {
 MISC_CHECK {
  misc_path "/etc/keepalived/check_slave.pl 192.168.200.159"
  misc_dynamic
 }
    }
    real_server 192.168.200.160 3306 {
 MISC_CHECK {
  misc_path "/etc/keepalived/check_slave.pl 192.168.200.160"
  misc_dynamic
 }
    }
}
check_slave.pl on KEEP2 is identical to the copy shown for KEEP1 above.
=========================================================
Finally, start keepalived on both KEEP1 and KEEP2:
[root@localhost keepalived]# service keepalived restart
Stopping keepalived:                                       [  OK  ]
Starting keepalived:                                       [  OK  ]
----------------------------------------------------------------
[root@localhost keepalived]# ip addr 
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN 
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:8b:d7:30 brd ff:ff:ff:ff:ff:ff
    inet 192.168.200.146/24 brd 192.168.200.255 scope global eth0
    inet 192.168.200.223/32 scope global eth0
    inet6 fe80::20c:29ff:fe8b:d730/64 scope link 
       valid_lft forever preferred_lft forever
[root@localhost keepalived]# ipvsadm -L -n
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
TCP  192.168.200.223:3306 rr persistent 120
  -> 192.168.200.159:3306         Route   0      0          0         
  -> 192.168.200.160:3306         Route   0      0          0 
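As a final end-to-end test, query MySQL through the VIP from a client on the 192.168.200.0/24 network (not from the directors themselves, since LVS-DR does not loop back locally). Any account valid on dbs1/dbs2 will do; the replication user is just a convenient example:
[root@client ~]# mysql -h 192.168.200.223 -P 3306 -ubakcup -p123456 -e "select @@hostname;"
Repeated connections from different client IPs should alternate between dbs1 and dbs2, subject to the 120-second persistence window configured above.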
=====================================================================
 
