由于公司业务服务器分布比较广,如果按照之前的监控架构的话,就是每个IDC增加一个nagios监控,想来这样子也有好处,可以互相监控,但是由于每个IDC部署一个nagios,无疑增加了监控人员的查看难度,所以就研究了一下分布式nagios监控。

一,分角色

1,监控中心服务器,分布式服务器,被监控服务器

监控中心服务器:通过NSCA获取分布式监控服务器的相关状态,呈现相关服务器状态和发出报警等;

分布式服务器:采集被监控服务器的状态,并通过send_nsca把这些状态发送给监控中心服务器。

被监控服务器:被监控服务器就是生产环境服务器。

nagios分布式部署详细文档 _nagios 监控 分布式

二,详细部署

1,被监控服务器

tar -zxvf nagios-plugins-1.4.15.tar.gz

cd nagios-plugins-1.4.15

./configure

make

make install

chown nagios.nagios /usr/local/nagios

chown -R nagios.nagios /usr/local/nagios/libexec

cd ..

ls

tar -zxvf nrpe-2.12.tar.gz 

pwd

ls

cd nrpe-2.12

./configure 

make all

make install-plugin

make install-daemon

make install-daemon-config

vi /usr/local/nagios/etc/nrpe.cfg

将allowed_hosts=127.0.0.1

修改成你的nagios分布式服务器的ip

/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d    #启动nrpe

netstat -anl|grep 5666 #测试监听端口

2,安装分布式服务器

useradd nagios

passwd nagios

groupadd nagcmd

usermod -a -G nagcmd nagios

usermod -a -G nagcmd apache           #创建nagios用户和nagcmd组,并把nagios、apache用户追加到nagcmd组(-a表示追加,不会覆盖用户原有的附加组)

tar -zxvf nagios-3.2.3.tar.gz 

cd nagios-3.2.3

./configure --with-command-group=nagcmd

make all

make install

make install-init

make install-config

make install-commandmode

tar -zxvf nagios-plugins-1.4.15.tar.gz 

cd nagios-plugins-1.4.15

./configure --with-nagios-user=nagios --with-nagios-group=nagcmd

make

make install

chkconfig --add nagios

chkconfig nagios on

tar -zxvf nrpe-2.12.tar.gz 

cd nrpe-2.12

./configure 

make all

make install-plugin

/usr/local/nagios/libexec/check_nrpe -H 192.168.20.100                   #测试被监控服务器是否连通,正常情况下会返回被监控端的NRPE版本

vi /usr/local/nagios/etc/objects/commands.cfg

# check_nrpe: external check command added so that this server can query the
# NRPE daemon on a monitored host.
# Only $ARG1$ is forwarded (as the NRPE command name given to -c); any further
# "!"-separated arguments in a service's check_command are not passed on.
# Note: in Nagios object files "#" starts a comment only at the beginning of a
# line, so no annotation may follow the closing brace.

define command{

       command_name check_nrpe

       command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$

       }

tar -zxvf nsca-2.7.2.tar.gz 

cd nsca-2.7.2

./configure

make all

cp sample-config/send_nsca.cfg /usr/local/nagios/etc/

cd /usr/local/nagios/etc/

chown nagios.nagios send_nsca.cfg 

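cp /usr/local/src/nsca-2.7.2/src/send_nsca /usr/local/nagios/bin/       #上一步已经cd到/usr/local/nagios/etc/,相对路径src/send_nsca不再有效,这里使用源码目录的绝对路径(假设源码解压在/usr/local/src下)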

cd /usr/local/nagios/bin/

chown nagios.nagios send_nsca 

vi /usr/local/nagios/libexec/submit_check_result                          #创建脚本

#!/bin/sh
#
# submit_check_result - forward one service check result from this
# distributed Nagios server to the central monitoring server via send_nsca.
#
# Arguments:
#   $1 = host_name       (short name of the host the service belongs to)
#   $2 = svc_description (description of the service)
#   $3 = state_string    ("OK", "WARNING", "CRITICAL" or "UNKNOWN")
#   $4 = plugin_output   (text to use as the plugin output of the check)
#
# Exit status: that of send_nsca (last command of the pipeline).

# Address of the central monitoring server running the nsca daemon.
central_server=192.168.20.195
send_nsca_bin=/usr/local/nagios/bin/send_nsca
send_nsca_cfg=/usr/local/nagios/etc/send_nsca.cfg

# Convert a Nagios state string into the plugin return code.
# UNKNOWN is 3 in the plugin API; anything unrecognised is also reported
# as UNKNOWN rather than as an out-of-range code such as -1.
state_to_return_code() {
  case "$1" in
    OK)
      echo 0
      ;;
    WARNING)
      echo 1
      ;;
    CRITICAL)
      echo 2
      ;;
    *)
      echo 3
      ;;
  esac
}

return_code=$(state_to_return_code "$3")

# Pipe the tab-separated check result into send_nsca, which transmits it to
# the nsca daemon on the central server. The host must be given with -H.
/bin/printf "%s\t%s\t%s\t%s\n" "$1" "$2" "$return_code" "$4" |
  "$send_nsca_bin" -H "$central_server" -c "$send_nsca_cfg"

chmod +x /usr/local/nagios/libexec/submit_check_result 

chown nagios.nagios /usr/local/nagios/libexec/submit_check_result 

vi /usr/local/nagios/etc/objects/commands.cfg       #增加如下检测命令

# Command that hands a finished service check result to the
# submit_check_result script. The four positional arguments are, in order:
# host name, service description, service state and plugin output.
# The description and output are single-quoted because they may contain spaces.
define command{

        command_name    submit_check_result

        command_line    /usr/local/nagios/libexec/submit_check_result $HOSTNAME$ '$SERVICEDESC$' $SERVICESTATE$ '$SERVICEOUTPUT$'

        }

vi /usr/local/nagios/etc/nagios.cfg

enable_notifications=0                          #禁用告警

obsess_over_services=1 #开启被动监控

ocsp_command=submit_check_result #定义每次执行完检查后执行的命令

obsess_over_hosts=1 #开启主机被动监控

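ochp_command=submit_check_result #指定每次执行完主机检查后执行的命令(注意:上面定义的submit_check_result使用的是$SERVICEDESC$、$SERVICESTATE$、$SERVICEOUTPUT$等服务宏,主机检查时这些宏为空;主机结果应另外定义一个使用$HOSTSTATE$、$HOSTOUTPUT$宏的提交命令)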

vi /usr/local/nagios/etc/send_nsca.cfg

password=urgamer #设置密码,此处设置的密码要和监控中心服务器一致

配置被监控的服务器,此处配置在分布式监控服务器上配置

cd /usr/local/nagios/etc/objects/

vi hosts.cfg

# Monitored host as seen from the distributed server; it is actively checked
# from here using the linux-server template.
define host{

        use                     linux-server            ; Name of host template to use

                                                        ; This host definition will inherit all variables that are defined

                                                        ; in (or inherited by) the linux-server host template definition.

        host_name               urg-test01

        alias                   linux-test01

        address                 192.168.20.100          ; was "192.168.20..100" (malformed IPv4 address)

        }

vi services.cfg

        # Services actively checked from the distributed server for host urg-test01.
        # PING runs locally on this server via check_ping (warning 100.0,20% / critical 500.0,60%).
        define service{

        use                             local-service         ; Name of service template to use

        host_name                       urg-test01

        service_description             PING

        check_command                   check_ping!100.0,20%!500.0,60%

        }

# The remaining services go through check_nrpe. As defined in commands.cfg in
# this document, check_nrpe forwards only $ARG1$ (the NRPE command name), so the
# extra "!"-separated values below are not sent to the monitored host.
# NOTE(review): check_local_disk / check_local_users / check_local_procs must
# exist as command[] entries in the monitored host's nrpe.cfg - confirm.
define service{

        use                             local-service         ; Name of service template to use

        host_name                       urg-test01

        service_description             Root Partition

        check_command                   check_nrpe!check_local_disk!20%!10%!/

        }

define service{

        use                             local-service         ; Name of service template to use

        host_name                       urg-test01

        service_description             Current Users

        check_command                   check_nrpe!check_local_users!20!50

        }

define service{

        use                             local-service         ; Name of service template to use

        host_name                       urg-test01

        service_description             Total Processes

        check_command                   check_nrpe!check_local_procs!250!400!RSZDT

        }

 vi /usr/local/nagios/etc/nagios.cfg               #添加以下两行配置(nagios.cfg位于/usr/local/nagios/etc/,不在当前的objects目录下)

cfg_file=/usr/local/nagios/etc/objects/hosts.cfg

cfg_file=/usr/local/nagios/etc/objects/services.cfg

 /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置文件

  service nagios start                                            #启动nagios

3,安装监控中心服务器

首先确认监控中心服务器已经安装了apache,并且已禁用SELinux。

useradd nagios

passwd nagios

groupadd nagcmd

usermod -a -G nagcmd nagios

usermod -a -G nagcmd apache           #创建nagios用户和nagcmd组,并把nagios、apache用户追加到nagcmd组(-a表示追加,不会覆盖用户原有的附加组)

tar -zxvf nagios-3.2.3.tar.gz

cd nagios-3.2.3

./configure --with-command-group=nagcmd

make all

make install

make install-init

make install-config

make install-commandmode 

make install-webconf  

htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin

tar xzf nagios-plugins-1.4.11.tar.gz

cd nagios-plugins-1.4.11

./configure --with-nagios-user=nagios --with-nagios-group=nagcmd

make

make install

chkconfig --add nagios

chkconfig nagios on    

tar -zxvf nsca-2.7.2.tar.gz

 cd nsca-2.7.2

 ./configure

 make all

cp /usr/local/src/nsca-2.7.2/src/nsca /usr/local/nagios/bin/

chown nagios:nagios /usr/local/nagios/bin/nsca

cp /usr/local/src/nsca-2.7.2/sample-config/nsca.cfg /usr/local/nagios/etc

chown nagios:nagios /usr/local/nagios/etc/nsca.cfg

vi /usr/local/nagios/etc/nsca.cfg

password=urgamer             #此处和分布式监控服务器密码一致

vi /usr/local/nagios/etc/nagios.cfg

  check_external_commands=1 # 配置nagios检查扩展命令

  accept_passive_service_checks=1 # 配置接受被动服务检测的结果

  accept_passive_host_checks=1 #配置接受被动主机检测的结果

cd /usr/local/nagios/etc/

mkdir monitor

cd monitor

vi monitor.cfg               #注意:还需要在/usr/local/nagios/etc/nagios.cfg中添加一行 cfg_dir=/usr/local/nagios/etc/monitor,否则该目录下的配置不会被加载

# Host as seen from the central server: state is received passively
# (via NSCA) from the distributed server, so active checks are disabled.
define host{

        use     linux-server

        host_name       urg-test01

        address         192.168.20.100          ; was "192,168,20.100" (commas instead of dots)

        passive_checks_enabled  1

        active_checks_enabled   0

}

# Passive service on the central server. host_name and service_description
# must match exactly what the distributed server submits, otherwise the
# incoming passive result cannot be matched to this service.
define service{

        use     local-service

        host_name       urg-test01

        service_description Root Partition      ; was "Root Partiton"; must equal the name used on the distributed server

        check_command   check_local_disk!30%!10!/       ; NOTE(review): presumably run only when the freshness check below fires - confirm the intended command

        check_freshness         1

        freshness_threshold     450

        passive_checks_enabled  1

        active_checks_enabled   0               ; was misspelled "active_checks_enables"

}

/usr/local/nagios/bin/nsca -d -c /usr/local/nagios/etc/nsca.cfg

service nagios restart

此时重新打开浏览器,就会显示新加的服务器。

nagios分布式部署详细文档 _nagios 监控 分布式_02