实验目的:部署Nagios监控软件,实时监控主机和服务(有问题通过Email告知Admin)
实验环境:rhel5.5   10.1.1.190(web)  10.1.1.191(mysql) 10.1.1.192(web)  nagios:10.1.1.193
实验前提:Nagios监控服务器有web服务(此处采用apache)
实验步骤:

第一:部署Nagios所需软件包说明
fping-2.4-1.b2.2.el5.rf.i386.rpm
nagios-2.9-1.el5.rf.i386.rpm                 --nagios主程序   
nagios-devel-2.9-1.el5.rf.i386.rpm         --nagios所需库文件
nagios-nrpe-2.5.2-1.el5.rf.i386.rpm         --nrpe服务
nagios-plugins-1.4.9-1.el5.rf.i386.rpm         --nagios服务插件   
nagios-plugins-nrpe-2.12-3.el5.i386.rpm    --nrpe服务插件   
perl-Crypt-DES-2.05-3.2.el5.rf.i386.rpm
perl-Net-SNMP-5.2.0-1.2.el5.rf.i386.rpm

        |-监控服务,监控主机          ------>nagios服务
【nagios】
        |-资源阈值,eg:磁盘容量超过70%报警等  ------->nrpe服务提供


第二:nagios主配置文件配置
配置环境:/etc/nagios/
        localhost.cfg                模板参考
        cgi.cfg                          权限定义
        command-plugins.cfg  插件定义
        commands.cfg             插件定义
        nagios.cfg                    nagios服务配置
        nrpe.cfg                        nrpe服务
        resource.cfg                 插件目录位置定义


/etc/nagios/nagios.cfg
cfg_file=/etc/nagios/contactgroups.cfg
cfg_file=/etc/nagios/contacts.cfg
#cfg_file=/etc/nagios/dependencies.cfg
#cfg_file=/etc/nagios/escalations.cfg
cfg_file=/etc/nagios/hostgroups.cfg
cfg_file=/etc/nagios/hosts.cfg
cfg_file=/etc/nagios/services.cfg
cfg_file=/etc/nagios/timeperiods.cfg

check_external_commands=1

第三:nagios.cfg文件中,相关文件的配置
1./etc/nagios/timeperiods.cfg 定义上班时间(三班倒)
define timeperiod{
                timeperiod_name morning
                alias                                morning
                sunday                            8:00-16:00
                monday                          8:00-16:00
                tuesday                          8:00-16:00
                wednesday                     8:00-16:00
                thursday                         8:00-16:00
                friday                             8:00-16:00
                saturday                        8:00-16:00

}
define timeperiod{
                timeperiod_name afternoon
                alias                                afternoon
                sunday                        16:00-24:00
                monday                       16:00-24:00
                tuesday                       16:00-24:00
                wednesday                 16:00-24:00
                thursday                      16:00-24:00
                friday                            16:00-24:00
                saturday                      16:00-24:00
}

define timeperiod{
                timeperiod_name evening
                alias                                 evening
                sunday                        00:00-08:00
                monday                      00:00-08:00
                tuesday                      00:00-08:00
                wednesday                 00:00-08:00
                thursday                      00:00-08:00
                friday                           00:00-08:00
                saturday                      00:00-08:00
}
2./etc/nagios/contacts.cfg定义联系人  

define contact{
                contact_name                               john
                alias                                              sql Admin, web admin
                service_notification_period         evening
                host_notification_period              evening
                service_notification_options        w,u,c,r
                host_notification_options             d,r
                service_notification_commands  notify-by-email
                host_notification_commands       host-notify-by-email
                email                                             john@163.com
                }

define contact{
                contact_name                               terry
                alias                                              web admin
                service_notification_period         evening
                host_notification_period               evening
                service_notification_options        w,u,c,r
                host_notification_options                 d,r
                service_notification_commands  notify-by-email
                host_notification_commands       host-notify-by-email
                email                                             terry@163.com
                }

3./etc/nagios/contactgroups.cfg 定义联系组

define contactgroup{
                contactgroup_name                        webadm
                alias                                                 web Administrators
                members                                         terry,john
                }

define contactgroup{
                contactgroup_name                         sqladm
                alias                                                 sql Administrators
                members                                          john
                }

4./etc/nagios/hosts.cfg定义被监控主机

define host{
                host_name                             station190.cluster.com              
                alias                                                     station190
                notification_options                            d,u,r
                notification_period                              24x7
                notification_interval                            10
                max_check_attempts                           4
                contact_groups                                   webadm
                address                                              10.1.1.190
                check_command                          check-host-alive    
                                                  \<-在 commands.cfg中一定要定义
                }
define host{
                host_name                                 station191.cluster.com  
                alias                                            station191
                notification_options                    d,u,r
                notification_period                       24x7
                notification_interval                    10
                max_check_attempts                    4
                contact_groups                            sqladm
                address                                     10.1.1.191
                check_command                 check-host-alive    
                                                         \ <-在 commands.cfg中一定要 定义
                }
define host{
                host_name       station192.cluster.com                                 
                alias                                         station192
                notification_options                  d,u,r
                notification_period                    24x7
                notification_interval                 10
                max_check_attempts                4
                contact_groups                         webadm
                address                                  10.1.1.192
                check_command               check-host-alive    
                                                       \<-在 commands.cfg中一定要 定义
                }


--------------------------------------------------------------------------------------------------------------
define command{
        command_name    check-host-alive
        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 1
        }
--------------------------------------------------------------------------------------------------------------

5./etc/nagios/hostgroups.cfg 定义主机组
define hostgroup{
        hostgroup_name      web_service
        alias                   www Servers
        members               station190.cluster.com, station192.cluster.com
        }

define hostgroup{
        hostgroup_name        sql_service
        alias                     sql Servers
        members               station191.cluster.com
        }


6./etc/nagios/services.cfg

#--------------------------------主机----------------------------------------
define  service{
        host_name   station190.cluster.com
        service_description     check-host-alive
        check_command           check-host-alive
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          webadm
}
define  service{
        host_name   station191.cluster.com
        service_description       check-host-alive
        check_command           check-host-alive
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          sqladm
}
define  service{
        host_name   station192.cluster.com
        service_description     check-host-alive
        check_command           check-host-alive
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          webadm
}

#------------------------------服务-----------------------------------------
define  service{
        host_name   station190.cluster.com
        service_description     check_http
        check_command           check_http
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          webadm
}

define  service{
        host_name   station191.cluster.com
        service_description     check_mysql
        check_command           check_mysql
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          sqladm
}

define  service{
        host_name   station192.cluster.com
        service_description     check_http
        check_command           check_http
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          webadm
}


注意:mysql服务检测需要:
#------------------------------------------------------------------------------------
host:10.1.1.191 
mysql>grant select on *.* to  'test'@'10.1.1.193' identified by '123123';
mysql>flush privileges;

nagios:10.1.1.193
在/etc/nagios/commands.cfg中添加:
define command{
        command_name    check_mysql
        command_line    $USER1$/check_mysql -H $HOSTADDRESS$ -P 3306 -u test -p 123123
}

dns服务:
define command{
        command_name    check_dns
        command_line    $USER1$/check_dns -H www.yahoo.com -s $HOSTADDRESS$
        }

nagios相关插件存放位置:/usr/lib/nagios/plugins

7./etc/nagios/cgi.cfg 权限定义
default_user_name=terry
authorized_for_system_information=terry
authorized_for_configuration_information=terry
authorized_for_system_commands=terry
authorized_for_all_services=terry
authorized_for_all_hosts=terry
authorized_for_all_service_commands=terry
authorized_for_all_host_commands=terry

第四:校验 nagios配置是否有错
nagios  -v /etc/nagios/nagios.cfg        <- 校验
Total Warnings: 0
Total Errors:   0
启动nagios:
service nagios start <- 不报错

第五:配置apache服务器(在nagios部署之前完成apache搭建)
yum -y install httpd-*

安装nagios之后会产生:
/etc/httpd/conf.d/nagios.conf

ScriptAlias /nagios/cgi-bin "/usr/lib/nagios/cgi"
<Directory "/usr/lib/nagios/cgi">
   Options ExecCGI
   AllowOverride None
   Order allow,deny
   Allow from 10.1.1.0/255.255.255.0            #####
   AuthName "Nagios Access"
   AuthType Basic
   AuthUserFile /etc/nagios/htpasswd.users        <- 定义用户
   Require valid-user
</Directory>
Alias /nagios "/usr/share/nagios"
<Directory "/usr/share/nagios">
   Options None
   AllowOverride None
   Order allow,deny
   Allow from 10.1.1.0/255.255.255.0            ####
   AuthName "Nagios Access"
   AuthType Basic
   AuthUserFile /etc/nagios/htpasswd.users        <- 定义
   Require valid-user
</Directory>

htpasswd -c /etc/nagios/htpasswd.users terry


=========================================
资源阈值监控:


服务器
hosts.cfg  -> 对应主机

commands.cfg
define command {
    command_name check_nrpe
    command_line  $USER1$/check_nrpe -H $HOSTADDRESS$ -P 5666  -c $ARG1$
}

/etc/nagios/services.cfg
define  service{
        host_name   station146.cluster.com
        service_description     check_disk1
        check_command           check_nrpe!check_disk1        <- command
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          sqladm
}
define  service{
        host_name   station146.cluster.com
        service_description     check_user
        check_command           check_nrpe!check_user        <- command
        max_check_attempts      5
        normal_check_interval   3
        retry_check_interval    2
        check_period            24x7
        notification_interval   10
        notification_period     24x7
        contact_groups          sqladm
}


客户端
nagios-plugins-1.4.9-1.el5.rf
nagios-nrpe

1.cat /etc/nagios/nrpe.cfg
pid_file=/var/run/nrpe.pid
server_port=5666
server_address=127.0.0.1, 10.1.1.193
nrpe_user=nagios
nrpe_group=nagios
allowed_hosts=127.0.0.1,10.1.1.193
dont_blame_nrpe=0
debug=0
command_timeout=60
# 说明:剩余空间低于20%警告,低于10%严重告警
command[check_disk1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/sda1
command[check_user]=/usr/lib/nagios/plugins/check_users -w 5  -c 10

2.vi /etc/xinetd.d/nrpe
   disable         = no
service xinetd restart


-------------------------------Nagios部署完成---------------------------------------
有不足之处请网友批评指正!稍后会每天发布一篇个人实验积累的笔记!