nagios详细配置


一、被监控linux端配置

 

1、相关组件的安装

yum install gcc glibc glibc-common gd gd-devel libxml2 libxml2-devel openssl openssl-devel mysql-devel

 

2、添加nagios用户

useradd nagios

 

3、安装nrpe

先安装nagios-plugins-1.4.15.tar.gz插件

# tar -zxvf nagios-plugins-1.4.15.tar.gz
# cd nagios-plugins-1.4.15
# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
# make
# make install
#tar -zxvf nrpe-2.12.tar.gz
#cd nrpe-2.12
#./configure  (会自动加载SSL)
#如果后面make报错,加如下参数
./configure --enable-ssl --with-ssl-lib=/usr/lib/ (当然前提要有openssl)
#make all
#make install-plugin
#make install-daemon
#make install-daemon-config

 

4、配置nrpe信息

#vim /usr/local/nagios/etc/nrpe.cfg
#allowed_hosts=127.0.0.1  //允许监控的IP,可以写多个,用逗号隔开
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d (启动nrpe,nrpe不会自动启动,每次修改完配置之后先将nrpe的进程杀掉,然后再执行这条命令)

 

二、服务器端配置(监控端)

 

1、安装NRPE

#tar -zxvf nrpe-2.12.tar.gz
#cd nrpe-2.12
#./configure   #默认自动添加了openssl
#因为传送过程要加密,如果后面make报错,加如下参数
rpm -qa| grep ssl
openssl-devel-0.9.7a-43.17.el4_6.1
rpm -ql openssl-devel-0.9.7a-43.17.el4_6.1 |more
./configure --enable-ssl --with-ssl-lib=/lib/ (当然前提要有openssl)
#make all
#make install-plugin

 

 

2、配置nagios用来监控linux主机

2.1:主配置文件修改(改动部分内容)

[root@loc ~]# vim /usr/local/nagios/etc/nagios.cfg
   # Definitions for monitoring the local(Linux) host
cfg_file=/usr/local/nagios/etc/objects/localhost.cfg  定义监控本机服务(如不需要监控本机,可将这行注释掉)
 
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg   添加此行,定义被监控的主机名和地址
 
cfg_file=/usr/local/nagios/etc/objects/hostgroups.cfg  添加此行,定义被监控主机的分组管理
          
cfg_file=/usr/local/nagios/etc/objects/contactgroups.cfg  添加此行,定义联系人组
 
cfg_file=/usr/local/nagios/etc/objects/services.cfg 打开此行,定义需要监控的服务项
 
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg     添加此行,定义联系人
 
check_external_commands=1

定义在web界面下重启nagios服务

command_check_interval=10s          定义命令检查服务的间隔时间

 

2.2:添加nagios用户,使其拥有对nagios的访问权限

[root@loc ~]# vim /usr/local/nagios/etc/cgi.cfg
authorized_for_system_information=nagiosadmin,nagios
authorized_for_configuration_information=nagiosadmin,nagios
authorized_for_system_commands=nagiosadmin,nagios
authorized_for_all_services=nagiosadmin,nagios
authorized_for_all_hosts=nagiosadmin,nagios
authorized_for_all_service_commands=nagiosadmin,nagios
authorized_for_all_host_commands=nagiosadmin,nagios

可以定义多个用户,添加到后面用逗号隔开就好。

 

2.3:commands.cfg定义外部构件nrpe

vi /usr/local/nagios/etc/objects/commands.cfg
#添加
#check nrpe
define command{
       command_name check_nrpe
       command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
       }

2.4:监控用户能否登陆到数据库

vi /usr/local/nagios/etc/objects/commands.cfg
define command{
         command_name   check_mysql
         command_line   $USER1$/check_mysql -H $HOSTADDRESS$ -d "$ARG1$" -P $ARG2$ -u $ARG3$ -p $ARG4$
         }

注:-d:database(数据库)-P:Port(端口) -u:user(用户名) -p:password(密码)

 

2.5:创建联系人和联系人组配置文件

[root@loc ~]# vi /usr/local/nagios/etc/objects/contacts.cfg
define contact {
    contact_name     admin          #联系人名
    alias    system administrator        #别名
    service_notification_period    24x7     #服务通知的时间段
    host_notification_period       24x7   #主机通知的时间段
    service_notification_options    w,u,c,r   #当服务出现w—报警,u—未知,c—严重,r—从异常恢复到正常,在这四种情况下通知联系人
    host_notification_options       d,u,r  #当主机出现d—宕机,u—不可达,r—从异常情况恢复正常,在这3种情况下通知联系人
    service_notification_commands  notify-service-by-email   #服务出问题通知采用的命令
    host_notification_commands    notify-host-by-email   #同上
    email    aa@aa.com         #指定联系的人email地址
    pager                                         13800138000      #定义通过手机短信的方式发送警报的手机号码
       pager                                         13810255206
}
 
[root@loc ~]# vi /usr/local/nagios/etc/objects/contactgroups.cfg
define contactgroup{
       contactgroup_name    sagroup     #定义组名
       alias     system administrator group   #组别名
       members     admin  #定义联系人名(contacts.cfg中的联系人名)
       }

 

2.6: 创建被监控的主机和主机组文件

[root@loc objects]# vi hosts.cfg
       define host {
             host_name     test232   #被监控的主机名
       alias      tomas     #别名
      address     218.241.223.232    #被监控主机地址
      contact_groups   sagroup     #联系人组
       check_command check-host-alive #检查主机状态所用的命令名
       check_period 24x7        #执行检查的时间段
       max_check_attempts   5  #检查失败后重试的次数
      notification_interval    5  #提醒的间隔时间
      notification_options    d,u,r  #在什么情况提醒
       }
 
       define host {
      host_name   test233 
       alias        tomas1    
       address  218.241.223.233       
      contact_groups   sagroup      
      check_command    check-host-alive    
       check_period 24x7                  
       max_check_attempts    5        
      notification_interval     5    
      notification_options      d,u,r       
      }
 
       define host {
       host_name     test234                   
       alias         tomas2       
      address         218.241.223.234         
      contact_groups    sagroup    
      check_command    check-host-alive    
       check_period 24x7      
      max_check_attempts   5            
      notification_interval   5            
      notification_options  d,u,r
      }
      
       [root@loc objects]# vi hostgroups.cfg
       define hostgroup{
       hostgroup_name  sa-servers
        alias    saservers
       members   test232,test233,test234
        }

2.7:配置监控主机服务项

vi /usr/local/nagios/etc/objects/services.cfg                          #在这下面依次再定义主机232和234
     define service{
       host_name      test233      #必须是hosts.cfg中定义的主机
       service_description   check-host-alive  
       check_command     check-host-alive    #在commands.cfg文件中定义或在nrpe.cfg里面定义的命令
       max_check_attempts   5    #最大重试次数
       normal_check_interval     5   #检查间隔的单位是分钟
       retry_check_interval    2   #重试检查的间隔,单位是分钟
       check_period      24x7
       notification_interval    10   #探测到故障后,每隔多长时间发送一次报警信息,单位是分钟
       notification_period    24x7    #通知选项跟联系人配置文件相同
       notification_options   w,u,c,r
       contact_groups     sagroup    #配置文件contactgroups.cfg定义的组名称
        }
 
      define service{
       host_name               test233
       service_description          check_tcp 80
       check_command          check_tcp!80
       check_period             24x7
       max_check_attempts        4
       normal_check_interval        3
       retry_check_interval          2
       contact_groups            sagroup
       notification_interval           10
       notification_period           24x7
       notification_options          w,u,c,r
        }
 
      define service{
       host_name              test233
       service_description           cpuload
       check_command          check_nrpe!check_load
       check_period             24x7
       max_check_attempts        4
       normal_check_interval        3
       retry_check_interval          2
       contact_groups            sagroup
       notification_interval           10
       notification_period           24x7
       notification_options          w,u,c,r
        }
 
      define service{
       host_name              test233
       service_description          total-procs
       check_command         check_nrpe!check_total_procs
       check_period             24x7
       max_check_attempts        4
       normal_check_interval        3
       retry_check_interval          2
       contact_groups            sagroup
       notification_interval           10
       notification_period           24x7
       notification_options          w,u,c,r
        }
 
     define service{
        host_name             test233
        service_description        check_itravel_mysql
        check_command           check_mysql!itravel!3306!root!root
        check_period             24x7
        max_check_attempts       4
        normal_check_interval       3
        retry_check_interval         2
        contact_groups            sagroup
        notification_interval          10
        notification_period          24x7
        notification_options          w,u,c,r
        }

三、启动服务

监控主机文件配置完成了,我们可以重启下服务,查看下nagios的界面。

验证下配置文件:

[root@loc objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
 
Total Warnings:0           #表示文件没有任何警告
Total Errors:  0                 #表示文件没有任何错误

重新启动服务:

[root@loc objects]# /etc/init.d/nagios restart
Running configuration check...done.
Stopping nagios:done.
Starting nagios:done.
 
[root@loc objects]# /etc/init.d/httpd restart

登录界面:

http://IP/nagios

 

可以清晰的看到前面所做的监控配置了。