nagios详细配置
一、被监控linux端配置
1、相关组件的安装
yum install gcc glibc glibc-common gd gd-devel libxml2 libxml2-devel openssl openssl-devel mysql-devel
2、添加nagios用户
useradd nagios
3、安装nrpe
先安装nagios-plugins-1.4.15.tar.gz插件
# tar -zxvf nagios-plugins-1.4.15.tar.gz
# cd nagios-plugins-1.4.15
# ./configure --with-nagios-user=nagios --with-nagios-group=nagios
# make
# make install
#tar -zxvf nrpe-2.12.tar.gz
#cd nrpe-2.12
#./configure (会自动加载SSL)
#如果后面make报错,加如下参数
./configure --enable-ssl --with-ssl-lib=/usr/lib/ (当然前提要有openssl)
#make all
#make install-plugin
#make install-daemon
#make install-daemon-config
4、配置nrpe信息
#vim /usr/local/nagios/etc/nrpe.cfg
#allowed_hosts=127.0.0.1 //允许监控的IP,可以写多个,用逗号隔开
#/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d (启动nrpe;nrpe不会自动启动,每次修改完配置之后,需先将nrpe的进程杀掉,然后再执行这条命令)
二、服务器端配置(监控端)
1、安装NRPE
#tar -zxvf nrpe-2.12.tar.gz
#cd nrpe-2.12
#./configure #默认自动添加了openssl
#因为传送过程要加密,如果后面make报错,加如下参数
rpm -qa| grep ssl
openssl-devel-0.9.7a-43.17.el4_6.1
rpm -ql openssl-devel-0.9.7a-43.17.el4_6.1 |more
./configure --enable-ssl --with-ssl-lib=/lib/ (当然前提要有openssl)
#make all
#make install-plugin
2、配置nagios用来监控linux主机
2.1:主配置文件修改(改动部分内容)
[root@loc ~]# vim /usr/local/nagios/etc/nagios.cfg
# Definitions for monitoring the local(Linux) host
cfg_file=/usr/local/nagios/etc/objects/localhost.cfg 定义监控本机服务(如不需要监控本机,可将这行注释掉)
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg 添加此行,定义被监控的主机名和地址
cfg_file=/usr/local/nagios/etc/objects/hostgroups.cfg 添加此行,定义被监控主机的分组管理
cfg_file=/usr/local/nagios/etc/objects/contactgroups.cfg 添加此行,定义联系人组
cfg_file=/usr/local/nagios/etc/objects/services.cfg 打开此行,定义需要监控的服务项
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg 添加此行,定义联系人
check_external_commands=1
定义在web界面下重启nagios服务
command_check_interval=10s 定义命令检查服务的间隔时间
2.2:添加nagios用户,使其拥有对nagios的访问权限
[root@loc ~]# vim /usr/local/nagios/etc/cgi.cfg
authorized_for_system_information=nagiosadmin,nagios
authorized_for_configuration_information=nagiosadmin,nagios
authorized_for_system_commands=nagiosadmin,nagios
authorized_for_all_services=nagiosadmin,nagios
authorized_for_all_hosts=nagiosadmin,nagios
authorized_for_all_service_commands=nagiosadmin,nagios
authorized_for_all_host_commands=nagiosadmin,nagios
可以定义多个用户,添加到后面用逗号隔开就好。
2.3:commands.cfg定义外部构件nrpe
vi /usr/local/nagios/etc/objects/commands.cfg
#添加
#check nrpe
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
2.4:监控用户能否登陆到数据库
vi /usr/local/nagios/etc/objects/commands.cfg
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -d "$ARG1$" -P $ARG2$ -u $ARG3$ -p $ARG4$
}
注:-d:database(数据库)-P:Port(端口) -u:user(用户名) -p:password(密码)
2.5:创建联系人和联系人组配置文件
[root@loc ~]# vi /usr/local/nagios/etc/objects/contacts.cfg
define contact {
contact_name admin #联系人名
alias system administrator #别名
service_notification_period 24x7 #服务通知的时间段
host_notification_period 24x7 #主机通知的时间段
service_notification_options w,u,c,r #当服务出现w—报警,u—未知,c—严重,r—从异常恢复到正常,在这四种情况下通知联系人
host_notification_options d,u,r #当主机出现d—宕机,u—不可达,r—从异常情况恢复正常,在这3种情况下通知联系人
service_notification_commands notify-service-by-email #服务出问题通知采用的命令
host_notification_commands notify-host-by-email #同上
email aa@aa.com #指定联系的人email地址
pager 13800138000 #定义通过手机短信的方式发送警报的手机号码
pager 13810255206
}
[root@loc ~]# vi /usr/local/nagios/etc/objects/contactgroups.cfg
define contactgroup{
contactgroup_name sagroup #定义组名
alias system administrator group #组别名
members admin #定义联系人名(contacts.cfg中的联系人名)
}
2.6: 创建被监控的主机和主机组文件
[root@loc objects]# vi hosts.cfg
define host {
host_name test232 #被监控的主机名
alias tomas #别名
address 218.241.223.232 #被监控主机地址
contact_groups sagroup #联系人组
check_command check-host-alive #检查主机 状态的名字
check_period 24x7 #检查的时间段
max_check_attempts 5 #检查失败后重试的次数
notification_interval 5 #提醒的间隔时间
notification_options d,u,r #在什么情况提醒
}
define host {
host_name test233
alias tomas1
address 218.241.223.233
contact_groups sagroup
check_command check-host-alive
check_period 24x7
max_check_attempts 5
notification_interval 5
notification_options d,u,r
}
define host {
host_name test234
alias tomas2
address 218.241.223.234
contact_groups sagroup
check_command check-host-alive
check_period 24x7
max_check_attempts 5
notification_interval 5
notification_options d,u,r
}
[root@loc objects]# vi hostgroups.cfg
define hostgroup{
hostgroup_name sa-servers
alias saservers
members test232,test233,test234
}
2.7:配置监控主机服务项
vi /usr/local/nagios/etc/objects/services.cfg #在这下面依次再定义主机232和234
define service{
host_name test233 #必须是hosts.cfg中定义的主机
service_description check-host-alive
check_command check-host-alive #在commands.cfg文件中定义或在nrpe.cfg里面定义的命令
max_check_attempts 5 #最大重试次数
normal_check_interval 5 #检查间隔的单位是分钟
retry_check_interval 2 #检查失败后的重试间隔,单位是分钟
check_period 24x7
notification_interval 10 #探测到故障后,每隔多长时间发送一次报警信息,单位是分钟
notification_period 24x7 #通知选项跟联系人配置文件相同
notification_options w,u,c,r
contact_groups sagroup #配置文件contactgroups.cfg中定义的组名称
}
define service{
host_name test233
service_description check_tcp 80
check_command check_tcp!80
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups sagroup
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
define service{
host_name test233
service_description cpuload
check_command check_nrpe!check_load
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups sagroup
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
define service{
host_name test233
service_description total-procs
check_command check_nrpe!check_total_procs
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups sagroup
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
define service{
host_name test233
service_description check_itravel_mysql
check_command check_mysql!itravel!3306!root!root
check_period 24x7
max_check_attempts 4
normal_check_interval 3
retry_check_interval 2
contact_groups sagroup
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
}
三、启动服务
监控主机文件配置完成了,我们可以重启下服务,查看下nagios的界面。
验证下配置文件:
[root@loc objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
Total Warnings:0 #表示文件没有任何警告
Total Errors: 0 #表示文件没有任何错误
重新启动服务:
[root@loc objects]# /etc/init.d/nagios restart
Running configuration check...done.
Stopping nagios:done.
Starting nagios:done.
[root@loc objects]# /etc/init.d/httpd restart
登录界面:
http://IP/nagios
可以清晰的看到前面所做的监控配置了。