http://hi.baidu.com/icanwen/item/e6ed673c3f79e7302f20c403

最近公司机房的空调经常自动关闭,导致机房温度过高,引起设备故障,因此想到用 nagios 检测 CPU 温度:当温度达到设定的上限时,自动发送短信(SMS)到手机。已在 Dell R200 上测试通过。本文参照了网上很多高手的资料,在此谢过!

nagios 主程序的服务定制设置如下:

define service{

       use                             generic-service         ; Name of service template to use

       host_name                       fileserver              ; Host this check belongs to; the matching host definition is not shown here

       service_description             cpu temperature         ; Label of this service

       check_command                   check_nrpe!check_cpu_temp ; Passes "check_cpu_temp" to check_nrpe; it matches command[check_cpu_temp] in nrpe.cfg

       }

被监控主机服务定制如下:

vi /usr/local/nagios/etc/nrpe.cfg

#解释:用sensors检测cpu的温度当cpu温度到达35度时报警,到达40度时处于紧急状态

command[check_cpu_temp]=/usr/local/nagios/libexec/check_cputemp.sh -m sensors -w 35 -c 40

不可以上传附件 插件代码直接贴到这里了:)

#!/bin/sh

######################check_cputemp#######################

#version: 1.0

#Author : xiaoyong wen

#date : May 2011

#Licence GPLv2

#INSTALLATION

#This script requires lm_sensors to be installed.

#The output of "sensors" must look like the following:

#########################################

#coretemp-isa-0000   #

#Adapter: ISA adapter   #

#Core 0:      +27°C  (high =   +85°C) #                  

    #

#coretemp-isa-0001   #

#Adapter: ISA adapter   #

#Core 1:      +25°C  (high =   +85°C) #

#########################################

#You can use NRPE to define the service in nagios, e.g.

#check_nrpe!check_cpu_temp

######my taobao shop http://ujjj.taobao.com######just a AD :)####

# Plugin return statements
# Exit codes handed back to Nagios. They are marked readonly so that no
# later assignment in the script can silently change what a state means.
readonly STATE_OK=0
readonly STATE_WARNING=1
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

#######################################
# Print a one-line hint telling the user how to get the full help.
# Arguments: none
# Outputs:   the hint on stdout
#######################################
print_help_msg(){
  # printf is used instead of the $Echo ("echo -e") indirection: "echo -e"
  # is not portable under #!/bin/sh (dash prints a literal "-e"), and this
  # way the function does not depend on Echo being assigned beforehand.
  printf '%s\n' "Usage: $0 -h to get help."
}

#######################################
# Print the full usage text: synopsis, a note on the method and threshold
# options, and an example invocation.
# Arguments: none
# Outputs:   six lines on stdout
#######################################
print_full_help_msg(){
  # One printf call, one argument per output line. printf replaces the
  # $Echo ("echo -e") indirection, which is not portable under #!/bin/sh
  # (dash prints a literal "-e") and requires Echo to be set beforehand.
  printf '%s\n' \
    "Usage:" \
    "$0 [ -v ] -m sensors -w cpuT -c cpuT" \
    "Sepicify the method to use the temperature data sensors." \
    "And the corresponding Critical value must greater than Warning value." \
    "Example:" \
    "${0} -m sensors -w 50 -c 55"
}

#######################################
# Print a generic error marker followed by the full usage text.
# Arguments: none
# Outputs:   "Error." and the output of print_full_help_msg on stdout
#######################################
print_err_msg(){
  # printf instead of the $Echo ("echo -e") indirection, which is not
  # portable under #!/bin/sh (dash prints a literal "-e").
  printf '%s\n' "Error."
  print_full_help_msg
}


#######################################
# Append a debug message to the log file when verbose mode is on.
# Globals:   Debug          (read) - logging happens only when it is "true"
#            DEBUG_LOG_FILE (read, optional) - log file path; defaults to
#                           /var/log/check_sys_temperature.log.<pid>
# Arguments: the words of the message; they are joined with single spaces
# Outputs:   nothing on stdout; one line appended to the log file
#######################################
to_debug(){
  if [ "$Debug" = "true" ]; then
    # The path can be overridden because /var/log is normally writable by
    # root only. printf writes the message verbatim, whereas "echo -e" is
    # not portable under #!/bin/sh.
    printf '%s\n' "$*" >> "${DEBUG_LOG_FILE:-/var/log/check_sys_temperature.log.$$}" 2>&1
  fi
}

# Run every external tool in the C locale so its output has a fixed format.
# Unsetting LANG alone is not enough: LC_ALL and the individual LC_*
# variables take precedence over LANG, so LC_ALL is forced as well.
unset LANG
LC_ALL=C
export LC_ALL

# Command used to print messages; the rest of the script calls it as $Echo.
# Plain "echo" is used because "echo -e" is not portable under #!/bin/sh
# (dash prints a literal "-e"), and none of the messages need escapes.
Echo="echo"


# Parse the command line.
#   -v        verbose mode: sets Debug=true so to_debug writes its log
#   -m NAME   data source; only "sensors" is accepted
#   -w N      warning threshold, stored in WARNING
#   -c N      critical threshold, stored in CRITICAL
#   -h        print the full help and exit
# Every usage problem exits with status 3.
if [ $# -lt 1 ]; then
  print_help_msg
  exit 3
fi

# The leading ":" in the option string selects silent error reporting:
# getopts then sets OPTION to ":" for a missing argument and to "?" for an
# unknown option, with the offending option letter in OPTARG.
while getopts :vhm:w:c: OPTION; do
  case "$OPTION" in
    v)
      #$Echo "Verbose mode."
      Debug=true
      ;;
    m)
      method=$OPTARG
      ;;
    w)
      WARNING=$OPTARG
      ;;
    c)
      CRITICAL=$OPTARG
      ;;
    h)
      print_full_help_msg
      exit 3
      ;;
    :)
      # Reported separately so a forgotten value is not mislabelled as an
      # illegal option.
      $Echo "Error: Option -$OPTARG requires an argument."
      print_help_msg
      exit 3
      ;;
    *)
      # Unknown option (OPTION is "?"). An unquoted "?" pattern would be a
      # glob matching any single character, so a catch-all is used instead.
      $Echo "Error: Illegal Option."
      print_help_msg
      exit 3
      ;;
  esac
done

if [ "$method" = "sensors" ]; then
  use_sensors="true"
  to_debug use_sensors
else
  $Echo "Error. Must to sepcify the method to use sensors."
  print_full_help_msg
  exit 3
fi

to_debug "All Values are \" Warning: $WARNING and Critical: $CRITICAL \"."

#########lm_sensors##################
# Read the CPU core temperatures from lm_sensors and store their average,
# truncated to a whole number, in TEMP. Every failure exits with status 3.
if [ "$use_sensors" = "true" ]; then
  # command -v is the POSIX way to locate a program; it prints the path on
  # success and nothing on failure.
  sensorsCheckOut=$(command -v sensors 2>/dev/null)
  if [ $? -ne 0 ] || [ -z "$sensorsCheckOut" ]; then
    echo "sensors: command not found"
    echo Maybe you need to check your sensors.
    exit 3
  fi
  to_debug "Use $sensorsCheckOut to check system temperature"

  # Run sensors once and parse every "Core N:" line rather than relying on
  # fixed line positions. The third field holds the reading (e.g. "+27°C"
  # or "+105.0°C"); its whole integer part is kept, so three-digit
  # temperatures are no longer cut down to their first two digits.
  sensorsOutput=$(sensors 2>&1)
  TEMP=$(printf '%s\n' "$sensorsOutput" | awk '
    /^Core [0-9]+:/ {
      reading = $3
      sub(/^\+/, "", reading)
      if (match(reading, /^-?[0-9]+/)) {
        total += substr(reading, RSTART, RLENGTH)
        cores++
      }
    }
    END { if (cores > 0) printf "%d\n", total / cores }')

  # TEMP stays empty when no core line could be parsed, so this check now
  # really catches missing data.
  if [ -z "$TEMP" ]; then
    $Echo "No Data been get here. Please confirm your ARGS and re-check it with Verbose mode, then to check the log."
    exit 3
  fi
  to_debug "temperature data is $TEMP"
else
  $Echo "Error. Must to sepcify the method to use sensors"
  print_full_help_msg
  exit 3
fi

######### Comparison with the warning and critical thresholds given by the user ############
# Compare the measured temperature with the thresholds, print the status
# line and exit with the matching Nagios state. A threshold of 0 disables
# that level; a threshold that was not given is treated as 0.

# Succeeds when $1 is a decimal integer with an optional leading minus sign.
_is_integer() {
  case ${1#-} in
    '' | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

CPU_TEMP=$TEMP
WARNING=${WARNING:-0}
CRITICAL=${CRITICAL:-0}

# Without this guard a non-numeric value makes the "-gt" tests fail with an
# error, and the chain below falls through to OK, hiding the problem.
if ! _is_integer "$CPU_TEMP" || ! _is_integer "$WARNING" || ! _is_integer "$CRITICAL"; then
  echo "The TEMPERATURE UNKNOWN - temperature and thresholds must be integers (temperature='$CPU_TEMP' warning='$WARNING' critical='$CRITICAL')"
  exit "$STATE_UNKNOWN"
fi

#if [ "$WARNING" != "0" ] || [ "$CRITICAL" != "0" ]; then

if [ "$CRITICAL" != "0" ] && [ "$CPU_TEMP" -gt "$CRITICAL" ]; then
  STATE="$STATE_CRITICAL"
  STATE_MESSAGE="CRITICAL"
elif [ "$WARNING" != "0" ] && [ "$CPU_TEMP" -gt "$WARNING" ]; then
  STATE="$STATE_WARNING"
  STATE_MESSAGE="WARNING"
else
  STATE="$STATE_OK"
  STATE_MESSAGE="OK"
fi
to_debug "$STATE , Message is $STATE_MESSAGE"

echo "The TEMPERATURE $STATE_MESSAGE - The CPU's Temperature is $CPU_TEMP degree"

exit "$STATE"