This shows you the differences between two versions of the page.
|
monitoring:nagios_aixscripts [2021/01/01 21:25] 127.0.0.1 external edit |
monitoring:nagios_aixscripts [2021/01/18 13:54] (current) manu [Monitor filesystems] |
||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ===== Scripts for AIX monitoring with Nagios ===== | ===== Scripts for AIX monitoring with Nagios ===== | ||
| - | ==== Monitor filesystems ==== | ||
| - | |||
| - | This script checks the percent usage of each filesystem against warning and critical thresholds. Per-filesystem exceptions to the default thresholds are read from an exception list, in which wildcards can be used. | ||
| - | <cli prompt='>'> | ||
| - | root@aixserver - /opt/freeware/lib/nagios/plugins > cat MY_checkfs.cfg | ||
| - | #fs_name:pct_warning:pct_critical | ||
| - | # You can add exceptions in this file; otherwise the default is used | ||
| - | #Example: | ||
| - | #/db*:98:100 , 98 and 100 will be used for all filesystems /db* | ||
| - | #default:90:95 | ||
| - | /tmp:80:95 | ||
| - | /usr:96:98 | ||
| - | /opt:95:98 | ||
| - | /cdrom:100:100 | ||
| - | /mnt:100:100 | ||
| - | </cli> | ||
| - | |||
| - | <cli prompt='>'> | ||
| - | root@aixserver - /opt/freeware/lib/nagios/plugins > cat MY_checkfs.sh | ||
| - | </cli> | ||
| - | <code> | ||
| - | #!/usr/bin/ksh93 | ||
| - | #set -x | ||
| - | ################################################## | ||
| - | #@(#) send an alert if filesystems usage is higher than threshold | ||
| - | ################################################## | ||
| - | # associated file is <script name>.cfg (exceptions), e.g. MY_checkfs.cfg for MY_checkfs.sh | ||
| - | # please do not modify this script; put exceptions in the .cfg file instead | ||
| - | # checkfs.cfg format: | ||
| - | # fs_name:pctwarning_threshold:pctcritical_threshold | ||
| - | # version: 1.1 10-02-2014 Manu | ||
| - | ################################################## | ||
| - | |||
| - | # Directory of this script; in the visible script it is only referenced by the commented-out .env line below. | ||
| - | dir=`dirname $0` | ||
| - | #. $dir/.env | ||
| - | # Script name without directory and without anything after the first dot (MY_checkfs.sh -> MY_checkfs). | ||
| - | sn=`basename $0 | cut -d. -f1` | ||
| - | | ||
| - | # Exception list read by generate_case_file; its name is derived from the script name. | ||
| - | configfile=/opt/freeware/lib/nagios/plugins/$sn.cfg | ||
| - | # Working files live under $logpath and are named after the script. | ||
| - | logpath=/tmp | ||
| - | # Log file; only referenced by commented-out redirections in the visible script. | ||
| - | logname=$logpath/$sn.log | ||
| - | # Generated "case" clauses (written by generate_case_file, read by check_fs). | ||
| - | tmpcfg=$logpath/$sn.txt | ||
| - | # Generated "case" statement that check_fs sources for each filesystem. | ||
| - | tmpscript=$logpath/$sn.scr | ||
| - | # Number of files and of directories listed by info_fs. | ||
| - | nb_file=25 | ||
| - | nb_dir=15 | ||
| - | # Timestamp; only referenced by a commented-out log line in the visible script. | ||
| - | DATE=$(date +"%Y-%m-%d %H:%M") | ||
| - | | ||
| - | # Exit statuses used as the plugin result. check_fs references STATE_WARNING and STATE_CRITICAL; | ||
| - | # the other values are not referenced in the visible script. | ||
| - | STATE_OK=0 | ||
| - | STATE_WARNING=1 | ||
| - | STATE_CRITICAL=2 | ||
| - | STATE_UNKNOWN=3 | ||
| - | STATE_DEPENDENT=4 | ||
| - | |||
| - | #echo "$sn: $DATE" > $logname | ||
| - | |||
| - | #------------------------------------------------ | ||
| - | # generate_case_file () | ||
| - | # Build the "case" clauses that check_fs evaluates for each filesystem. | ||
| - | # Reads: $configfile - exception lines "fs_pattern:warn:crit" and the | ||
| - | # "#default:warn:crit" line (first one wins). | ||
| - | # Writes: $tmpcfg - one clause per line, "pattern) pctwarn=W pctcrit=C;;", | ||
| - | # followed by the catch-all "*) pctwarn=Wdef pctcrit=Cdef ;;". | ||
| - | # Sets: pctwarn_def, pctcrit_def (global). | ||
| - | #------------------------------------------------ | ||
| - | generate_case_file() | ||
| - | { | ||
| - | # Default thresholds come from the "#default:" line of the config file. | ||
| - | pctwarn_def=$(grep "^#default" "$configfile" | head -1 | cut -d":" -f2) | ||
| - | pctcrit_def=$(grep "^#default" "$configfile" | head -1 | cut -d":" -f3) | ||
| - | # Single pass from $configfile to $tmpcfg: the output file is never read and | ||
| - | # written by the same pipeline, so it cannot be truncated before it is consumed. | ||
| - | # Comment lines and empty lines are dropped. Every clause except the first is | ||
| - | # prefixed with a literal backslash-n, which check_fs relies on to separate the | ||
| - | # clauses when it echoes them. The default clause is always emitted, even when | ||
| - | # the config file contains no exception line. | ||
| - | grep -v "^ *#" "$configfile" | sed '/^$/d' | awk -F':' -v warn="$pctwarn_def" -v crit="$pctcrit_def" '{ printf "%s%s) pctwarn=%s pctcrit=%s;;\n", (NR > 1 ? "\\n" : ""), $1, $2, $3 } END { printf "%s*) pctwarn=%s pctcrit=%s ;;\n", (NR > 0 ? "\\n" : ""), warn, crit }' > "$tmpcfg" | ||
| - | } | ||
| - | |||
| - | #------------------------------------------------ | ||
| - | # check_fs () | ||
| - | # Compare the usage of each filesystem reported by "df -k" with its thresholds. | ||
| - | # Reads: $tmpcfg (clauses written by generate_case_file), STATE_WARNING, STATE_CRITICAL. | ||
| - | # Writes: $tmpscript (rewritten and sourced for every filesystem). | ||
| - | # Sets: OUTPUT (status text) and EXIT_CODE (0, STATE_WARNING or STATE_CRITICAL). | ||
| - | # Removes $tmpcfg and $tmpscript before returning. | ||
| - | #------------------------------------------------ | ||
| - | check_fs() | ||
| - | { | ||
| - | EXIT_CODE=0 | ||
| - | MSG_WARN="" | ||
| - | MSG_CRIT="" | ||
| - | OUTPUT="All filesystems OK" | ||
| - | clauses=$(cat $tmpcfg) | ||
| - | # Keep only df lines that contain "/dev/" and no ":" (presumably to skip remote mounts - confirm), | ||
| - | # and print field 4 glued to field 7; presumably "%Used" and the mount point in the | ||
| - | # AIX "df -k" layout - confirm. Each word looks like "45%/usr". | ||
| - | # NOTE(review): a mount point containing a space or "%" would be split incorrectly. | ||
| - | for i in $(df -k | egrep -v "Filesystem|:|/proc" | grep -v ":" | grep "/dev/" | awk '{print $4 $7}') | ||
| - | do | ||
| - | # Text after the first "%" is the mount point, text before it is the usage. | ||
| - | fs=`echo $i | cut -f 2 -d%` | ||
| - | pct_fs=`echo $i | cut -f 1 -d%` | ||
| - | # Not referenced anywhere else in the visible script. | ||
| - | mail="" | ||
| - | # Build a "case" statement on the mount point from the generated clauses and source it; | ||
| - | # the matching clause assigns pctwarn and pctcrit for this filesystem. | ||
| - | # NOTE(review): $cmd is echoed unquoted, so this appears to rely on echo expanding | ||
| - | # the literal backslash-n sequences into newlines - confirm for the target shell. | ||
| - | cmd="case $fs in \n $clauses \n esac" | ||
| - | echo $cmd > $tmpscript | ||
| - | . $tmpscript > /dev/null | ||
| - | # Thresholds are exclusive: usage must be strictly greater to raise an alert. | ||
| - | if [ $pct_fs -gt $pctwarn ] | ||
| - | then | ||
| - | if [ $pct_fs -gt $pctcrit ] | ||
| - | then | ||
| - | MSG_CRIT="$MSG_CRIT $fs used $pct_fs" | ||
| - | EXIT_CODE=$(echo $STATE_CRITICAL) | ||
| - | else | ||
| - | MSG_WARN="$MSG_WARN $fs used $pct_fs" | ||
| - | # Only raise to WARNING; never lower an EXIT_CODE that is already CRITICAL. | ||
| - | if [ $STATE_WARNING -gt $EXIT_CODE ] | ||
| - | then | ||
| - | EXIT_CODE=$(echo $STATE_WARNING) | ||
| - | fi | ||
| - | fi | ||
| - | #info_fs >> $logname 2>&1 & | ||
| - | fi | ||
| - | done | ||
| - | | ||
| - | # Compose the status line: critical filesystems first, then warnings. | ||
| - | if [ "$MSG_CRIT" != "" ] | ||
| - | then | ||
| - | OUTPUT="DISK CRITICAL - $MSG_CRIT" | ||
| - | fi | ||
| - | if [ "$MSG_WARN" != "" ] | ||
| - | then | ||
| - | if [ "$MSG_CRIT" != "" ] | ||
| - | then | ||
| - | OUTPUT="$OUTPUT -- DISK WARNING - $MSG_WARN" | ||
| - | else | ||
| - | OUTPUT="DISK WARNING - $MSG_WARN" | ||
| - | fi | ||
| - | fi | ||
| - | # Clean up the generated files; errors are deliberately discarded. | ||
| - | rm $tmpcfg $tmpscript > /dev/null 2>&1 | ||
| - | } | ||
| - | |||
| - | #------------------------------------------------ | ||
| - | # info_fs () | ||
| - | # Print the biggest files and directories of filesystem $fs on stdout. | ||
| - | # Reads: fs (mount point set by check_fs), nb_file, nb_dir. | ||
| - | # The only call in the visible script is commented out in check_fs. | ||
| - | #------------------------------------------------ | ||
| - | info_fs() | ||
| - | { | ||
| - | echo "####### Here are the $nb_file biggest files from filesystem $fs" | ||
| - | # -xdev keeps find on this filesystem. "sort +6nr" is the legacy key syntax: | ||
| - | # numeric, reverse, starting at the 7th field (presumably the size column of "find -ls" - confirm). | ||
| - | find $fs -xdev -type f -ls | sort +6nr | head -$nb_file | ||
| - | echo "\n####### Here are the $nb_dir biggest directories from filesystem $fs" | ||
| - | # Size of each top-level entry of $fs (du -m: megabyte units), largest first. | ||
| - | du -ms $fs/* | sort -nr | head -$nb_dir | ||
| - | } | ||
| - | |||
| - | # Main sequence: build the threshold clauses, evaluate every filesystem, | ||
| - | # then print the status text and exit with the status set by check_fs. | ||
| - | generate_case_file #>> $logname 2>&1 | ||
| - | check_fs #>> $logname 2>&1 | ||
| - | echo $OUTPUT | ||
| - | exit $EXIT_CODE | ||
| - | </code> | ||