This is an old revision of the document!
check_mailq_aix
#!/usr/bin/ksh
# set -x
##################################################
#@(#) check mail server and mailq
# Best practice is to stop the sendmail service on AIX,
# as it is not required to send email.
# If it is left running, the service may hang, and then
# no mail will be sent. This check reports the age (in
# days) of the oldest mail found in the local spool.
##################################################
# v1.1 12-2020
##################################################
# Return codes:
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

DS='smtp.local.lu'               # Add here the SMTP server to check
MAILDIR=/var/spool/mail
GNU_DATE=/opt/freeware/bin/date  # date binary that accepts -d "<date string>"
# no warning before 3 days
mailwarning=3                    # max days mail pending
mailcritical=10                  # max days mail pending

# /etc/sendmail.cf must contain a line starting with "DS" followed by the
# expected SMTP server name.
grep "^DS${DS}" /etc/sendmail.cf > /dev/null 2>&1 || {
  echo "CRITICAL: config file error /etc/sendmail.cf"
  exit $STATE_CRITICAL
}

# Count spool entries whose name does not contain "lost" (e.g. lost+found).
# grep -c prints a bare number, so no whitespace trimming is needed.
pending=$(ls "$MAILDIR" 2>/dev/null | grep -v -c lost)
if [ "$pending" -eq 0 ]
then
  echo "OK"
  exit $STATE_OK
fi

# Without this binary every date conversion below would fail silently and
# the check would wrongly look healthy.
if [ ! -x "$GNU_DATE" ]
then
  echo "UNKNOWN: $GNU_DATE not found, cannot compute mail age"
  exit $STATE_UNKNOWN
fi

# For every file in the spool: take the first 5 "Date:" header lines, keep
# fields 2-5, convert each to epoch seconds and keep the smallest (oldest)
# value. Everything stays in one pipeline, so no temporary files are needed.
oldest=$(
  find "$MAILDIR" -type f |
  while IFS= read -r mbox
  do
    grep '^Date:' "$mbox" | cut -d' ' -f2-5 | head -5
  done |
  while IFS= read -r stamp
  do
    # unparsable dates are skipped on purpose
    "$GNU_DATE" -d "$stamp" "+%s" 2>/dev/null
  done | sort -n | head -1
)

# Spool files exist but none carries a usable Date: header (e.g. empty
# mailbox files): there is nothing to age.
if [ -z "$oldest" ]
then
  echo "OK: no dated mail found in $MAILDIR"
  exit $STATE_OK
fi

now=$("$GNU_DATE" "+%s")
days=$(( ($now - $oldest) / 86400 ))

if [ "$days" -gt "$mailcritical" ]
then
  echo "CRITICAL: mail pending $days days"
  exit $STATE_CRITICAL
fi

if [ "$days" -gt "$mailwarning" ]
then
  echo "WARNING: mail pending $days days"
  exit $STATE_WARNING
fi

# Below both thresholds: always give Nagios a status line.
echo "OK: mail pending $days days"
exit $STATE_OK
check_aix_cpu.sh
#!/usr/bin/ksh
# Nagios check: virtual CPU usage in percent.
# The thresholds follow a definition set by the application owner:
# usage above the critical threshold (99% by default) ends the
# script with exit code 2, which Nagios reports as critical.

# sar binary used to sample CPU activity
SAR="/usr/sbin/sar"

# Nagios plugin exit codes
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Thresholds in percent; a non-empty value from the environment wins
: "${LIST_WARNING_THRESHOLD:=95}"
: "${LIST_CRITICAL_THRESHOLD:=99}"

# sar sampling: seconds between reports and number of reports
INTERVAL_SEC=${INTERVAL_SEC:-1}
NUM_REPORT=${NUM_REPORT:-3}
# Print the help text on stdout and exit with status 0.
# Reads: PROGNAME, RELEASE (both optional), LIST_WARNING_THRESHOLD,
#        LIST_CRITICAL_THRESHOLD (shown as the current defaults).
print_usage() {
  # PROGNAME may be unset: fall back to the name the script was invoked as
  # so the banner and the example never start with a blank.
  typeset usage_prog
  usage_prog=${PROGNAME:-${0##*/}}

  echo ""
  # The release is only printed when RELEASE is set.
  echo "${usage_prog}${RELEASE:+ $RELEASE} - CPU Utilization check script for Nagios"
  echo ""
  echo "Usage: ${usage_prog} [-w <percent>] [-c <percent>] [-h]"
  echo ""
  echo " -w Warning threshold in % for warn_user,warn_system,warn_iowait CPU (default : ${LIST_WARNING_THRESHOLD})"
  echo " Exit with WARNING status if cpu exceeds warn_n"
  echo " -c Critical threshold in % for crit_user,crit_system,crit_iowait CPU (default : ${LIST_CRITICAL_THRESHOLD})"
  echo " Exit with CRITICAL status if cpu exceeds crit_n"
  echo " -h Show this page"
  echo ""
  echo "Usage: ${usage_prog} -w 95 -c 99"
  echo ""
  exit 0
}
# Parse parameters
#   -h | --help               print the usage text and exit
#   -w | --warning  <number>  sets LIST_WARNING_THRESHOLD
#   -c | --critical <number>  sets LIST_CRITICAL_THRESHOLD
# Any other argument is ignored.
# A threshold option without a numeric value ends the script with the
# UNKNOWN state instead of storing an empty or unusable threshold.
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -h | --help)
        print_usage
        exit ${STATE_OK:-0}
        ;;
      -w | --warning | -c | --critical)
        # ${2-} is empty when the option is the last argument.
        case "${2-}" in
          '' | . | *[!0-9.]*)
            echo "UNKNOWN - option $1 requires a numeric value"
            exit ${STATE_UNKNOWN:-3}
            ;;
        esac
        case "$1" in
          -w | --warning) LIST_WARNING_THRESHOLD=$2 ;;
          *) LIST_CRITICAL_THRESHOLD=$2 ;;
        esac
        # drop the option here; the value is dropped by the shift below
        shift
        ;;
    esac
    shift
  done
}
# ${1+"$@"} expands to nothing at all when the script got no arguments.
parse_args ${1+"$@"}
# Succeeds (status 0) when $1 is numerically greater than $2.
# awk is used because the percentage is a decimal number, which the
# integer-only -gt test cannot compare reliably.
over_threshold() {
  LC_ALL=C awk -v value="$1" -v limit="$2" 'BEGIN { exit !(value + 0 > limit + 0) }'
}

# Number of lines printed by "lsdev -Cc processor", used as the number of
# virtual CPUs.
NUM_VCPUS=$(lsdev -Cc processor | wc -l | awk '{print $1}')
# Sixth field of the last line of the sar report.
# NOTE(review): presumably the physc column of the Average line - confirm
# on the target LPAR type, the sar column layout is not visible here.
PHYSC=$($SAR ${INTERVAL_SEC} ${NUM_REPORT} | tail -1 | awk '{print $6}')

# Refuse to divide by zero or by garbage.
case "$NUM_VCPUS" in
  '' | 0 | *[!0-9]*)
    echo "UNKNOWN - cannot determine the number of processors"
    exit $STATE_UNKNOWN
    ;;
esac

# sar missing, failing or printing an unexpected layout ends up here.
case "$PHYSC" in
  '' | . | *[!0-9.]*)
    echo "UNKNOWN - cannot read CPU consumption from $SAR"
    exit $STATE_UNKNOWN
    ;;
esac

# Consumed share of the virtual CPUs, in percent with two decimals.
PCTUSED=$(LC_ALL=C awk -v physc="$PHYSC" -v vcpus="$NUM_VCPUS" \
  'BEGIN { printf "%.2f", physc * 100 / vcpus }')

if over_threshold "$PCTUSED" "$LIST_WARNING_THRESHOLD"
then
  if over_threshold "$PCTUSED" "$LIST_CRITICAL_THRESHOLD"
  then
    echo "CRITICAL - CPU usage at $PCTUSED%"
    exit $STATE_CRITICAL
  else
    echo "WARNING - CPU usage at $PCTUSED%"
    exit $STATE_WARNING
  fi
else
  echo "OK - CPU usage at $PCTUSED%"
  exit $STATE_OK
fi