User Tools

Site Tools


aix:scripts_category:nagios_aix_check

This is an old revision of the document!


check_mailq_aix

#!/usr/bin/ksh
# set -x
##################################################
#@(#) check mail server and mailq
# Best practice is to stop the sendmail service on AIX,
# as it is not required for sending email.
# If you leave it running, the service can hang,
# and then no mail will be sent.
##################################################
# v1.1 12-2020
##################################################

# Thresholds, in days, for the oldest mail still pending in the spool.
# Nothing is reported below the warning threshold.
mailwarning=3
mailcritical=10

# SMTP relay that /etc/sendmail.cf is expected to reference (its DS entry).
DS="smtp.local.lu"

# Mail spool that is inspected, and the scratch-file prefix used while doing so.
MAILDIR="/var/spool/mail"
TMPfile="/tmp/check_mail.tmp"

# Exit statuses handed back to Nagios.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# Main check.
#   1. /etc/sendmail.cf must contain a "DS<relay>" line for $DS, else CRITICAL.
#   2. If the spool ($MAILDIR) holds nothing besides lost+found, report OK.
#   3. Otherwise take the oldest "Date:" header found in the spool files and
#      compare its age in days against $mailwarning / $mailcritical.
# Every path prints exactly one status line and exits with a Nagios state.
# The dates are streamed through a pipeline, so no scratch files are created
# in /tmp (and none can be left behind or clash between concurrent runs).

# GNU date is needed for "-d <string>" parsing; the stock AIX date lacks it.
GNU_DATE=/opt/freeware/bin/date

# Step 1: sendmail must be configured to relay through $DS.
grep "^DS${DS}" /etc/sendmail.cf > /dev/null 2>&1 || {
  echo "CRITICAL: config file error /etc/sendmail.cf"
  exit $STATE_CRITICAL
}

# Step 2: count spool entries, ignoring lost+found.
# grep -c prints a bare number (wc -l pads its output on some platforms).
pending=$(ls "$MAILDIR" | grep -c -v lost)
if [ "$pending" -eq 0 ]
then
  echo "OK"
  exit $STATE_OK
fi

if [ ! -x "$GNU_DATE" ]
then
  echo "UNKNOWN: $GNU_DATE not found (GNU date is required)"
  exit $STATE_UNKNOWN
fi

# Step 3: epoch (seconds) of the oldest parsable Date: header.
# Fields 2-5 of "Date: Mon, 18 Jan 2021 13:51:44 +0100" are "Mon, 18 Jan 2021",
# so the age is computed with day resolution. Only the first five Date: lines
# of each spool file are considered, as before.
oldest=$(
  find "$MAILDIR" -type f |
  while IFS= read -r mbox
  do
    grep '^Date:' "$mbox" | cut -d' ' -f2-5 | head -5
  done |
  while IFS= read -r stamp
  do
    # Unparsable dates are skipped instead of polluting the result.
    "$GNU_DATE" -d"$stamp" "+%s" 2>/dev/null
  done |
  sort -n | head -1
)

# Spool files exist but carry no usable Date: header (e.g. empty mailboxes):
# there is nothing whose age could exceed a threshold.
if [ -z "$oldest" ]
then
  echo "OK: no dated mail found in $MAILDIR"
  exit $STATE_OK
fi

now=$("$GNU_DATE" "+%s")
days=$(( ($now - $oldest) / 86400 ))

if [ "$days" -gt "$mailcritical" ]
then
  echo "CRITICAL: mail pending $days days"
  exit $STATE_CRITICAL
elif [ "$days" -gt "$mailwarning" ]
then
  echo "WARNING: mail pending $days days"
  exit $STATE_WARNING
else
  # Previously this path printed nothing, which Nagios shows as
  # "(No output returned from plugin)".
  echo "OK: mail pending $days days"
  exit $STATE_OK
fi

check_aix_cpu.sh

#!/usr/bin/ksh

# Check virtual CPU (VCPU) usage in percent.
# Per the thresholds defined by the application owner, usage above the
# critical threshold (default 99%) of the CPUs on the system
# results in exit code 2,
# so that Nagios reports CRITICAL and raises an alert.

# Nagios plugin exit statuses.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Location of the sar binary used for sampling.
SAR=/usr/sbin/sar

# Tunables: a non-empty value inherited from the environment wins,
# otherwise the default after ":=" is assigned.
: "${LIST_WARNING_THRESHOLD:=95}"    # warning threshold, percent
: "${LIST_CRITICAL_THRESHOLD:=99}"   # critical threshold, percent
: "${INTERVAL_SEC:=1}"               # first argument handed to sar
: "${NUM_REPORT:=3}"                 # second argument handed to sar

# print_usage - write the help text to stdout and terminate with status 0.
# Globals read:
#   PROGNAME, RELEASE        optional; the script's own name is used when
#                            PROGNAME is unset, and RELEASE is omitted when
#                            unset (neither is assigned in this script)
#   LIST_WARNING_THRESHOLD,
#   LIST_CRITICAL_THRESHOLD  shown as the threshold values in effect
print_usage() {
    # Fall back to the invoked script name so the usage lines never show a
    # blank (or a hard-coded, wrong) program name.
    usage_prog=${PROGNAME:-${0##*/}}

    echo ""
    echo "${usage_prog}${RELEASE:+ $RELEASE} - CPU Utilization check script for Nagios"
    echo ""
    echo "Usage: ${usage_prog} -w -c (-i -n)"
    echo ""
    echo "  -w  Warning threshold in % for warn_user,warn_system,warn_iowait CPU (default : ${LIST_WARNING_THRESHOLD})"
    echo "  Exit with WARNING status if cpu exceeds warn_n"
    echo "  -c  Critical threshold in % for crit_user,crit_system,crit_iowait CPU (default : ${LIST_CRITICAL_THRESHOLD})"
    echo "  Exit with CRITICAL status if cpu exceeds crit_n"
    echo "  -h  Show this page"
    echo ""
    echo "Usage: ${usage_prog} -w 95 -c 99"
    echo ""
    exit 0
}

# parse_args - apply command-line options to the global settings.
#   -h | --help             print the usage text and exit OK
#   -w | --warning  VALUE   sets LIST_WARNING_THRESHOLD
#   -c | --critical VALUE   sets LIST_CRITICAL_THRESHOLD
#   -i VALUE                sets INTERVAL_SEC (first argument given to sar)
#   -n VALUE                sets NUM_REPORT   (second argument given to sar)
# An option given without its value ends the check with an UNKNOWN status
# instead of shifting past the end of the argument list and leaving an empty
# threshold behind. Any other argument is ignored, as it always was.
parse_args() {
    while [ $# -gt 0 ]; do
        case "$1" in
            -h | --help)
                print_usage
                exit "${STATE_OK:-0}"
                ;;
            -w | --warning | -c | --critical | -i | -n)
                if [ $# -lt 2 ]; then
                    echo "UNKNOWN - option $1 requires a value"
                    exit "${STATE_UNKNOWN:-3}"
                fi
                case "$1" in
                    -w | --warning)  LIST_WARNING_THRESHOLD=$2 ;;
                    -c | --critical) LIST_CRITICAL_THRESHOLD=$2 ;;
                    -i)              INTERVAL_SEC=$2 ;;
                    -n)              NUM_REPORT=$2 ;;
                esac
                shift   # drop the option; the shift below drops its value
                ;;
            *)
                # Unrecognised arguments are skipped silently.
                ;;
        esac
        shift
    done
}

# Parse parameters
parse_args "$@"

# Measure and report.
# PCTUSED = (6th field of sar's last output line) * 100 / (number of lines
# printed by "lsdev -Cc processor"). The 6th field is presumably the physc
# (physical processors consumed) column of the Average line - confirm against
# the sar output of the target LPAR type.
# Exit status: OK, WARNING (> warning threshold), CRITICAL (> warning and
# > critical threshold), or UNKNOWN when an input is not a usable number.

# is_number VALUE - succeeds when VALUE is a plain non-negative decimal.
is_number() {
  case "$1" in
    '' | . | *[!0-9.]* | *.*.*) return 1 ;;
  esac
  return 0
}

# exceeds VALUE LIMIT - succeeds when VALUE > LIMIT. The comparison is done
# in awk because VALUE carries decimals, which "[ -gt ]" cannot compare.
exceeds() {
  awk -v v="$1" -v t="$2" 'BEGIN { exit !(v + 0 > t + 0) }'
}

NUM_VCPUS=$(lsdev -Cc processor | wc -l | awk '{print $1}')
PHYSC=$($SAR ${INTERVAL_SEC} ${NUM_REPORT} | tail -1 | awk '{print $6}')

# Guard the inputs: a missing column, a failed sar run or zero processors
# would otherwise end in a bc/test error and a misleading status.
case "$NUM_VCPUS" in
  '' | 0 | *[!0-9]*)
    echo "UNKNOWN - cannot determine number of processors"
    exit $STATE_UNKNOWN
    ;;
esac
if ! is_number "$PHYSC"
then
  echo "UNKNOWN - cannot read CPU consumption from $SAR"
  exit $STATE_UNKNOWN
fi
if ! is_number "$LIST_WARNING_THRESHOLD" || ! is_number "$LIST_CRITICAL_THRESHOLD"
then
  echo "UNKNOWN - thresholds must be numeric"
  exit $STATE_UNKNOWN
fi

# Percentage with two decimals.
PCTUSED=$(awk -v p="$PHYSC" -v n="$NUM_VCPUS" 'BEGIN { printf "%.2f", p * 100 / n }')

if exceeds "$PCTUSED" "$LIST_WARNING_THRESHOLD"
then
  if exceeds "$PCTUSED" "$LIST_CRITICAL_THRESHOLD"
  then
    echo "CRITICAL - CPU usage at $PCTUSED%"
    exit $STATE_CRITICAL
  else
    echo "WARNING - CPU usage at $PCTUSED%"
    exit $STATE_WARNING
  fi
else
  echo "OK - CPU usage at $PCTUSED%"
  exit $STATE_OK
fi
aix/scripts_category/nagios_aix_check.1610974304.txt.gz · Last modified: 2021/01/18 13:51 by manu