User Tools

Site Tools


aix:scripts_category:nagios_aix_check

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
aix:scripts_category:nagios_aix_check [2021/01/18 13:52]
manu
aix:scripts_category:nagios_aix_check [2025/02/24 15:34] (current)
manu [Check path disks]
Line 2: Line 2:
  
 ==== Monitor filesystems ==== ==== Monitor filesystems ====
 +
 +=== script 1 ===
  
 This script with an exception list will check the percent usage of each filesystem, and you can also use wildcards. This script with an exception list will check the percent usage of each filesystem, and you can also use wildcards.
Line 130: Line 132:
 generate_case_file #>> $logname 2>&1 generate_case_file #>> $logname 2>&1
 check_fs #>> $logname 2>&1 check_fs #>> $logname 2>&1
 +echo $OUTPUT
 +exit $EXIT_CODE
 +</​code>​
 +
 +=== script 2 ===
 +
 +<cli prompt='>'>​
 +root@aixserver - /​opt/​freeware/​lib/​nagios/​plugins > cat check_fs.cfg
 +#​filesystem_name:​alert_w(%):​alert_c(%)
 +/tmp:80:95
 +/usr:96:98
 +/​cdrom:​100:​100
 +/​mnt:​100:​100
 +/​export/​aix6100-09:​99:​100
 +/​export/​aix7100-03:​99:​100
 +/​export/​aix7200-02:​99:​100
 +/​export/​mksysb:​95:​99
 +/​export/​vios226:​99:​100
 +/​export/​vios310:​99:​100
 +/​export/​software:​99:​100
 +</​cli>​
 +
 +<cli prompt='>'>​
 +root@aixserver - /​opt/​freeware/​lib/​nagios/​plugins > cat check_fs.sh
 +</​cli>​
 +<​code>​
 +#!/bin/sh
 +#set -x
 +##################################################​
 +#@(#) check filesystems usage is higher than threshold
 +##################################################​
 +# associate file is check_fs.cfg (exceptions)
 +# please do not modify this script use check_fs.cfg
 +# check_fs.cfg format:
 +# fs_name:​pct_warn:​pct_crit
 +# check also GPFS quotas if used
 +# version: 1.0 10-2020 Manu
 +##################################################​
 +
 +STATUS=0
 +
 +STATE_OK=0
 +STATE_WARNING=1
 +STATE_CRITICAL=2
 +STATE_UNKNOWN=3
 +
 +MSG=""​
 +
 +configfile=/​usr/​local/​nagios/​libexec/​check_fs.cfg
 +os=$(uname -a | awk '​{print $1}')
 +
 +
 +defaultwarn=92
 +defaultcrit=98
 +
 +#​------------------------------------------------
 +# check_gpfs ()
 +#​------------------------------------------------
 +check_gpfs()
 +{
 +# test gpfs quota if applicable
 +lsfs -a | sed '​1d'​ | awk '​{print $4}' | grep "​mmfs"​ > /dev/null 2>&1
 +if [ $? -eq 0 ]
 +then
 +   ​quotaalert=2000000
 +   for i in sybase kplus
 +   do
 +        used=$(mmlsquota -u $i |grep clkpfs|awk '​{print $3}')
 +        max=$(mmlsquota -u $i |grep clkpfs|awk '​{print $4}')
 +        diff=$(expr $max - $used)
 +        if [ $diff -lt $quotaalert ]
 +                then
 +                echo "***** GPFS Quota-WARNING *****" >> $logname
 +                echo " User $i has ounly $diff KB free quota " >> $logname
 +                echo " Reduce used space or use mmedquota -u $i to modifie quota" >> $logname
 +        fi
 +   done
 +fi
 +}
 +
 +#​------------------------------------------------
 +# check_aix_fs ()
 +#​------------------------------------------------
 +check_aix_fs()
 +{
 +df -k | egrep -v "​Filesystem|:​|/​proc"​ | grep -v ":"​ | grep "/​dev/"​ | awk '​{print $4,​$7}'​ | sed '​s/​\%//'​ | while read pct fs
 +do
 +  if [ -e $configfile ]
 +  then
 +    line=$(grep "​^$fs:"​ $configfile)
 +    if [ "​$line"​ != ""​ ]
 +    then
 +      warn=$(echo $line |cut -d':'​ -f2)
 +      crit=$(echo $line |cut -d':'​ -f3)
 +    else
 +      warn=$defaultwarn
 +      crit=$defaultcrit
 +    fi
 +  else
 +    warn=$defaultwarn
 +    crit=$defaultcrit
 +  fi
 +
 +  if [ "​$pct"​ -gt "​$crit"​ ]
 +  then
 +    MSG=$(echo "$MSG $fs:​${pct}%"​)
 +    STATE=${STATE_CRITICAL}
 +  else
 +    if [ "​$pct"​ -gt "​$warn"​ ]
 +    then
 +      MSG=$(echo "$MSG $fs:​${pct}%"​)
 +      STATE=${STATE_WARNING}
 +    fi
 +  fi
 +
 +  if [ "​$STATUS"​ -lt "​$STATE"​ ]
 +  then
 +    STATUS=$STATE
 +  fi
 +done
 +}
 +
 +#​------------------------------------------------
 +# check_lnx_fs ()
 +#​------------------------------------------------
 +check_lnx_fs()
 +{
 +df -k | egrep -v "​Filesystem|:​|/​proc"​ | grep -v ":"​ | grep "/​dev/"​ | rev | awk '​{print $1,​$2}'​ | rev | sed '​s/​\%//'​ | while read pct fs
 +do
 +  if [ -e $configfile ]
 +  then
 +    line=$(grep "​^$fs:"​ $configfile)
 +    if [ "​$line"​ != ""​ ]
 +    then
 +      warn=$(echo $line |cut -d':'​ -f2)
 +      crit=$(echo $line |cut -d':'​ -f3)
 +    else
 +      warn=$defaultwarn
 +      crit=$defaultcrit
 +    fi
 +  else
 +    warn=$defaultwarn
 +    crit=$defaultcrit
 +  fi
 +
 +  if [ "​$pct"​ -gt "​$crit"​ ]
 +  then
 +    MSG=$(echo "$MSG $fs:​${pct}%"​)
 +    STATE=${STATE_CRITICAL}
 +  else
 +    if [ "​$pct"​ -gt "​$warn"​ ]
 +    then
 +      MSG=$(echo "$MSG $fs:​${pct}%"​)
 +      STATE=${STATE_WARNING}
 +    fi
 +  fi
 +
 +  if [ "​$STATUS"​ -lt "​$STATE"​ ]
 +  then
 +    STATUS=$STATE
 +  fi
 +done
 +}
 +
 +###########################################​
 +if [ "​$os"​ = "​Linux"​ ]
 +then
 +  check_lnx_fs
 +else
 +  if [ "​$os"​ = "​AIX"​ ]
 +  then
 +    check_aix_fs
 +  else
 +    echo "##########​ Unknown OS"
 +        STATUS=$STATE_UNKNOWN
 +  fi
 +fi
 +#check_gpfs
 +
 +case "​$STATUS"​ in
 +    "​$STATE_OK"​) ​         MSG=$(echo "​OK"​) ;;
 +    "​$STATE_WARNING"​) ​    ​MSG=$(echo "​WARNING:​ $MSG") ;;
 +    "​$STATE_CRITICAL"​) ​   MSG=$(echo "​CRITICAL:​ $MSG") ;;
 +esac
 +
 +echo $MSG
 +exit $STATUS
 +</​code>​
 +
 +==== check_ntp_aix.sh (as nagios user) ==== 
 +
 +<​code>​
 +#​!/​bin/​ksh93
 +#
 +# AIX check NTP for Nagios v1.0 06/2023
 +#  EIF
 +#
 +
 +STATE_OK=0
 +STATE_WARNING=1
 +STATE_CRITICAL=2
 +STATE_UNKNOWN=3
 +STATE_DEPENDENT=4
 +
 +MAXSEC=30 # max delay allowed in seconds
 +OUTPUT=$(ntpq -p 2>&1 | sed '​s/​$/;/​g'​)
 +
 +if [ $(echo $OUTPUT | grep refused >/​dev/​null 2>&​1;​ echo $?) -eq 0 ]
 +then
 +  EXIT_CODE=$STATE_CRITICAL
 +  OUTPUT="​Process:​ xntpd not running"​
 +else
 +  if [[ $(echo $OUTPUT | tr ';'​ '​\n'​ | sed '/​^$/​d'​ | sed 's/^\ //g' | tail +3) == ""​ ]]
 +  then
 +    EXIT_CODE=$STATE_CRITICAL
 +    OUTPUT="​Process:​ xntpd not running"​
 +  else
 +    OFFSET=$(echo $OUTPUT | tr ';'​ '​\n'​ | sed '/​^$/​d'​ | sed 's/^\ //g' | tail +3 | rev | awk '​{print $2}' | rev | cut -d'​.'​ -f1 | sed '​s/​+//'​ | sed '​s/​-//'​ | sort -u | tail -1)
 +    if [ $(echo $OUTPUT | tr ';'​ '​\n'​ | sed '/​^$/​d'​ | sed 's/^\ //g' | tail +3 | grep -q '​^\*';​ echo $?) -eq 0 ]
 +    then
 +      EXIT_CODE=$STATE_OK
 +      OUTPUT="​Process:​ xntpd offset ${OFFSET}s synchronized OK"
 +    else
 +      if [ $(echo $OFFSET) -lt "​$MAXSEC"​ ]
 +      then
 +        EXIT_CODE=$STATE_OK
 +        OUTPUT="​Process:​ xntpd offset ${OFFSET}s synchronized OK"
 +      else
 +        EXIT_CODE=$STATE_WARNING
 +        OUTPUT="​Process:​ xntpd offset ${OFFSET}s Not synchronized"​
 +      fi
 +    fi
 +  fi
 +fi
 +
 +echo $OUTPUT | tr ';'​ '​\n'​ | sed '/​^$/​d'​ | sed 's/^\ //g' | tail +3 | rev | awk '​{print $2}' | rev | cut -d'​.'​ -f1 | sed '​s/​-//'​
 +
 +echo $OUTPUT
 +exit $EXIT_CODE
 +</​code>​
 +
 +As root:
 +<​code>​
 +#​!/​bin/​ksh93
 +#
 +# AIX check NTP for Nagios v1.0 06/2023
 +#
 +
 +STATE_OK=0
 +STATE_WARNING=1
 +STATE_CRITICAL=2
 +STATE_UNKNOWN=3
 +STATE_DEPENDENT=4
 +
 +STATE=$(/​usr/​bin/​lssrc -ls xntpd > /dev/null 2>&​1;​ echo $?)
 +
 +if [ "​$STATE"​ -ne 0 ]
 +then
 +  EXIT_CODE=$STATE_CRITICAL
 +  OUTPUT="​Process:​ xntpd not running"​
 +else
 +  CODE=$(/​usr/​bin/​lssrc -ls xntpd | tr -s ' ' |  tr ' ' ';'​ | grep -i '​Leap;​indicator:'​ | cut -d':'​ -f2- | cut -d'​('​ -f1 | sed '​s/;//​g'​)
 +  if [ "​$CODE"​ == "​00"​ ]
 +  then
 +    EXIT_CODE=$STATE_OK
 +    OUTPUT="​Process:​ xntpd synchronized OK"
 +  else
 +    EXIT_CODE=$STATE_CRITICAL
 +    OUTPUT="​Process:​ xntpd Not synchronized"​
 +  fi
 +fi
 +
 echo $OUTPUT echo $OUTPUT
 exit $EXIT_CODE exit $EXIT_CODE
Line 284: Line 558:
   exit $STATE_OK   exit $STATE_OK
 fi fi
 +</​code>​
  
 +==== Check path disks ====
 +
 +<cli prompt='#'>​
 +# cat check_paths.sh
 +</​cli>​
 +
 +<​code>​
 +#!/bin/sh
 +#@(#) v1.0 Count number of paths per disk
 +# v1.1 add sudo linux
 +# v1.2 change for VSCSI
 +# v1.3 add verbose (-v), improvements linux
 +# On linux, add into /​etc/​sudoers the following lines for linux:
 +#      nagios ALL=(ALL) NOPASSWD: /​usr/​sbin/​multipath
 +#      nrpe ALL=(ALL) NOPASSWD: /​usr/​sbin/​multipath
 +
 +
 +# number of path per type of disk
 +pathviosfc=4
 +pathviosscsi=2
 +pathviossas=1
 +pathlparfc=8
 +pathlparscsi=2
 +pathlparsas=1
 +
 +
 +STATUS=0
 +
 +STATE_OK=0
 +STATE_WARNING=1
 +STATE_CRITICAL=2
 +STATE_UNKNOWN=3
 +
 +MSG=""​
 +verbose=""​
 +
 +# specified value for nb paths
 +if [ "​$npath"​ == ""​ ]
 +then
 +  nbpath=$pathlparfc
 +fi
 +
 +os=$(uname -a | awk '​{print $1}')
 +
 +grepp() { [ $# -eq 1 ] && perl -00ne "print if /$1/i" || perl -00ne "print if /$1/i" < "​$2";​}
 +
 +#​---------------------
 +count_linux_path()
 +{
 +tempfile=/​tmp/​multipath.txt
 +
 +if [ ! -x /​usr/​sbin/​multipath ] || [ $(lsscsi -s | grep -q VMware; echo $?) -eq 0 ]
 +then
 +  MSG="​OK:​ no multipathing"​
 +  verbose="​$MSG"​
 +  STATUS=$STATE_OK
 +else
 +  if [ $(timeout 30  sudo /​usr/​sbin/​multipath -ll | grep -v "​policy="​ | grep -v "​size="​ | tr -s ' ' | sed 's/\ /;/g' | sed '/​^mpath/​i \\n' > $tempfile ; echo $?) -ne "​0"​ ]
 +  then
 +    MSG="​$MSG Maybe error on sudo config"​
 +    verbose="​$MSG"​
 +    STATUS=$STATE_UNKNOWN
 +  else
 +    for i in $(cat $tempfile | grep '​^mpath'​ | awk -F';'​ '​{print $1}')
 +    do
 +      pathok=$(cat $tempfile | grepp "​^$i;"​ | grep -v "​policy="​ | grep -v "​size="​ | grep -v '​^mpath'​ | grep active | wc -l | awk '​{print $1}')
 +      pathok_pct=$(echo "​scale=1;​100*$pathok/​$nbpath"​ | bc | cut -d '​.'​ -f1)
 +      verbose="​$verbose $i;​$pathok/​$nbpath"​ # verbose message
 +      if [ "​$pathok_pct"​ -lt "​50"​ ]
 +      then
 +        MSG="​$MSG $i;​$pathok/​$nbpath"​
 +        if [ $STATUS -lt $STATE_CRITICAL ]
 +        then
 +          STATUS=$STATE_CRITICAL
 +        fi
 +      else
 +        if [ "​$pathok_pct"​ -ne "​100"​ ]
 +        then
 +          MSG="​$MSG $i;​$pathok/​$nbpath"​
 +          if [ $STATUS -lt $STATE_CRITICAL ]
 +          then
 +            STATUS=$STATE_WARNING
 +          fi
 +        fi
 +      fi
 +    done
 +  fi
 +fi
 +
 +rm -f $tempfile 2>/​dev/​null
 +}
 +
 +#​---------------------
 +count_aix_path()
 +{
 +# check not available disks
 +nbdisknok=$(lsdev -Cc disk | grep -v Available | wc -l | awk '​{print $1}')
 +if [ "​$nbdisknok"​ -ne "​0"​ ]
 +then
 +  MSG="​$MSG WARNING: $nbdisknok disks defined"​
 +  verbose="​$MSG"​
 +  STATUS=$STATE_WARNING
 +else
 +  STATUS=$STATE_OK
 +fi
 +
 +for line in $(lsdev -Cc disk | tr -s ' ' | sed 's/\ /:/' | sed 's/\ /:/' | sed 's/\ /,/g')
 +do
 +  hdisk=$(echo $line | awk -F':'​ '​{print $1}')
 +  if [ "​$(echo $line | cut -d':'​ -f3- | tr '​A-Z'​ '​a-z'​ | grep -q mpio; echo $?)" -eq "​0"​ ]
 +  then
 +    if [ ! -e /​usr/​ios/​cli/​ioscli ]
 +        then
 +          # type LPAR FC
 +          nbpath=$pathlparfc
 +    else
 +          # type VIOS FC
 +          nbpath=$pathviosfc
 +    fi
 +  else
 +    if [ "​$(echo $line | cut -d':'​ -f3- | tr '​A-Z'​ '​a-z'​ | grep -q scsi; echo $?)" -eq "​0"​ ]
 +    then
 +      if [ ! -e /​usr/​ios/​cli/​ioscli ]
 +          then
 +            # type LPAR SCSI
 +            nbpath=$pathlparscsi
 +      else
 +            # type VIOS SCSI
 +            nbpath=$pathviosscsi
 +      fi
 +    else
 +      if [ "​$(echo $line | cut -d':'​ -f3- | tr '​A-Z'​ '​a-z'​ | grep -q sas; echo $?)" -eq "​0"​ ]
 +      then
 +        if [ ! -e /​usr/​ios/​cli/​ioscli ]
 +            then
 +              # type LPAR SAS
 +              nbpath=$pathlparsas
 +        else
 +              # type VIOS SAS
 +              nbpath=$pathviossas
 +        fi
 +          fi
 +        fi
 +  fi
 +
 +  pathok=$(lspath -l $hdisk | grep Enabled | wc -l | awk '​{print $1}')
 +  pathok_pct=$(echo "​scale=1;​100*$pathok/​$nbpath"​ | bc | cut -d '​.'​ -f1)
 +  verbose="​$verbose $hdisk;​$pathok/​$nbpath"​
 +  if [ "​$pathok_pct"​ -lt "​50"​ ]
 +  then
 +    MSG="​$MSG $hdisk;​$pathok/​$nbpath"​
 +    if [ $STATUS -lt $STATE_CRITICAL ]
 +    then
 +      STATUS=$STATE_CRITICAL
 +    fi
 +  else
 +    if [ "​$pathok_pct"​ -ne "​100"​ ]
 +    then
 +      MSG="​$MSG $hdisk;​$pathok/​$nbpath"​
 +      if [ $STATUS -lt $STATE_CRITICAL ]
 +      then
 +        STATUS=$STATE_WARNING
 +      fi
 +    fi
 +  fi
 +done
 +}
 +
 +######################​
 +if [ "​$os"​ = "​Linux"​ ]
 +then
 +  count_linux_path
 +else
 +  if [ "​$os"​ = "​AIX"​ ]
 +  then
 +    count_aix_path
 +  else
 +    echo "##########​ Unknown OS"
 +        STATUS=$STATE_UNKNOWN
 +  fi
 +fi
 +
 +if [ $STATUS -eq $STATE_OK ]
 +then
 +  echo "​OK"​
 +else
 +  echo "​$MSG"​
 +fi
 +
 +# For debug
 +if [ "​$1"​ = "​-v"​ ]
 +then
 +  echo "​$verbose"​ | tr ' ' '​\n'​
 +fi
 +
 +exit $STATUS
 </​code>​ </​code>​
 +
 +
 +==== Check SEA status ====
 +
 +Check Shared Ethernet Adapter status on VIOS
 +<cli prompt='#'>​
 +# cat check_sea.sh
 +</​cli>​
 +
 +<​code>​
 +#!/bin/ksh
 +# v1.1 eif check LACP
 +#
 +# Add sudoers:
 +#nagios ALL=(ALL) NOPASSWD: /​usr/​bin/​entstat
 +#nrpe ALL=(ALL) NOPASSWD: /​usr/​bin/​entstat
 +
 +
 +#valeurs retour Nagios
 +STATE_OK=0
 +STATE_WARNING=1
 +STATE_CRITICAL=2
 +STATE_UNKNOWN=3
 +STATE_DEPENDENT=4
 +EXITSTATUS=$STATE_UNKNOWN # Default Exit Code as UNKNOWN.
 +
 +#​Recuperation variable
 +hostname=$(hostname)
 +SEA=$(lsdev -Cc adapter | grep Shared|grep -v Defined | wc -l | awk '​{print $1}')
 +
 +#​Initialisation
 +if [ $SEA = "​0"​ ]
 +then
 +  MSG="​No SEA available on $hostname"​
 +  EXITSTATUS=2 #Exit status critical
 +else
 +  #​Verification ensemble des cartes si disponible
 +  for ent in $(lsdev -Cc adapter | grep Shared | awk '​{print $1}')
 +  do
 +    # check physical ports / links
 +    count_all=$(sudo /​usr/​bin/​entstat -d $ent 2>&1 | grep -i status | grep "​Physical Port Link Status"​ | wc -l | awk '​{print $1}')
 +    count_up=$(sudo /​usr/​bin/​entstat -d $ent 2>&1 | grep -i status | grep "​Physical Port Link Status"​ | grep Up |wc -l | awk '​{print $1}')
 +
 +    if [ $count_up -ne $count_all ]
 +    then
 +      MSG="​One or more Ethernet port down"
 +      EXITSTATUS=1 #Exit status Warning
 +    else
 +      MSG="​No ethernet port issue"
 +      EXITSTATUS=0 #Exit status OK
 +    fi
 +
 +    # check physical ports Speed, must be the same on all ports
 +    nb_speed=$(sudo /​usr/​bin/​entstat -d $ent 2>&1 | grep "​Physical Port Speed" | sort -u | wc -l | awk '​{print $1}')
 +
 +    if [ $nb_speed != "​1"​ ]
 +    then
 +      MSG="​Ethernet Ports bad speed"
 +      EXITSTATUS=1 #Exit status Warning
 +    fi
 +
 +    # check LACP
 +    count_all=$(sudo /​usr/​bin/​entstat -d $ent 2>&1 | grep "​Synchronization"​ | wc -l | awk '​{print $1}')
 +    count_up=$(sudo /​usr/​bin/​entstat -d $ent 2>&1 | grep "​Synchronization"​ | grep '​IN_SYNC'​ | wc -l | awk '​{print $1}')
 +    echo "​count_all=$count_all count_up=$count_up"​ >> /tmp/aaa
 +    if [ $count_up -ne $count_all ]
 +    then
 +      MSG="​LACP not synced"​
 +      EXITSTATUS=1 #Exit status Warning
 +    fi
 +  done
 +fi
 +
 +echo $MSG
 +exit $EXITSTATUS
 +</​code>​
 +
  
aix/scripts_category/nagios_aix_check.1610974377.txt.gz · Last modified: 2021/01/18 13:52 by manu