This is an old revision of the document!
[root@nagios plugins]# cat check_ibm_storwize_quorum.sh
#!/bin/sh
#set -x
##################################################
#@(#) check SVC IP quorum
##################################################
# version: 1.0 02-2022 Manu
##################################################
# Nagios plugin: asks the cluster for its quorum list over ssh and reports
# OK when exactly one line is online, of type "device" and names the
# configured witness server; CRITICAL otherwise.
#
# Usage: check_ibm_storwize_quorum.sh -cluster=<IP or hostname>
#
# Written for plain POSIX sh: no [[ ]] and no other bashisms, so it also
# runs where /bin/sh is dash.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATUS=$STATE_OK

user="nagios"
quorumsrv="witness01.ad.com"

# Strip the option prefix; a bare host name is accepted as well.
case "${1:-}" in
  --cluster=*) cluster=${1#--cluster=} ;;
  -cluster=*)  cluster=${1#-cluster=} ;;
  *)           cluster=${1:-} ;;
esac

# Without a target there is nothing to check: that is a usage error, not a
# quorum failure.
if [ -z "$cluster" ]; then
  echo "IP quorum: missing argument, usage: $0 -cluster=<IP or hostname>"
  exit "$STATE_UNKNOWN"
fi

if quorum_list=$(ssh "$user@$cluster" 'lsquorum -delim :'); then
  # -F: the witness name contains dots, match it literally and not as a regex.
  matches=$(printf '%s\n' "$quorum_list" \
    | grep -F ':online:' \
    | grep -F ':device:' \
    | grep -F -c ":${quorumsrv}")
  if [ "$matches" = "1" ]; then
    STATUS=$STATE_OK
    MSG="IP quorum: $quorumsrv OK"
  else
    STATUS=$STATE_CRITICAL
    MSG="IP quorum: $quorumsrv down!!!"
  fi
else
  # Still CRITICAL (same exit code as before), but the message now says that
  # the query itself failed instead of claiming the witness is down.
  STATUS=$STATE_CRITICAL
  MSG="IP quorum: cannot run lsquorum on $cluster via ssh"
fi

echo "$MSG"
exit "$STATUS"
[root@nagios plugins]# cat check_ibm_storwize_quorum.sh
[root@nagios plugins]# cat check_ibm_storwize.pl
#!/usr/bin/perl
#
# check_ibm_v7000_svc Nagios check
# you need to configure a ssh key without passphrase for logins to your V7000 storage array.
# IBM SVC.pm Perl Module is required and available here:
# https://www14.software.ibm.com/webapp/iwm/web/preLogin.do?source=AW-0NK
# http://www.alphaworks.ibm.com/tech/svctools
#
# Martin Leucht <mleucht@ipb.de> 09.01.2013
# Version 1.1
#
# fixed version 1.2 03.04.2014
#
# Usage: check_ibm_v7000_svc.pl -cluster=[IP or hostname] -check=[check] [ -exclude=[listofitems] ]
#
# this script can check the following things of your IBM V7000 SVC
#
# lsarray -> state of array MDisks
# lshost -> state of attached hosts
# lsdrive -> state of drives
# lsmdiskgrp -> state of svc managed disk groups
# lsmdisk -> state of external managed disks
# lunonline / lsvdisk -> state of volumes (lun's/vdisk's)
# lsenclosure -> state of enclosures
# lsenclosurebattery -> state of batteries
# lsnodebattery -> for SVC
# lsenclosurecanister -> state of canister/nodes
# lsnode -> for SVC
# lsenclosurepsu -> state of psu's
# lsenclosureslot -> state of port enclosure slots
# luninsync / lsrcrelationship -> state of syncs of your volumes (if you have more than one machine and mirroring)
# lsenclosureslotx -> state of enclosure slot port
#
# TODO: ISCSI port check (BUG in Perl module - lsportip state is not provided via IBM::SVC)
#
#   example nagios command definition
#
# define command {
#    command_name    check_ibm_v7000_svc
#    command_line    $USER1$/check_ibm_v7000_svc.pl -cluster=$HOSTADDRESS$ -check=$ARG1$ -v
# }
#
#   example command excluding some items
#
# define command {
#    command_name    check_ibm_v7000_svc_exclude
#    command_line    $USER1$/check_ibm_v7000_svc.pl -cluster=$HOSTADDRESS$ -check=$ARG1$ -exclude=$ARG2$ -v
# }
#
#   ... and service definition
#
# define service {
#    use                             severity2,noperf
#    service_description             check_lunsonline
#    host_name                       storwize01,storwize02
#    check_command                   check_ibm_v7000_svc!lunonline!'lun1,lun2'
# }
use strict;
use IBM::SVC1;
use Getopt::Long;
#use Data::Dumper;
# Command line options (filled in by GetOptions below) and their defaults.
my ($cluster, $ssh, $verbose, $user, $check, $help, $exclude);
($ssh, $user) = ("ssh", "nagios");

# Outcome of the check; printed and turned into the exit code at the end.
my ($state, $message) = ("", "");

# Working variables shared with the check code further down in the file.
my $params = {};
my ($svc_command, $item, $item_name, $item_state, $msg) = ("", "", "", "", "");
my @exclude = ("");
my $x = 0;
my %excl = ();

GetOptions(
    "cluster=s" => \$cluster,
    "verbose|v" => \$verbose,
    "ssh=s"     => \$ssh,
    "check=s"   => \$check,
    "help|h"    => \$help,
    "exclude=s" => \$exclude,
    "user=s"    => \$user,
);

# -cluster and -check are both mandatory; -help always shows the usage text.
unless ($cluster && $check && !$help) {
    print_usage();
    exit(0);
}

# Nagios exit states
our %states = (OK => 0, WARNING => 1, CRITICAL => 2, UNKNOWN => 3);

# Nagios state names
our %state_names = (0 => 'OK', 1 => 'WARNING', 2 => 'CRITICAL', 3 => 'UNKNOWN');
# get excluded items
# Builds %excl (item name => 1-based position in the -exclude list) so that
# a name can later be tested with a simple hash lookup.
if (defined $exclude && $exclude ne "") {
    if ($check eq "lunonline" || $check eq "luninsync" || $check eq "lshost") {
        @exclude = split(",", $exclude);
        foreach (@exclude) {
            $x++;
            $excl{$_} = $x;
        }
    } else {
        print "excluding is only available for lunonline, luninsync and lshost check!\n";
        # A wrong option combination must not look like a passed check:
        # exit code 0 would be shown as OK by Nagios, so report UNKNOWN.
        exit($states{'UNKNOWN'});
    }
}
# Translation tables: value reported by the storage CLI => Nagios state name.
# A value that is missing from the table used for a check matches none of
# the OK/WARNING/CRITICAL comparisons made on it.

# Used for lunonline, lsmdiskgrp, lsenclosure, the battery checks,
# lsenclosurecanister, lsnode and lsenclosurepsu.
my %generic_states = (
    online   => 'OK',
    offline  => 'CRITICAL',
    degraded => 'CRITICAL',
);

# Remote copy relationship states (luninsync).
my %luninsync_states = (
    consistent_synchronized   => 'OK',
    consistent_copying        => 'OK',
    inconsistent_stopped      => 'CRITICAL',
    consistent_stopped        => 'WARNING',
    idling                    => 'WARNING',
    consistent_disconnected   => 'CRITICAL',
    inconsistent_copying      => 'CRITICAL',
    idling_disconnected       => 'CRITICAL',
    inconsistent_disconnected => 'CRITICAL',
);

# Drive states (lsdrive).
my %lsdrive_states = (
    online   => 'OK',
    offline  => 'CRITICAL',
    degraded => 'CRITICAL',
);

# Managed disk states (lsmdisk).
my %lsmdisk_states = (
    offline  => 'CRITICAL',
    degraded => 'CRITICAL',
    syncing  => 'WARNING',
    initting => 'OK',
    online   => 'OK',
);

# I/O group check (lsiogrp): the key is the node_count column.
my %lsiogrp_states = (
    '2' => 'OK',
    '1' => 'CRITICAL',
);

# Enclosure slot port states (lsenclosureslotN).
my %lsenclosureslot_states = (
    online                => 'OK',
    excluded_by_drive     => 'WARNING',
    excluded_by_enclosure => 'WARNING',
    excluded_by_system    => 'WARNING',
);

# Host states (lshost): a degraded host is only a warning here.
my %lshost_states = (
    online   => 'OK',
    offline  => 'CRITICAL',
    degraded => 'WARNING',
);
# do not edit anything below this
# Print the command line help and terminate the plugin.
#
# Exit code: 0 when the help was requested with -h/-help, otherwise 3
# (Nagios UNKNOWN), so that a service definition with a missing -cluster or
# -check is not displayed as OK. The literal 3 is used because this sub is
# called before the %states table has been filled.
sub print_usage {
        print <<EOU;
    Usage: $0 -cluster=[IP or hostname] -check=[check] [-exclude=[listofitems]] [-v]
    -cluster            Hostname or IP address of V7000 SVC (*required*)
    -check              Checktype (*required*)
                        Possible values are:
                        * lunonline|luninsync|lshost
                        * lsmdisk|lsmdiskgrp|lsdrive|lsiogrp
                        * lsnode|lsnodebattery
                        * lsenclosure|lsenclosurebattery
                        * lsenclosurecanister|lsenclosurepsu|lsenclosureslotx
                        ( lsenclosureslotx = lsenclosureslot1 ... lsenclosureslotx )
    -user               Username which is configured on your v7000 (default nagios)
    -ssh                ssh method - the ssh command to use. Possible values are:
                        * "ssh" (default)
                        * "plink" (PUTTY)
    -exclude            comma separated list of excluded vdisknames (lunonline check),
                        consistency group names (luninsync check)
                        or host names (lshost check)
    -h -help            Print this help
    -v -verbose         verbose output (OK items are listed)
EOU
        exit ($help ? 0 : 3);
}
# Connection settings handed to the IBM::SVC constructor; an option that is
# empty or unset is simply left out of the parameter hash.
if ($cluster) { $params->{'cluster_name'} = $cluster; }
if ($user)    { $params->{'user'}         = $user; }
if ($ssh)     { $params->{'ssh_method'}   = $ssh; }
#$params->{'keyfile'} = $keyfile if $keyfile;

# Connection object used by itemcheck() for every query.
my $svc = IBM::SVC->new($params);
# Check dispatch table.
# check name => [ CLI command, label used in the messages, column holding the
#                 item name, column holding the item state, state translation
#                 table, 1 if the -exclude list is passed to itemcheck ]
my %check_table = (
    # volumes (vdisks)
    lunonline           => [ 'lsvdisk',             'vdisks (luns)',        'name',                   'status',     \%generic_states,   1 ],
    # remote copy relationships, named by their consistency group
    luninsync           => [ 'lsrcrelationship',    'consistency groups',   'consistency_group_name', 'state',      \%luninsync_states, 1 ],
    # managed disks: offline/degraded are critical, syncing is a warning,
    # initting and online are fine
    lsmdisk             => [ 'lsmdisk',             'mdisk',                'name',                   'status',     \%lsmdisk_states,   0 ],
    # I/O groups: the "state" is the node_count column (2 => OK, 1 => CRITICAL)
    lsiogrp             => [ 'lsiogrp',             'iogrp',                'name',                   'node_count', \%lsiogrp_states,   0 ],
    # attached hosts
    lshost              => [ 'lshost',              'host',                 'name',                   'status',     \%lshost_states,    1 ],
    # managed disk groups
    lsmdiskgrp          => [ 'lsmdiskgrp',          'mdiskgrp',             'name',                   'status',     \%generic_states,   0 ],
    # drives, identified by id
    lsdrive             => [ 'lsdrive',             'mdisk member',         'id',                     'status',     \%lsdrive_states,   0 ],
    # enclosures
    lsenclosure         => [ 'lsenclosure',         'enclosure(s)',         'id',                     'status',     \%generic_states,   0 ],
    # batteries (enclosure based systems / SVC nodes)
    lsenclosurebattery  => [ 'lsenclosurebattery',  'enclosurebatteries',   'battery_id',             'status',     \%generic_states,   0 ],
    lsnodebattery       => [ 'lsnodebattery',       'enclosurebatteries',   'battery_id',             'status',     \%generic_states,   0 ],
    # canisters (enclosure based systems) / nodes (SVC)
    lsenclosurecanister => [ 'lsenclosurecanister', 'enclosurecanister(s)', 'node_name',              'status',     \%generic_states,   0 ],
    lsnode              => [ 'lsnode',              'enclosurecanister(s)', 'name',                   'status',     \%generic_states,   0 ],
    # power supplies
    lsenclosurepsu      => [ 'lsenclosurepsu',      'enclosurepsu(s)',      'PSU_id',                 'status',     \%generic_states,   0 ],
);

if (exists $check_table{$check}) {
    my ($state_map, $with_exclude);
    ($svc_command, $item, $item_name, $item_state, $state_map, $with_exclude)
        = @{ $check_table{$check} };
    if ($with_exclude) {
        itemcheck($state_map, \%excl);
    } else {
        itemcheck($state_map);
    }
} elsif ($check =~ m/^lsenclosureslot(\d+)$/ ) {
    # lsenclosureslot<N>: state of enclosure slot port N, read from the
    # port_<N>_status column. Values handled by the state table:
    # * online: enclosure slot port N is online
    # * excluded_by_drive: the drive excluded the port
    # * excluded_by_enclosure: the enclosure excluded the port
    # * excluded_by_system: the clustered system (system) has excluded the port
    my $port = $1;
    $svc_command = "lsenclosureslot";
    $item        = "enclosureslots port" . $port;
    $item_name   = "slot_id";
    $item_state  = "port_" . $port . "_status";
    itemcheck(\%lsenclosureslot_states);
} else {
    # Unknown check name: nothing was queried.
    $state   = 'UNKNOWN';
    $message = "the check you provided does not exist";
}
# main check subroutine
#
# Runs the query named in $svc_command through $svc->svcinfo, translates the
# $item_state column of every returned row with the given state table and
# stores the overall result in the file-level $state and $message.
#
# Arguments:
#   1. hash reference: state value reported by the storage => 'OK',
#      'WARNING' or 'CRITICAL'; rows with any other value are not counted
#   2. (optional) hash reference whose keys are item names to skip
#
# Exits the script with CRITICAL when svcinfo returns a non-zero code.
sub itemcheck {
        # get hash reference(s) from subroutine call
        my $v7000_states = shift;
        my $excluded     = shift || {};

        # Each list starts with one empty string on purpose: join(" ", ...)
        # then yields the blank that follows "STATE:" in the messages below.
        my @critical_items = ("");
        my @warn_items     = ("");
        my @ok_items       = ("");
        my ($criticalcount, $warncount, $okcount) = (0, 0, 0);
        my ($item_desc, $final_item_state);

        # query storage cluster
        my ($rc, $all_items_ref) = $svc->svcinfo($svc_command, {});
        if ($rc != 0) {
                print "Cannot connect to cluster $cluster\n";
                exit $states{'CRITICAL'};
        }

        my @all_items = @$all_items_ref;
        if (scalar(@all_items) == 0) {
                # Return right away: without this the evaluation at the end
                # of the sub replaced this result with its UNKNOWN fallback.
                $state   = 'WARNING';
                $message = "$state: Could not find any entry for $item";
                return;
        }

        foreach my $items_params (@all_items) {
                $item_desc = "$items_params->{$item_name}";
                chomp($item_desc);
                #print Dumper($items_params);
                # omit excluded and blank items (\s, not a literal "s":
                # the old pattern dropped items called "s" or "ss")
                next if $excluded->{$item_desc} || $item_desc =~ m/^\s*$/;

                $final_item_state = "$items_params->{$item_state}";
                my $mapped = $v7000_states->{$final_item_state};
                $mapped = '' unless defined $mapped;

                if ($mapped eq 'OK') {
                        $okcount++;
                        push(@ok_items, $item_desc);
                } elsif ($mapped eq 'WARNING') {
                        $warncount++;
                        push(@warn_items, "$item_desc ($final_item_state) ");
                } elsif ($mapped eq 'CRITICAL') {
                        $criticalcount++;
                        push(@critical_items, "$item_desc ($final_item_state) ");
                }
        }

        if ($criticalcount > 0) {
                # critical items first, then whatever is only a warning
                $state   = 'CRITICAL';
                $message = "$state:" . join(" ", @critical_items) . " " . join(" ", @warn_items);
        } elsif ($warncount > 0) {
                $state   = 'WARNING';
                $message = "$state:" . join(" ", @warn_items);
        } elsif ($okcount > 0) {
                $state = 'OK';
                # $final_item_state holds the state of the last row looked at
                if ($verbose) {
                        $message = "$state: all $item $final_item_state [" . join(" ", @ok_items) . " ]";
                } else {
                        $message = "$state: all $item $final_item_state";
                }
        } else {
                # rows were returned, but none was counted
                $state   = 'UNKNOWN';
                $message = "$state: Could not find status information or items";
        }
}
# Print the one-line result and leave with the exit code that belongs to the
# final state name.
print "$message\n";
exit($states{$state});
# backup_config_storage_ibm.sh
#!/bin/bash
#set -x
#
#@(#) Script to backup svc
#
# version: 1.1 03-2022 eif
#
# Load the site settings stored next to this script. logpath, binpath and
# logname used below are presumably defined there - TODO confirm.
dir=$(dirname "$0")
. "$dir/.env" || { echo "cannot load $dir/.env" >&2; exit 1; }
# Variables
destination="mail@test.lu"
user_repoting="report"
SSH="ssh -o ConnectTimeout=30 ${user_repoting}@"
logfile=$logpath/list_storage_users.txt
list=$binpath/storage_list.txt
savedir=/workdir/svc/backup
svc_user="report"
DATE=$(date +%Y%m%d)
DAY=$(date +%d)    # backup rotation 30 days
# create backup folder (mkdir -p is a no-op when it already exists)
mkdir -p "$savedir"
#----------------------
# Log a failure, mail it to $destination and abort the whole run.
# Arguments: $1 - line written to the log, $2 - mail subject
_backup_fail ()
{
  echo "$1"
  # mailx takes the message body from stdin; give it one so that it neither
  # waits on a terminal nor sends an empty mail.
  printf '%s\n%s\n' "$2" "Reported by $0 at $(date '+%Y-%m-%d %H:%M:%S')" \
    | mailx -s "$2" "$destination"
  exit 1
}

# Back up the configuration of every storage system named in ${list}.
# For each system: run "svcconfig backup" over ssh, copy
# /dumps/svc.config.backup* into $savedir/<storage>, then pack the copied
# files into svc_conf_backup_<day of month>.tar.gz, replacing the archive
# written on the same day of an earlier month.
# Globals read: list, svc_user, savedir, DAY, destination
# Exits 1, after mailing $destination, at the first failing step.
backup_svc ()
{
  local storage connection savedir1 archive
  date '+%Y-%m-%d %H:%M:%S'
  # A for loop over the words of the list file is kept on purpose: a
  # "while read" loop would have the ssh calls consume the rest of the list
  # through stdin.
  for storage in $(cat "${list}")
  do
    connection="${svc_user}@${storage}"
    savedir1=$savedir/$storage
    archive=svc_conf_backup_$DAY.tar
    mkdir -p "$savedir1"
    rm -f "$savedir1"/svc.config.backup* >/dev/null 2>&1

    echo "###  Start Backup of storage:$(date '+%Y-%m-%d %H:%M:%S') $storage"
    if ! ssh "$connection" svcconfig backup
    then
      _backup_fail "# Backup failure on storage: ${storage}" \
        "Backup failure on storage: ${storage}"
    fi
    echo "# Backup Success on storage: ${storage}"

    echo "###  Copy Backup of storage: $storage"
    # The pattern is quoted so that it is expanded on the storage system.
    if ! scp "$connection:/dumps/svc.config.backup*" "$savedir1"
    then
      _backup_fail "#******* Copy Backup failure (scp) for storage: ${storage}" \
        "Copy Backup failure (scp) for storage: ${storage}"
    fi
    echo "# Copy Backup Success (scp) for storage: ${storage}"

    # compress backup.... + delete old files (30 j)
    # Never run the rm/tar below in the wrong directory.
    cd "$savedir1" \
      || _backup_fail "#******* Cannot enter $savedir1 for storage: ${storage}" \
           "Archive failure for storage: ${storage}"
    rm -f "$archive"* >/dev/null 2>&1
    if ! tar -cvf "$archive" svc.config.backup* || ! gzip "$archive"
    then
      _backup_fail "#******* Archive failure for storage: ${storage}" \
        "Archive failure for storage: ${storage}"
    fi
    echo
  done
  date '+%Y-%m-%d %H:%M:%S'
}
#########################
# Main
#########################
# Entry point: the backup run is the only task of this script.
main () { backup_svc; }

# stdout and stderr of the run both go to a log file whose suffix is the
# day of the week (date +%u).
main >$logname.$(date +%u) 2>&1
Disable the warning threshold on thin-provisioned LUNs
# chvdisk_warning.sh
#!/bin/bash
#
#@(#) Scripts to remove warning threshold on volumes
#
# version: 1.0 10-2023 Manu
# Load the site settings stored next to this script. logpath, binpath and
# logname used below are presumably defined there - TODO confirm.
dir=$(dirname "$0")
. "$dir/.env" || { echo "cannot load $dir/.env" >&2; exit 1; }
DATE=$(date +%Y%m%d)
YEAR=$(date +%Y)
MONTH=$(date +%m)
user_repoting="report"
# Command prefix: the storage name is appended directly after the "@".
SSH="ssh -o ConnectTimeout=30 ${user_repoting}@"
list=$binpath/storage_list.txt
storage=v7000-01
#----------------------
# Collect, for every volume of $storage, the copy_id and warning lines of
# its detailed lsvdisk view.
# Writes:
#   $logpath/lsvdisk.<storage>.txt      raw "lsvdisk -delim : -nohdr" output
#   $logpath/lsvdisk.<storage>.warning  one line per volume:
#                                       "<name>: copy_id <n> warning <v> ..."
# Globals read: SSH, storage, logpath
get_disklist ()
{
  local vol_list="$logpath/lsvdisk.${storage}.txt"
  local vol details
  date
  # ${SSH}${storage} stays unquoted: SSH holds a command together with its
  # options and must be split into words.
  ${SSH}${storage} "lsvdisk -delim : -nohdr" > "$vol_list"
  # Field 2 of the listing is used as the volume name; one query per volume.
  for vol in $(cut -d':' -f2 "$vol_list" | sort -u)
  do
    details=$(${SSH}${storage} "lsvdisk $vol" | grep -E 'warning|copy_id')
    # $details is left unquoted on purpose: word splitting folds the matched
    # lines into a single line.
    echo "$vol: "$details
  done > "$logpath/lsvdisk.${storage}.warning"
}
#----------------------
# Print one "chvdisk -copy <id> -warning 0 <vdisk>" command for every volume
# copy whose warning value is set and not zero.
# Input: $logpath/lsvdisk.<storage>.warning, one line per volume in the form
#   "<name>: copy_id <n> warning <v> [copy_id <n> warning <v>]"
# where the value after "warning" may be missing.
# Each copy is handled exactly once, so a volume with a single copy no
# longer gets its command printed twice.
# Globals read: logpath, storage
generate_svccmd ()
{
  awk '
    {
      # volume name = everything before the first colon
      vdisk = $1
      sub(/:.*$/, "", vdisk)
      copy = ""
      for (i = 2; i <= NF; i++) {
        if ($i == "copy_id") {
          copy = $(i + 1)
          i++
        } else if ($i == "warning") {
          # the value is absent when the next word already starts the next
          # copy, or when the line ends here
          value = ""
          if (i < NF && $(i + 1) != "copy_id") {
            value = $(i + 1)
            i++
          }
          if (value != "" && value + 0 != 0)
            printf "chvdisk -copy %s -warning 0 %s\n", copy, vdisk
        }
      }
    }
  ' "$logpath/lsvdisk.${storage}.warning"
}
#########################
# Entry point: refresh the per-volume warning list, then write the chvdisk
# commands derived from it to $logpath/chvdisk.cmd.
main ()
{
  date
  printf '%s\n' "*** Get SVC disk list"
  get_disklist
  printf '%s\n' "*** Generate SVC cmd"
  generate_svccmd >$logpath/chvdisk.cmd
}

# Progress output goes to the log file; the generated commands are then
# shown on the terminal.
main >$logname 2>&1
cat $logpath/chvdisk.cmd
Copy the audit log into a file daily and consolidate it per year
# check_audit_log.sh
#!/bin/bash
#
#@(#) Scripts to collect auditlogs from SVC / V7K users, once per day
# archived by month / year and visible on wiki
#
# version: 1.0 01-2020 Manu
# Load the site settings stored next to this script. binpath and logname
# used below are presumably defined there - TODO confirm.
dir=$(dirname "$0")
. "$dir/.env" || { echo "cannot load $dir/.env" >&2; exit 1; }
DATE=$(date +%Y%m%d)
YEAR=$(date +%Y)
MONTH=$(date +%m)
workdir=/workdir/svc/backup/auditlog
savedir=$workdir/$YEAR
index=$workdir/auditlog.txt
user_repoting="report"
# Command prefix: the storage name is appended directly after the "@".
SSH="ssh -o ConnectTimeout=30 ${user_repoting}@"
list=$binpath/storage_list.txt
# create backup folder (mkdir -p is a no-op when it already exists)
mkdir -p "$savedir"
#----------------------
# Fetch the audit log of every storage system named in ${list} and merge it
# into the file of the current month.
# Per storage, in $savedir:
#   <storage>_<MONTH>.log  work file: previous .out plus the fresh output
#   <storage>_<MONTH>.out  sorted, duplicate-free result
# Globals read: list, savedir, MONTH, SSH
get_auditlog ()
{
  local storage month_file
  date
  for storage in $(cat "${list}")
  do
    echo "Audit log for $storage"
    month_file="$savedir/${storage}_${MONTH}"
    # Start from what was collected so far this month.
    if [ -f "$month_file.out" ]
    then
      cp "$month_file.out" "$month_file.log"
    fi
    # ${SSH}${storage} stays unquoted: SSH holds a command with its options.
    if ! ${SSH}${storage} "catauditlog -delim ';'" >> "$month_file.log"
    then
      # Keep going with the other systems, but leave a trace in the log.
      echo "WARNING: could not read audit log from $storage"
    fi
    sort -u "$month_file.log" > "$month_file.out"
  done
}
#----------------------
# Build the Dokuwiki pages from the collected audit logs.
# Writes:
#   ${index}                        overview page with one link per storage
#   $workdir/auditlog_<storage>.txt heading, generation time and the entries
#                                   of that storage, newest sort position
#                                   first, between #code# markers
# Only monthly files (<storage>_<two digit month>.out) changed during the
# last 300 days are used, and lines about /dumps/svc.config are dropped.
# The two-digit pattern keeps the files of a storage named "<storage>_xyz"
# out of the page of "<storage>".
# Globals read: list, workdir, index
generate_log_file ()
{
  local storage Ustorage output
  # Formating page for Dokuwiki
  echo "====== Last User Audit log for storage ======" > "${index}"
  echo "" >> "${index}"
  for storage in $(cat "${list}")
  do
    Ustorage=$(echo "${storage}" | tr 'a-z' 'A-Z')
    output=$workdir/auditlog_${storage}.txt
    {
      echo "====== Last Changes on ${Ustorage}  ======"
      date "+%d-%m-%Y %H:%M"
      echo '#code#'
      # merge all monthly files, de-duplicate, then reverse the order
      find "$workdir" -name "${storage}_[0-9][0-9].out" -mtime -300 -exec cat {} + \
        | grep -v "/dumps/svc.config" \
        | sort -u \
        | tac
      echo '#/code#'
    } > "$output"
    echo "[[storage:svc:auditlog_${storage}|${Ustorage}]]" >> "${index}"
    echo "" >> "${index}"
  done
}
#------------------------------------------------
# Publish the generated pages: copy every $workdir/auditlog_<storage>.txt
# and the index page into the Dokuwiki page directory as root, then hand the
# directory over to the web server account.
# Globals read: workdir, index
send_wiki()
{
  local dokupath=/var/www/html/dokuwiki/data/pages/storage/svc
  local dokuuser=apache
  local dokugrp=apache
  local file1
  # Expand the pattern inside $workdir. It used to be expanded in whatever
  # directory the script was started from and only matched because root's
  # shell expanded the unmatched pattern again.
  for file1 in "$workdir"/auditlog_*.txt
  do
    [ -e "$file1" ] || continue    # no page generated yet
    sudo su - root -c "/bin/cp $file1 $dokupath"
  done
  sudo su - root -c "/bin/cp ${index} $dokupath"
  # owner:group - the "owner.group" spelling is deprecated
  sudo su - root -c "/bin/chown -R $dokuuser:$dokugrp $dokupath"
}
#########################
# Main
#########################
# Entry point: collect the audit logs, rebuild the wiki pages from them and
# publish the pages.
main ()
{
  date
  printf '%s\n' "*** Get audit log on storage"
  get_auditlog
  printf '%s\n' "*** Generate main audit log file"
  generate_log_file
  printf '%s\n' "*** Copy to Wiki"
  send_wiki
}

# stdout and stderr of the whole run end up in the log file.
main >$logname 2>&1