This is an old revision of the document!
[root@aix001]/usr/local/nagios/libexec# cat check_paths.sh
#!/bin/sh
#@(#) v1.0 Count number of paths per disk
# v1.1 add sudo linux
# v1.2 change for VSCSI
# v1.3 add verbose (-v), improvements linux

# Expected number of paths per type of disk, on a VIOS and on an LPAR.
pathviosfc=4
pathviosscsi=2
pathviossas=1
pathlparfc=8
pathlparscsi=2
pathlparsas=1

# Nagios plugin exit codes; STATUS holds the code this run will exit with.
STATUS=0
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

# MSG is the one-line plugin output, verbose the per-disk detail shown with -v.
MSG=""
verbose=""

# Expected path count: keep a value already supplied by the caller (nbpath in
# the environment), otherwise fall back to the LPAR FC default.
# -z is used instead of == so the test also works with a strict POSIX sh.
if [ -z "$nbpath" ]
then
  nbpath=$pathlparfc
fi

# Kernel name (Linux, AIX, ...) selects the counting function further down.
os=$(uname -s)
grepp() { [ $# -eq 1 ] && perl -00ne "print if /$1/i" || perl -00ne "print if /$1/i" < "$2";}
#---------------------
# Count the active paths of every mpath* multipath device on Linux.
# Reads:  nbpath (expected paths per device), STATE_* exit codes, grepp().
# Writes: MSG (devices with missing paths), verbose (all devices), STATUS.
# A device with fewer than 50% of nbpath active is CRITICAL; any other count
# different from nbpath is WARNING.
count_linux_path()
{
  # No multipath binary, or a VMware guest: nothing to check.
  if [ ! -x /usr/sbin/multipath ] || lsscsi -s | grep -q VMware
  then
    MSG="OK: no multipathing"
    verbose="$MSG"
    STATUS=$STATE_OK
    return
  fi

  # The percentage below divides by nbpath.
  if ! [ "$nbpath" -gt 0 ] 2>/dev/null
  then
    MSG="$MSG invalid expected path count: $nbpath"
    verbose="$MSG"
    STATUS=$STATE_UNKNOWN
    return
  fi

  # Capture the raw listing on its own: the status of a pipeline is the status
  # of its last stage, so a sudo/multipath/timeout failure would be hidden
  # behind the trailing sed.
  if ! rawpaths=$(timeout 30 sudo /usr/sbin/multipath -ll)
  then
    MSG="$MSG Maybe error on sudo config"
    verbose="$MSG"
    STATUS=$STATE_UNKNOWN
    return
  fi

  # Private scratch file instead of a fixed, predictable name in /tmp.
  tempfile=$(mktemp /tmp/multipath.XXXXXX)
  if [ -z "$tempfile" ]
  then
    MSG="$MSG Cannot create temporary file"
    verbose="$MSG"
    STATUS=$STATE_UNKNOWN
    return
  fi

  # One ';'-separated line per multipath line, with a separator inserted in
  # front of each mpath header so grepp can select one device per paragraph.
  printf '%s\n' "$rawpaths" | grep -v "policy=" | grep -v "size=" | tr -s ' ' | sed 's/\ /;/g' | sed '/^mpath/i \\n' > "$tempfile"

  for i in $(grep '^mpath' "$tempfile" | awk -F';' '{print $1}')
  do
    pathok=$(grepp "^$i;" < "$tempfile" | grep -v "policy=" | grep -v "size=" | grep -v '^mpath' | grep -c active)
    # Integer percentage (truncated), same result as the former bc | cut.
    pathok_pct=$(( 100 * pathok / nbpath ))
    verbose="$verbose $i;$pathok/$nbpath" # verbose message
    if [ "$pathok_pct" -lt 50 ]
    then
      MSG="$MSG $i;$pathok/$nbpath"
      if [ "$STATUS" -lt "$STATE_CRITICAL" ]
      then
        STATUS=$STATE_CRITICAL
      fi
    elif [ "$pathok_pct" -ne 100 ]
    then
      MSG="$MSG $i;$pathok/$nbpath"
      # Never downgrade an already CRITICAL (or higher) status.
      if [ "$STATUS" -lt "$STATE_CRITICAL" ]
      then
        STATUS=$STATE_WARNING
      fi
    fi
  done
  rm -f "$tempfile" 2>/dev/null
}
#---------------------
# Count the Enabled paths of every disk on AIX.
# Reads:  nbpath (default expected count), path* per-type counts, STATE_*.
# Writes: MSG (disks with missing paths), verbose (all disks), STATUS, nbpath.
# The expected count is chosen from the lsdev description (mpio, scsi, sas)
# and from whether the host is a VIOS (presence of /usr/ios/cli/ioscli).
count_aix_path()
{
  # Value used for disks whose description matches no known type. It is reset
  # for every disk so the result does not depend on the order of the disks.
  default_nbpath=$nbpath

  # check not available disks
  nbdisknok=$(lsdev -Cc disk | grep -v Available | wc -l | awk '{print $1}')
  if [ "$nbdisknok" -ne 0 ]
  then
    MSG="$MSG WARNING: $nbdisknok disks defined"
    verbose="$MSG"
    STATUS=$STATE_WARNING
  else
    STATUS=$STATE_OK
  fi

  # VIOS or plain LPAR is a property of the host: test it once.
  if [ -e /usr/ios/cli/ioscli ]
  then
    is_vios=1
  else
    is_vios=0
  fi

  # Each lsdev line becomes "name:state:description" with the blanks of the
  # description turned into commas, so the for loop sees one word per disk.
  for line in $(lsdev -Cc disk | tr -s ' ' | sed 's/\ /:/' | sed 's/\ /:/' | sed 's/\ /,/g')
  do
    hdisk=${line%%:*}
    disktype=$(echo "$line" | cut -d':' -f3- | tr 'A-Z' 'a-z')
    nbpath=$default_nbpath
    # Same precedence as before: mpio first, then scsi, then sas.
    case "$disktype" in
      *mpio*)
        if [ "$is_vios" -eq 1 ]; then nbpath=$pathviosfc; else nbpath=$pathlparfc; fi
        ;;
      *scsi*)
        if [ "$is_vios" -eq 1 ]; then nbpath=$pathviosscsi; else nbpath=$pathlparscsi; fi
        ;;
      *sas*)
        if [ "$is_vios" -eq 1 ]; then nbpath=$pathviossas; else nbpath=$pathlparsas; fi
        ;;
    esac

    pathok=$(lspath -l "$hdisk" | grep -c Enabled)
    # Integer percentage (truncated), same result as the former bc | cut.
    pathok_pct=$(( 100 * pathok / nbpath ))
    verbose="$verbose $hdisk;$pathok/$nbpath"
    if [ "$pathok_pct" -lt 50 ]
    then
      MSG="$MSG $hdisk;$pathok/$nbpath"
      if [ "$STATUS" -lt "$STATE_CRITICAL" ]
      then
        STATUS=$STATE_CRITICAL
      fi
    elif [ "$pathok_pct" -ne 100 ]
    then
      MSG="$MSG $hdisk;$pathok/$nbpath"
      # Never downgrade an already CRITICAL (or higher) status.
      if [ "$STATUS" -lt "$STATE_CRITICAL" ]
      then
        STATUS=$STATE_WARNING
      fi
    fi
  done
}
######################
# Run the counting function matching the operating system.
case "$os" in
  Linux)
    count_linux_path
    ;;
  AIX)
    count_aix_path
    ;;
  *)
    echo "########## Unknown OS"
    STATUS=$STATE_UNKNOWN
    ;;
esac

# One-line plugin output: "OK", or the list of disks with missing paths.
if [ $STATUS -eq $STATE_OK ]
then
  echo "OK"
else
  echo "$MSG"
fi

# For debug: with -v, print the per-disk detail, one entry per line.
case "$1" in
  -v)
    echo "$verbose" | tr ' ' '\n'
    ;;
esac

exit $STATUS
[root@nagios plugins]# cat check_ibm_storwize_quorum.sh
#!/bin/sh
#set -x
##################################################
#@(#) check SVC IP quorum
##################################################
# version: 1.0 02-2022 Manu
##################################################
# Usage: check_ibm_storwize_quorum.sh -cluster=<hostname>
#
# Runs "lsquorum -delim :" on the cluster over ssh and verifies that
#   - the IP quorum application on $quorumsrv is online, and
#   - at least two other quorum entries are online.
# Exit code: 0 when both tests pass, 1 when one fails, 2 when both fail or
# the cluster cannot be queried, 3 when no cluster name is given.
# Only POSIX sh constructs are used, to match the #!/bin/sh interpreter line.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

user="nagios"
quorumsrv="witness01.ad.com"

cluster=$(echo "$1" | sed 's/\-cluster=//')
if [ -z "$cluster" ]
then
  echo "UNKNOWN: missing -cluster=<hostname>"
  exit $STATE_UNKNOWN
fi

# A failed ssh must not be mistaken for a missing quorum.
if ! output=$(ssh "$user@$cluster" 'lsquorum -delim :')
then
  echo "Cannot connect to cluster $cluster"
  exit $STATE_CRITICAL
fi

STATUS=0
MSG=""

# One whitespace-separated token per line; the unquoted expansion is
# intentional, it is what splits the listing into tokens.
tokens=$(echo $output | tr ' ' '\n')

# IP quorum application: online device entry naming the witness server.
nb_ipquorum=$(echo "$tokens" | grep ':online:' | grep ':device:' | grep -c ":${quorumsrv}")
if [ "$nb_ipquorum" -ge 1 ]
then
  MSG="IP quorum: $quorumsrv OK"
else
  STATUS=$((STATUS + 1))
  MSG="IP quorum: $quorumsrv down!!!"
fi

# Remaining quorum entries: at least two must be online.
nb_quorum=$(echo "$tokens" | grep ':online:' | grep -vc ':device:')
# NOTE(review): Nagios treats the text after '|' as performance data -
# confirm that this separator is intended in the message below.
if [ "$nb_quorum" -ge 2 ]
then
  MSG="$MSG | nb quorum OK"
else
  STATUS=$((STATUS + 1))
  MSG="$MSG | bad number of quorum !!"
fi

echo "$MSG"
exit $STATUS
[root@lnxb080 plugins]# cat check_storage_host_paths.sh
#!/bin/sh
#set -x
##################################################
#@(#) check host paths on SVC
##################################################
# version: 1.0 02-2022 Manu
##################################################
# Usage: check_storage_host_paths.sh esx|aix|lnx|vio|win|other
#
# For every host defined on the storage systems in $list, counts the
# "state:active" lines of "lshost <host>" and reports the hosts of the
# requested family whose count is neither 0 nor the expected value.
# Exit code: 0 when nothing is reported, 2 otherwise, 3 on a wrong parameter.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3

user="nagios"
SSH="ssh -o ConnectTimeout=30 ${user}@"
list="svc01"
MSG=""

# word:    case-insensitive regex selecting the host names of the family
# count:   expected number of active entries for that family
# exclude: case-insensitive regex of host names to leave out
exclude=':none:'
word="$1"
case "$word" in
  esx)
    count=2
    ;;
  aix)
    count=4
    exclude="aixv"
    ;;
  lnx)
    count=4
    ;;
  vio)
    count=2
    word=aixv
    ;;
  win)
    count=2
    word='win|mssql'
    ;;
  other)
    count=4
    word='svc01'
    exclude='esx|aix|lnx|win|mssql'
    ;;
  *)
    # Unquoted pattern: a quoted "*" only matches a literal asterisk, so the
    # default arm was never taken.
    echo "wrong parameter"
    exit $STATE_UNKNOWN
    ;;
esac

for storage in $(echo ${list})
do
  cmd='lshost -delim : -nohdr'
  for node in $(${SSH}${storage} $cmd | cut -d':' -f2)
  do
    cmd="lshost -delim : $node"
    # "storage:host:active-count", kept only when the count is non-zero, the
    # host belongs to the family, the count is unexpected and the host is not
    # excluded.
    MSG1=$(echo "$storage:$node:$(${SSH}${storage} $cmd | grep -c "state:active")" | grep -v ':0$' | grep -Ei "${word}" | grep -v ":${count}$" | grep -Evi "${exclude}")
    if [ -n "$MSG1" ]
    then
      MSG="${MSG};${MSG1}"
    fi
  done
done

if [ -z "$MSG" ]
then
  RC=$STATE_OK
else
  RC=$STATE_CRITICAL
  echo "$MSG"
fi
exit $RC
[root@nagios plugins]# cat check_ibm_storwize.pl
#!/usr/bin/perl
#
# checks for SVC: lsnode lsnodebattery lsmdisk lsmdiskgrp lunonline
# checks flash storage: lsnodecanister lsenclosurebattery lsmdisk lsmdiskgrp lsdrive lunonline
#
# check_ibm_v7000_svc Nagios check
# you need to configure a ssh key without passphrase for logins to your V7000 storage array.
# IBM SVC.pm Perl Module is required and available here:
# https://www14.software.ibm.com/webapp/iwm/web/preLogin.do?source=AW-0NK
# http://www.alphaworks.ibm.com/tech/svctools
#
# Martin Leucht <mleucht@ipb.de> 09.01.2013
# Version 1.1
#
# fixed version 1.2 03.04.2014
#
# Usage: check_ibm_v7000_svc.pl -cluster=[IP or hostname] -check=[check] [ -exclude=[listofitems] ]
#
# this script can check the following things of your IBM V7000 SVC
#
# lsarray -> state of array MDisks
# lshost -> state of attached hosts
# lsdrive -> state of drives
# lsmdiskgrp -> state of svc managed disk groups
# lsmdisk -> state of external managed disks
# lunonline / lsvdisk -> state of volumes (lun's/vdisk's)
# lsenclosure -> state of enclosures
# lsenclosurebattery -> state of batteries
# lsnodebattery -> for SVC
# lsenclosurecanister -> state of canister/nodes
# lsnode -> for SVC
# lsenclosurepsu -> state of psu's
# lsenclosureslot -> state of port enclosure slots
# luninsync / lsrcrelationship -> state of syncs of your volumes (if you have more than one machine and mirroring)
# lsenclosureslotx -> state of enclosure slot port
#
# TODO: ISCSI port check (BUG in Perl module - lsportip state is not provided via IBM::SVC)
#
# example nagios command definition
#
# define command {
# command_name check_ibm_v7000_svc
# command_line $USER1$/check_ibm_v7000_svc.pl -cluster=$HOSTADDRESS$ -check=$ARG1$ -v
# }
#
# example command excluding some items
#
# define command {
# command_name check_ibm_v7000_svc_exclude
# command_line $USER1$/check_ibm_v7000_svc.pl -cluster=$HOSTADDRESS$ -check=$ARG1$ -exclude=$ARG2$ -v
# }
#
# ... and service definition
#
# define service {
# use severity2,noperf
# service_description check_lunsonline
# host_name storwize01,storwize02
# check_command check_ibm_v7000_svc!lunonline!'lun1,lun2'
# }
use strict;
use IBM::SVC1;
use Getopt::Long;
#use Data::Dumper;
# Targets of the command-line options; the ssh method and the user name
# start with their defaults and are overridden by -ssh / -user.
my ($cluster, $verbose, $check, $help, $exclude);
my $ssh  = "ssh";
my $user = "nagios";

# Outcome of the selected check, printed and turned into the exit code at
# the end of the script.
my ($state, $message) = ("", "");

# Connection parameters handed to the SVC module.
my $params = {};

# Description of the selected check, read by itemcheck().
my ($svc_command, $item, $item_name, $item_state, $msg) = ("", "", "", "", "");

# Exclusion list as given on the command line, and its lookup hash.
my @exclude = ("");
my $x       = 0;
my %excl    = ();

GetOptions(
    "cluster=s" => \$cluster,
    "check=s"   => \$check,
    "exclude=s" => \$exclude,
    "user=s"    => \$user,
    "ssh=s"     => \$ssh,
    "verbose|v" => \$verbose,
    "help|h"    => \$help
);

# Both -cluster and -check are mandatory; -help always shows the usage.
unless ($cluster && $check && !$help) {
    print_usage();
    exit(0);
}
# Nagios exit states
our %states = (
    OK       => 0,
    WARNING  => 1,
    CRITICAL => 2,
    UNKNOWN  => 3
);
# Nagios state names
our %state_names = (
    0 => 'OK',
    1 => 'WARNING',
    2 => 'CRITICAL',
    3 => 'UNKNOWN'
);
# Build the lookup hash of excluded item names from the comma separated
# -exclude value. Each name maps to its 1-based position, so every entry is
# a true value for the lookup done in itemcheck().
if (defined $exclude && $exclude ne "") {
    if ($check eq "lunonline" || $check eq "luninsync" || $check eq "lshost") {
        @exclude = split(",", $exclude);
        foreach my $excluded_name (@exclude) {
            $x++;
            $excl{$excluded_name} = $x;
        }
    } else {
        # Wrong invocation: report UNKNOWN, an exit code of 0 would show the
        # misconfigured service as OK in Nagios.
        print "excluding is only available for lunonline, luninsync and lshost check!\n";
        exit $states{'UNKNOWN'};
    }
}
# Maps from the state reported by the SVC/V7000 (see "svc_command -h" on the
# console) to the Nagios state raised for it.

# lunonline, lsenclosure, lsmdiskgrp, lsenclosurecanister, lsenclosurepsu,
# battery and node checks
my %generic_states = (
    online   => 'OK',
    offline  => 'CRITICAL',
    degraded => 'CRITICAL',
);

# luninsync (remote copy relationships)
my %luninsync_states = (
    consistent_synchronized   => 'OK',
    consistent_copying        => 'OK',
    consistent_stopped        => 'WARNING',
    idling                    => 'WARNING',
    inconsistent_stopped      => 'CRITICAL',
    inconsistent_copying      => 'CRITICAL',
    consistent_disconnected   => 'CRITICAL',
    idling_disconnected       => 'CRITICAL',
    inconsistent_disconnected => 'CRITICAL',
);

# lsdrive: same mapping as the generic one
my %lsdrive_states = %generic_states;

# lsmdisk
my %lsmdisk_states = (
    online   => 'OK',
    initting => 'OK',
    syncing  => 'WARNING',
    degraded => 'CRITICAL',
    offline  => 'CRITICAL',
);

# lsiogrp: keyed by the node_count column
my %lsiogrp_states = (
    2 => 'OK',
    1 => 'CRITICAL',
);

# lsenclosureslotN
my %lsenclosureslot_states = (
    online                => 'OK',
    excluded_by_drive     => 'WARNING',
    excluded_by_enclosure => 'WARNING',
    excluded_by_system    => 'WARNING',
);

# lshost
my %lshost_states = (
    online   => 'OK',
    degraded => 'WARNING',
    offline  => 'CRITICAL',
);
# do not edit anything below this
# Print the command-line help and terminate the plugin.
# The exit code is 3 (Nagios UNKNOWN): this sub runs when -cluster or -check
# is missing, and an exit code of 0 would show a misconfigured service as OK.
# A literal is used because %states is not yet populated when this sub is
# called from the option check at the top of the script.
sub print_usage {
    print <<EOU;
Usage: $0 -cluster=[IP or hostname] -check=[check] -v
-cluster Hostname or IP address of V7000 SVC (*required*)
-check Checktype (*required*)
Possible values are:
* lunonline|luninsync|lshost
* lsmdisk|lsmdiskgrp|lsdrive|lsiogrp
* lsnode|lsnodebattery
* lsenclosure|lsenclosurebattery
* lsenclosurecanister|lsenclosurepsu|lsenclosureslotx
( lsenclosureslotx = lsenclosureslot1 ... lsenclosureslotx )
-user Username which is configured on your v7000 (default nagios)
-ssh ssh method - the ssh command to use. Possible values are:
* "ssh" (default)
* "plink" (PUTTY)
-exclude comma separated list of excluded vdisknames (lunonline check),
consistency group names (luninsync check) or host names (lshost check)
-h -help Print this help
-v -verbose verbose output (OK items are listed)
EOU
    exit(3);
}
# Connection parameters: only options that carry a value are passed on.
for my $setting ([cluster_name => $cluster], [user => $user], [ssh_method => $ssh]) {
    my ($key, $value) = @$setting;
    $params->{$key} = $value if $value;
}
#$params->{'keyfile'} = $keyfile if $keyfile;

# Create the connection with the parameters set above.
my $svc = IBM::SVC->new($params);

# One row per check name:
#   [ CLI command, label used in messages, column holding the item name,
#     column holding the item state, state map, honours -exclude ]
my %check_table = (
    lunonline           => ["lsvdisk",             "vdisks (luns)",        "name",                   "status",     \%generic_states,   1],
    luninsync           => ["lsrcrelationship",    "consistency groups",   "consistency_group_name", "state",      \%luninsync_states, 1],
    lsmdisk             => ["lsmdisk",             "mdisk",                "name",                   "status",     \%lsmdisk_states,   0],
    lsiogrp             => ["lsiogrp",             "iogrp",                "name",                   "node_count", \%lsiogrp_states,   0],
    lshost              => ["lshost",              "host",                 "name",                   "status",     \%lshost_states,    1],
    lsmdiskgrp          => ["lsmdiskgrp",          "mdiskgrp",             "name",                   "status",     \%generic_states,   0],
    lsdrive             => ["lsdrive",             "mdisk member",         "id",                     "status",     \%lsdrive_states,   0],
    lsenclosure         => ["lsenclosure",         "enclosure(s)",         "id",                     "status",     \%generic_states,   0],
    lsenclosurebattery  => ["lsenclosurebattery",  "enclosurebatteries",   "battery_id",             "status",     \%generic_states,   0],
    lsnodebattery       => ["lsnodebattery",       "enclosurebatteries",   "battery_id",             "status",     \%generic_states,   0],
    lsenclosurecanister => ["lsenclosurecanister", "enclosurecanister(s)", "node_name",              "status",     \%generic_states,   0],
    lsnode              => ["lsnode",              "enclosurecanister(s)", "name",                   "status",     \%generic_states,   0],
    lsenclosurepsu      => ["lsenclosurepsu",      "enclosurepsu(s)",      "PSU_id",                 "status",     \%generic_states,   0],
);

my $selected_check = $check_table{$check};

# lsenclosureslotN: the state of port N of every enclosure slot is read from
# the port_N_status column.
if (!$selected_check && $check =~ m/^lsenclosureslot(\d+)$/) {
    $selected_check = [
        "lsenclosureslot",
        "enclosureslots port" . $1,
        "slot_id",
        "port_" . $1 . "_status",
        \%lsenclosureslot_states,
        0,
    ];
}

if ($selected_check) {
    my ($state_map, $with_exclusions);
    ($svc_command, $item, $item_name, $item_state, $state_map, $with_exclusions) = @$selected_check;
    if ($with_exclusions) {
        itemcheck($state_map, \%excl);
    } else {
        itemcheck($state_map);
    }
} else {
    $state = 'UNKNOWN';
    $message = "the check you provided does not exist";
}
# main check subroutine
# Query the cluster with $svc_command and classify every returned item.
# Arguments:
#   $v7000_states - hash ref mapping a reported state to OK/WARNING/CRITICAL
#   $excluded     - optional hash ref; items whose name is a key with a true
#                   value are skipped
# Reads the globals $svc, $svc_command, $item, $item_name, $item_state,
# $verbose and $cluster; writes $state and $message.
# Items whose state is not a key of $v7000_states are not counted.
# Exits CRITICAL directly when the query returns a non-zero code.
sub itemcheck {
    # get hash reference(s) from subroutine call
    my ($v7000_states, $excluded) = @_;
    $excluded ||= {};
    # The leading empty element makes join(" ", ...) start with the blank
    # that separates "STATE:" from the first item in the messages below.
    my @critical_items = ("");
    my @warn_items     = ("");
    my @ok_items       = ("");
    my @all_items;
    my $criticalcount = 0;
    my $warncount     = 0;
    my $okcount       = 0;
    my ($item_desc, $final_item_state);
    # query storage cluster
    my ($rc, $all_items_ref) = $svc->svcinfo($svc_command, {});
    if ($rc == 0) {
        @all_items = @$all_items_ref;
        if (scalar(@all_items) == 0) {
            $state = 'WARNING';
            $message = "Could not find any entry for $item";
        } else {
            foreach my $items_params (@all_items) {
                $item_desc = "$items_params->{$item_name}";
                chomp($item_desc);
                # omit excluded and blank items (\s: whitespace only; the
                # former pattern ^s*$ skipped names made of the letter "s")
                next if $excluded->{$item_desc} || $item_desc =~ m/^\s*$/;
                $final_item_state = "$items_params->{$item_state}";
                my $severity = $v7000_states->{$final_item_state} || '';
                if ($severity eq 'OK') {
                    $okcount++;
                    push(@ok_items, $item_desc);
                } elsif ($severity eq 'WARNING') {
                    $warncount++;
                    $msg = "$item_desc ($final_item_state) ";
                    push(@warn_items, $msg);
                } elsif ($severity eq 'CRITICAL') {
                    $criticalcount++;
                    $msg = "$item_desc ($final_item_state) ";
                    push(@critical_items, $msg);
                }
            }
        }
    } else {
        print "Cannot connect to cluster $cluster\n";
        exit $states{'CRITICAL'};
    }
    if ($warncount == 0 && $criticalcount == 0 && $okcount > 0) {
        $state = 'OK';
        if ($verbose) {
            $message = "$state: all $item $final_item_state [" . join(" ", @ok_items) . " ]";
        } else {
            $message = "$state: all $item $final_item_state";
        }
    } elsif ($warncount > 0 && $criticalcount == 0) {
        $state = 'WARNING';
        $message = "$state:" . join(" ", @warn_items);
    } elsif ($criticalcount > 0) {
        # critical items first, followed by the warnings, if any
        $state = 'CRITICAL';
        $message = "$state:" . join(" ", @critical_items) . " " . join(" ", @warn_items);
    } else {
        $state = 'UNKNOWN';
        $message = "$state: Could not find status information or items";
    }
}
# Emit the single-line plugin output and leave with the matching Nagios code.
print "$message\n";
exit($states{$state});
# backup_config_storage_ibm.sh
#!/bin/bash
#set -x
#
#@(#) Script to backup svc
#
# version: 1.1 03-2022 eif
#
# Site settings come from the .env file located next to this script; it
# presumably defines logpath, binpath and logname, which are used below but
# not set in this script - TODO confirm.
dir=$(dirname "$0")
if ! . "$dir/.env"
then
  echo "cannot load $dir/.env" >&2
  exit 1
fi

# Variables
destination="mail@test.lu"
user_repoting="report"
SSH="ssh -o ConnectTimeout=30 ${user_repoting}@"
logfile=$logpath/list_storage_users.txt
list=$binpath/storage_list.txt   # one storage system name per entry
savedir=/workdir/svc/backup
svc_user="report"
DATE=$(date +%Y%m%d)
DAY=$(date +%d) # day of month: archive names are reused every month

# create backup folder
if ! mkdir -p "$savedir"
then
  echo "cannot create $savedir" >&2
  exit 1
fi
#----------------------
# Back up the configuration of every storage system listed in $list.
# For each system: run "svcconfig backup" over ssh, copy the resulting
# /dumps/svc.config.backup* files into $savedir/<storage> and archive them
# as svc_conf_backup_<day of month>.tar.gz.
# Reads: list, savedir, svc_user, destination, DAY.
# On the first failure a mail is sent to $destination and the script exits 1.
backup_svc ()
{
  local storage connection savedir1

  date '+%Y-%m-%d %H:%M:%S'
  # A for loop rather than "while read": ssh inside the loop would otherwise
  # consume the rest of the list from stdin.
  for storage in $(cat "${list}")
  do
    connection="${svc_user}@${storage}"
    savedir1=$savedir/$storage
    mkdir -p "$savedir1"
    rm -f "$savedir1"/svc.config.backup* >/dev/null 2>&1

    echo "### Start Backup of storage:$(date '+%Y-%m-%d %H:%M:%S') $storage"
    if ! ssh "$connection" svcconfig backup
    then
      echo "# Backup failure on storage: ${storage}"
      # The body is piped in: without it mailx waits for input on stdin.
      echo "svcconfig backup failed on ${storage}" \
        | mailx -s "Backup failure on storage: ${storage}" "$destination"
      exit 1
    fi
    echo "# Backup Success on storage: ${storage}"

    echo "### Copy Backup of storage: $storage"
    if ! scp "$connection:/dumps/svc.config.backup*" "$savedir1"
    then
      echo "#******* Copy Backup failure (scp) for storage: ${storage}"
      echo "scp of the configuration backup failed for ${storage}" \
        | mailx -s "Copy Backup failure (scp) for storage: ${storage}" "$destination"
      exit 1
    fi
    echo "# Copy Backup Success (scp) for storage: ${storage}"

    # compress backup; the archive of the same day of month is replaced.
    # Stop if the directory cannot be entered, so that rm and tar never run
    # in the wrong place.
    if ! cd "$savedir1"
    then
      echo "#******* Cannot enter $savedir1"
      echo "cannot enter $savedir1" \
        | mailx -s "Copy Backup failure (scp) for storage: ${storage}" "$destination"
      exit 1
    fi
    rm -f "svc_conf_backup_$DAY.tar"* >/dev/null 2>&1
    tar -cvf "svc_conf_backup_$DAY.tar" svc.config.backup*
    gzip "svc_conf_backup_$DAY.tar"
    echo
  done
  date '+%Y-%m-%d %H:%M:%S'
}
#########################
# Main
#########################
# Entry point: run the backup of all storage systems.
main ()
{
backup_svc
}
# One log file per day of the week (1-7); stdout and stderr both go there.
weekday=$(date '+%u')
main > $logname.$weekday 2>&1
Disable the thin-provisioning warning threshold on LUNs
# chvdisk_warning.sh
#!/bin/bash
#
#@(#) Scripts to remove warning threshold on volumes
#
# version: 1.0 10-2023 Manu
#
# Site settings come from the .env file located next to this script; it
# presumably defines logpath, binpath and logname, which are used below but
# not set in this script - TODO confirm.
dir=$(dirname "$0")
if ! . "$dir/.env"
then
  echo "cannot load $dir/.env" >&2
  exit 1
fi
DATE=$(date +%Y%m%d)
YEAR=$(date +%Y)
MONTH=$(date +%m)
user_repoting="report"
# Command prefix: the storage name is appended directly after the "@".
SSH="ssh -o ConnectTimeout=30 ${user_repoting}@"
list=$binpath/storage_list.txt
# Storage system whose volumes are processed.
storage=v7000-01
#----------------------
# Collect, for every volume of $storage, the copy_id and warning lines of
# its detailed "lsvdisk" view.
# Writes $logpath/lsvdisk.<storage>.txt (concise volume list) and
# $logpath/lsvdisk.<storage>.warning (one line per volume: "name: fields").
get_disklist ()
{
date
vdisk_table=$logpath/lsvdisk.${storage}.txt
${SSH}${storage} "lsvdisk -delim : -nohdr" > $vdisk_table
# Second column of the concise view is the volume name.
for vol in $(cut -d':' -f2 $vdisk_table | sort -u)
do
  vol_fields=$(${SSH}${storage} "lsvdisk $vol" | egrep 'warning|copy_id')
  # Unquoted on purpose: word splitting joins the matched lines into one.
  echo "$vol: "$vol_fields
done > $logpath/lsvdisk.${storage}.warning
}
#----------------------
# Print one "chvdisk -copy <id> -warning 0 <volume>" command for every
# volume copy whose warning value is set and different from 0.
# Input: $logpath/lsvdisk.<storage>.warning, one line per volume of the form
#   "name: copy_id 0 warning 80 copy_id 1 warning 0"
# Every copy present on the line is handled exactly once: a volume with a
# single copy no longer produces the same command twice.
generate_svccmd ()
{
  local line vdisk entry copy_id warning
  local -a entries

  # Reshape each line into "name:,copy_id;0;warning;80,copy_id;1;warning;0"
  # so that it holds no blank and the for loop sees one word per volume.
  for line in $(sed -e 's/\ copy_id/,copy_id/g' -e 's/\ /;/g' "$logpath/lsvdisk.${storage}.warning")
  do
    vdisk=${line%%:*}
    # One array element per copy; the element before the first comma is empty.
    IFS=',' read -r -a entries <<< "${line#*:}"
    for entry in "${entries[@]}"
    do
      [[ -n "$entry" ]] || continue
      # entry is "copy_id;<id>;warning;<value>"
      IFS=';' read -r _ copy_id _ warning _ <<< "$entry"
      # String tests: an arithmetic -ne against "" fails on non-numeric text.
      if [[ -n "$warning" && "$warning" != "0" ]]
      then
        echo "chvdisk -copy $copy_id -warning 0 $vdisk"
      fi
    done
  done
}
#########################
# File receiving the generated chvdisk commands.
chvdisk_cmdfile=$logpath/chvdisk.cmd
# Entry point: refresh the volume list, then generate the commands.
main ()
{
date
echo "*** Get SVC disk list"
get_disklist
echo "*** Generate SVC cmd"
generate_svccmd > $chvdisk_cmdfile
}
# Progress messages go to the log; the generated commands are shown at the end.
main > $logname 2>&1
cat $chvdisk_cmdfile
Copy the audit log to a file every day and consolidate it per year
# check_audit_log.sh
#!/bin/bash
#
#@(#) Scripts to collect auditlogs from SVC / V7K users, once per day
# archived by month / year and visible on wiki
#
# version: 1.0 01-2020 Manu
#
# Site settings come from the .env file located next to this script; it
# presumably defines binpath and logname, which are used below but not set
# in this script - TODO confirm.
dir=$(dirname "$0")
if ! . "$dir/.env"
then
  echo "cannot load $dir/.env" >&2
  exit 1
fi
DATE=$(date +%Y%m%d)
YEAR=$(date +%Y)
MONTH=$(date +%m)
workdir=/workdir/svc/backup/auditlog
savedir=$workdir/$YEAR          # monthly files of the current year
index=$workdir/auditlog.txt     # wiki index page
user_repoting="report"
# Command prefix: the storage name is appended directly after the "@".
SSH="ssh -o ConnectTimeout=30 ${user_repoting}@"
list=$binpath/storage_list.txt

# create backup folder
if ! mkdir -p "$savedir"
then
  echo "cannot create $savedir" >&2
  exit 1
fi
#----------------------
# Append today's audit log of every storage system in $list to its monthly
# file and keep a sorted, de-duplicated copy.
# Files, per storage: $savedir/<storage>_<MONTH>.log (working copy) and
# $savedir/<storage>_<MONTH>.out (sorted unique result).
get_auditlog ()
{
date
for storage in $(cat ${list})
do
  echo "Audit log for $storage"
  monthly=$savedir/${storage}_${MONTH}
  # Restart from the previous de-duplicated result when there is one.
  if [ -f $monthly.out ]
  then
    cp $monthly.out $monthly.log
  fi
  ${SSH}${storage} "catauditlog -delim ';'" >> $monthly.log
  sort -u $monthly.log > $monthly.out
done
}
#----------------------
# Build the DokuWiki pages: one page per storage system with its audit log
# entries (newest sort order first) and an index page linking to them.
# Source files are the <storage>_*.out files under $workdir modified within
# the last 300 days; configuration-dump entries are left out.
generate_log_file ()
{
# Index page header
{
  echo "====== Last User Audit log for storage ======"
  echo ""
} > ${index}

for storage in $(cat ${list})
do
  Ustorage=$(echo ${storage} | tr 'a-z' 'A-Z')
  output=$workdir/auditlog_${storage}.txt

  # Gather the entries of all recent monthly files.
  : > $output
  for file1 in $(find $workdir -name "${storage}_*.out" -mtime -300)
  do
    grep -v "/dumps/svc.config" $file1 | sort -u >> $output
  done

  # De-duplicate across files, then reverse the sort order.
  sort -u $output > $output.2
  tac $output.2 > $output.1

  # Page of this storage system
  {
    echo "====== Last Changes on ${Ustorage} ======"
    date "+%d-%m-%Y %H:%M"
    echo '#code#'
    cat $output.1
    echo '#/code#'
  } > $output

  # Link from the index page
  {
    echo "[[storage:svc:auditlog_${storage}|${Ustorage}]]"
    echo ""
  } >> ${index}

  rm -f $output.1 $output.2
done
}
#------------------------------------------------
# Publish the generated pages: copy every $workdir/auditlog_*.txt page and
# the index page into the DokuWiki pages directory, then give the directory
# to the web server user. The copies run as root through "sudo su".
send_wiki()
{
  local dokupath=/var/www/html/dokuwiki/data/pages/storage/svc
  local dokuuser=apache
  local dokugrp=apache
  local page

  # Glob inside $workdir: the pages live there, not in the directory the
  # script happens to be started from.
  for page in "$workdir"/auditlog_*.txt
  do
    # Unmatched glob: the pattern is left as is, nothing to copy.
    [[ -e "$page" ]] || continue
    sudo su - root -c "/bin/cp $page $dokupath"
  done
  sudo su - root -c "/bin/cp ${index} $dokupath"
  sudo su - root -c "/bin/chown -R $dokuuser:$dokugrp $dokupath"
}
#########################
# Main
#########################
# Announce a step, then run it.  $1: title, $2: function to call.
run_step ()
{
echo "*** $1"
$2
}
# Entry point: collect the audit logs, build the wiki pages, publish them.
main ()
{
date
run_step "Get audit log on storage" get_auditlog
run_step "Generate main audit log file" generate_log_file
run_step "Copy to Wiki" send_wiki
}
main > $logname 2>&1
# check_ibm_v7000.sh
#!/bin/bash
# Author: Lazzarin Alberto
# Date: 10-04-2013
# Version 1.4
#
# This plugin checks the HW status of an IBM Storwize v7000.
# To use this script you need to create a linux user with an ssh key.
# Create the same user on the v7000, member of the monitor group, and upload
# the public ssh key.
# Try to log in from the linux machine to the v7000 without password; if it
# works you can use the plugin.
# I use the 'nagios' user to check the remote system.
# The help is included in the script.
#
# CHANGELOG
#
# 1.4 Made by Andrea Tedesco [andrea85 . tedesco @ gmail . com]
#     Add check of v7000 Unified
# 1.3 Made by Ivan Bergantin [ivan . bergantin @ gmail . com]
#     suggested by Leandro Freitas [leandro @ nodeps . com . br]
#     Add short output in "Service Status Details For Host" view, and
#     detailed output in "Service Information" view
# 1.2 Made by Feilong
#     Add check of mirror status between two volumes on two IBM V7000.
#     It checks the number of mirrors and the number of consistent and
#     synchronized mirrors. If they differ, the status returned is critical.
# 1.1 Change login method from 'plink' to ssh.
#     Add "OK" and "ATTENTION" in the output.
# 1.0 First release.
#
ssh=/usr/bin/ssh
exitCode=0

# -M storage address, -U ssh user, -Q query to run, -h help.
# Any other option only sets $unknown.
while getopts 'M:U:Q:d:h' OPT; do
  case $OPT in
    M) storage=$OPTARG ;;
    U) user=$OPTARG ;;
    Q) query=$OPTARG ;;
    h) hlp="yes" ;;
    *) unknown="yes" ;;
  esac
done

# usage
HELP="
Check IBM Storwize v7000 throght ssh (GPL licence)
usage: $0 [ -M value -U value -Q command -h ]
syntax:
lsarray
lsdrive
lsvdisk
lsenclosure
lsenclosurebattery
lsenclosurecanister
lsenclosurepsu
lsenclosureslot
lsrcrelationship
unified
-h --> Print This Help Screen
Note : This check use ssh protocoll.
"

# Help requested or no argument at all: print the usage and leave with
# UNKNOWN (3), so that a check configured without arguments does not show
# up as OK in Nagios.
if [ "$hlp" = "yes" ] || [ $# -lt 1 ]; then
  echo "$HELP"
  exit 3
fi
# Scratch files: the table returned by the current query, and the list of
# healthy sensors of the "unified" query. Both are created with mktemp and
# removed on every exit path.
tmp_file=$(mktemp /tmp/v7000.XXXXXX) || {
  echo "UNKNOWN: cannot create temporary file"
  exit 3
}
tmp_file_OK=$(mktemp /tmp/v7000_OK.XXXXXX) || {
  rm -f -- "$tmp_file"
  echo "UNKNOWN: cannot create temporary file"
  exit 3
}
trap 'rm -f -- "$tmp_file" "$tmp_file_OK"' EXIT
outputMess=""

# Run $query on the storage system and store its output, without the header
# line, in $tmp_file. Exits UNKNOWN (3) when the ssh command fails, so that
# an unreachable system is not reported as healthy.
fetch_table() {
  local raw
  if ! raw=$("$ssh" "$user@$storage" "$query"); then
    echo "UNKNOWN: cannot run $query on $storage"
    exit 3
  fi
  printf '%s\n' "$raw" | sed '1d' > "$tmp_file"
}

# summary_line CRIT_LABEL OK_LABEL COLUMN...
# Append the summary line to $outputMess: CRITICAL when one of the given
# columns of $tmp_file contains "offline" (any case), OK otherwise.
# Nothing is printed, so the plugin output holds only the final message.
summary_line() {
  local crit_label=$1 ok_label=$2
  shift 2
  if awk -v cols="$*" '
      BEGIN { n = split(cols, col, " ") }
      { for (i = 1; i <= n; i++) if (tolower($(col[i])) ~ /offline/) found = 1 }
      END { exit !found }' "$tmp_file"
  then
    outputMess="$outputMess CRITICAL: $crit_label OFFLINE \n"
  else
    outputMess="$outputMess OK: $ok_label \n"
  fi
}

#echo -ne "IBM Storwize v7000 Health Check\n"
# In every table loop "read" splits the line on blanks, like awk did; the
# "_" placeholders skip the columns that are not used.
case $query in
  lsarray)
    fetch_table
    summary_line "MDisk" "MDisks" 3
    while read -r _ mdisk_name mdisk_status _; do
      if [ "$mdisk_status" = "online" ]; then
        outputMess="$outputMess OK: MDisks $mdisk_name status: $mdisk_status \n"
      else
        outputMess="$outputMess ATTENTION: MDisks $mdisk_name status: $mdisk_status \n"
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsdrive)
    fetch_table
    summary_line "Disk" "Drive" 2
    # columns: 1 id, 2 status, 4 role, 5 type, 6 capacity, 10 enclosure, 11 slot
    while read -r drive_n drive_status _ drive_role drive_type drive_capacity _ _ _ drive_enclosure drive_slot _; do
      if [ "$drive_status" = "online" ]; then
        outputMess="$outputMess OK: Drive $drive_n is online \n"
      else
        outputMess="$outputMess ATTENTION: Disk $drive_n \nstatus: $drive_status \nrole: $drive_role \ntype: $drive_type \ncapacity: $drive_capacity \nenclosure: $drive_enclosure \nslot: $drive_slot "
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsvdisk)
    fetch_table
    summary_line "VDisk" "VDisk" 5
    # columns: 2 name, 5 status
    while read -r _ vdisk_name _ _ vdisk_status _; do
      if [ "$vdisk_status" = "online" ]; then
        outputMess="$outputMess OK: VDisks $vdisk_name status: $vdisk_status \n"
      else
        outputMess="$outputMess ATTENTION: VDisks $vdisk_name status: $vdisk_status \n"
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsenclosure)
    fetch_table
    summary_line "Enclosure" "Enclosure" 2
    # columns: 1 id, 2 status, 7 and 8 reported as pn and sn
    while read -r enc_n enc_status _ _ _ _ enc_pn enc_sn _; do
      if [ "$enc_status" = "online" ]; then
        outputMess="$outputMess OK: Enclosure $enc_n status: $enc_status \n"
      else
        outputMess="$outputMess ATTENTION: Enclosure $enc_n status: $enc_status sn: $enc_sn pn: $enc_pn \n"
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsenclosurebattery)
    fetch_table
    summary_line "Battery" "Battery" 3
    # columns: 2 id, 3 status, 5 recharge flag, 6 charge, 7 end-of-life flag
    while read -r _ batt_n batt_status _ batt_rec batt_charge batt_eol _; do
      if [ "$batt_status" = "online" ] && [ "$batt_rec" = "no" ] \
        && [ "$batt_charge" = "100" ] && [ "$batt_eol" = "no" ]; then
        outputMess="$outputMess OK: Battery $batt_n status: $batt_status \n"
      else
        outputMess="$outputMess ATTENTION: Battery $batt_n status: $batt_status recharge: $batt_rec charged: $batt_charge eol: $batt_eol \n"
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsenclosurecanister)
    fetch_table
    summary_line "Canister" "Canister" 3
    while read -r can_enc_id can_id can_stat can_type _; do
      if [ "$can_stat" = "online" ]; then
        outputMess="$outputMess OK: Canister $can_id enclosure: $can_enc_id status: $can_stat \n"
      else
        outputMess="$outputMess ATTENTION: Canister $can_id enclosure: $can_enc_id status: $can_stat type: $can_type \n"
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsenclosurepsu)
    fetch_table
    summary_line "PSU" "PSU" 3
    while read -r psu_enc_id psu_id psu_stat _; do
      if [ "$psu_stat" = "online" ]; then
        outputMess="$outputMess OK: PSU $psu_id enclosure: $psu_enc_id status: $psu_stat \n"
      else
        outputMess="$outputMess ATTENTION: PSU $psu_id enclosure: $psu_enc_id status: $psu_stat \n"
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsenclosureslot)
    fetch_table
    # Both port status columns are examined for the summary.
    summary_line "EnclosureSlot" "EnclosureSlot" 3 4
    while read -r slt_enc_id slt_id slt_prt1_stat slt_prt2_stat slt_drv drv_id _; do
      if [ "$slt_prt1_stat" = "online" ] && [ "$slt_prt2_stat" = "online" ] \
        && [ "$slt_drv" = "yes" ]; then
        outputMess="$outputMess OK: Drive-$drv_id enclosure-$slt_enc_id slot-$slt_id port1-$slt_prt1_stat port2-$slt_prt2_stat\n"
      else
        outputMess="$outputMess ATTENTION: Drive-$drv_id enclosure-$slt_enc_id slot-$slt_id port1-$slt_prt1_stat port2-$slt_prt2_stat \n"
        exitCode=2
      fi
    done < "$tmp_file"
    ;;
  lsrcrelationship)
    # One query for both counts, so they describe the same snapshot.
    if ! rc_list=$("$ssh" "$user@$storage" "$query"); then
      echo "UNKNOWN: cannot run $query on $storage"
      exit 3
    fi
    # NOTE(review): "rcrel*" is a regular expression here (rcre followed by
    # any number of l), not a glob - confirm the intended pattern.
    volume_mirror_prod=$(printf '%s\n' "$rc_list" | grep -c "rcrel*")
    volume_mirror_sync=$(printf '%s\n' "$rc_list" | grep -c "consistent_synchronized")
    if [ "$volume_mirror_prod" = "$volume_mirror_sync" ]; then
      outputMess="$outputMess OK: $volume_mirror_prod mirors are consistent and synchronized \n"
    else
      outputMess="$outputMess CRITICAL: sur les $volume_mirror_prod volumes, only $volume_mirror_sync are consistent and synchronized \n"
      exitCode=2
    fi
    ;;
  unified)
    # Execute remote command
    if ! "$ssh" "$user@$storage" lshealth -Y > "$tmp_file"; then
      echo "UNKNOWN: cannot run lshealth on $storage"
      exit 3
    fi
    # Parse remote command output: ':' separated, field 9 is the state,
    # fields 7 and 8 name the sensor, field 10 is the message.
    while IFS=: read -r -a fields; do
      case "${fields[8]}" in
        OK) # Sensor OK state -> only remembered for the all-OK output
          echo "${fields[6]}:${fields[8]}" >> "$tmp_file_OK"
          ;;
        WARNING) # Sensor WARNING state
          if [ "$exitCode" -lt 1 ]; then
            exitCode=1
          fi
          # Append sensor message to output
          if [ -n "$outputMess" ]; then
            outputMess="$outputMess +++ "
          fi
          outputMess="${outputMess}STATE WARNING - [${fields[6]}:${fields[7]}] ${fields[9]}"
          ;;
        ERROR) # Sensor ERROR state
          if [ "$exitCode" -lt 2 ]; then
            exitCode=2
          fi
          # Append sensor message to output
          if [ -n "$outputMess" ]; then
            outputMess="$outputMess +++ "
          fi
          outputMess="${outputMess}STATE CRITICAL - [${fields[6]}:${fields[7]}] ${fields[9]}"
          ;;
      esac
    done < "$tmp_file"
    # No warnings/errors detected: list the healthy sensors
    if [ "$exitCode" -eq 0 ]; then
      outputMess=$(uniq "$tmp_file_OK")
    fi
    ;;
  *)
    echo -ne "Command not found. \n"
    exit 3
    ;;
esac

# The scratch files are removed by the EXIT trap.
echo -ne "$outputMess\n"
exit $exitCode