MEMORY CHECK PLUGIN FOR NAGIOS

用于检测系统内存使用情况的nagios plugin脚本

#!/bin/sh

#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; version 2 of the License only.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

# Nagios plugin exit codes: OK, WARNING, CRITICAL and UNKNOWN.
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3

# Validation flags, set to 1 by val_wcdiff / val_wclvls when the
# warning/critical levels are inconsistent or out of range.
wcdiff=0
wclvls=0

# $0 is quoted so that a plugin path containing whitespace is not
# word-split before it reaches basename.
PROGNAME=`basename "$0"`
VERSION="Version 1.0,"
AUTHOR="2009, Mike Adolphs (http://www.matejunkie.com/)"

# Print the version string followed by the author string on one line.
# Any arguments supplied by the caller are ignored.
print_version() {
    printf '%s\n' "$VERSION $AUTHOR"
}

# Print the version line followed by the usage text, then terminate the
# script with the UNKNOWN status ($ST_UK). This function never returns.
#
# The help text is emitted from a single here-document instead of a run
# of echo calls; the "betweeen" typo in the -w description is corrected.
print_help() {
    # print_version ignores its arguments, so none are passed.
    print_version
    cat <<EOF

$PROGNAME is a Nagios plugin to check the current memory usage via
/proc/meminfo. It should work on almost every system where you're
able to cat /proc/meminfo. The script itself is written sh-compliant
and free software under the terms of the GPLv2.

$PROGNAME -s/--style kb/perc [-w/--warning] [-c/--critical]
Options:
  -s/--style)
     You're able to set the output to kilobyte (kb) or percentage
     (perc) values. Please note that other values will result in
     termination. Default is: kb
  -w/--warning)
     Warning level in kb or percent for the amount of used memory.
     Note that when the style is set to percentage, only values
     between 0 and 100 are allowed. Default is: Off.
  -c/--critical)
     Critical level in kb or percent for the amount of used memory.
     Note that when the style is set to percentage, only values
     between 0 and 100 are allowed. Default is: Off.
EOF
    exit $ST_UK
}

# Flag an inconsistent threshold pair: sets wcdiff=1 when the warning
# level ($lv_wr) is greater than the critical level ($lv_cr).
# Equal levels are accepted. Both operands are quoted so that an empty
# or multi-word value cannot change the shape of the test expression.
val_wcdiff() {
    if [ "${lv_wr}" -gt "${lv_cr}" ]
    then
        wcdiff=1
    fi
}

# Range-check the thresholds for percentage mode: when $style is "perc",
# sets wclvls=1 if either $lv_wr or $lv_cr lies outside 0..100.
# In any other style the levels are left unchecked.
#
# The obsolescent "-o" chain is split into separate tests joined by ||,
# and $style is quoted so an empty value does not break the comparison.
# The warning/critical ordering check is not repeated here; the caller
# runs val_wcdiff itself before calling this function.
val_wclvls() {
    if [ "$style" = "perc" ]
    then
        if [ "$lv_wr" -lt 0 ] || [ "$lv_wr" -gt 100 ] ||
           [ "$lv_cr" -lt 0 ] || [ "$lv_cr" -gt 100 ]
        then
            wclvls=1
        fi
    fi
}

# Default output style when -s/--style is not given.
style="kb"

# Walk the command line. Options that take a value consume two
# positional parameters; everything unrecognised prints the help text
# and terminates with UNKNOWN.
while test -n "$1"; do
    case "$1" in
    --help|-h)
        print_help
        exit $ST_UK
        ;;
    --version|-v)
        print_version $PROGNAME $VERSION
        exit $ST_UK
        ;;
    --style|-s|--warning|-w|--critical|-c)
        # An option given last without its value would make the second
        # shift below fail; strict POSIX shells abort on that with
        # status 2, which Nagios reads as CRITICAL. Report it instead.
        if [ $# -lt 2 ]
        then
            echo "Missing value for argument: $1"
            print_help
            exit $ST_UK
        fi
        case "$1" in
        --style|-s)    style=$2 ;;
        --warning|-w)  lv_wr=$2 ;;
        --critical|-c) lv_cr=$2 ;;
        esac
        shift
        ;;
    *)
        echo "Unknown argument: $1"
        print_help
        exit $ST_UK
        ;;
    esac
    shift
done

# Thresholds are only evaluated when BOTH a warning and a critical
# level were supplied.
if [ -n "$lv_wr" ] && [ -n "$lv_cr" ]
then
    # Reject non-integer levels up front. Without this check every
    # later numeric test fails with a shell error and the script
    # falls through to reporting OK. A leading minus sign is tolerated
    # so that negative percentages still reach the range check below.
    for lvl in "$lv_wr" "$lv_cr"
    do
        case "${lvl#-}" in
        ''|*[!0-9]*)
            echo "Warning and critical levels must be integers."
            exit $ST_UK
            ;;
        esac
    done
    val_wcdiff
    val_wclvls
fi

# Warning above critical: report it (plus any range problem) and stop.
if [ "$wcdiff" = 1 ]
then
    echo "Please adjust levels. The critical level must be higher than the warning level!"
    if [ "$wclvls" = 1 ]
    then
        echo "Warning and critical level values must be between 0 and 100."
    fi
    exit $ST_UK
fi

# Ordering is fine but a percentage level is outside 0..100.
if [ "$wclvls" = 1 ]
then
    echo "Warning and critical level values must be between 0 and 100."
    exit $ST_UK
fi

# Read the memory counters (in kB) from /proc/meminfo.
if [ ! -r /proc/meminfo ]
then
    echo "UNKNOWN - /proc/meminfo is not readable"
    exit $ST_UK
fi

MEM_TOTAL=`awk '$1 == "MemTotal:" {print $2}' /proc/meminfo`
TMP_MEM_FREE=`awk '$1 == "MemFree:" {print $2}' /proc/meminfo`
# Buffers and Cached are collected but not used in the calculations below.
BUFFERS=`awk '$1 == "Buffers:" {print $2}' /proc/meminfo`
CACHED=`awk '$1 == "Cached:" {print $2}' /proc/meminfo`

# Refuse to continue with missing or non-numeric counters; a zero total
# would also cause a division by zero further down.
case "$MEM_TOTAL" in
''|*[!0-9]*|0)
    echo "UNKNOWN - could not read MemTotal from /proc/meminfo"
    exit $ST_UK
    ;;
esac
case "$TMP_MEM_FREE" in
''|*[!0-9]*)
    echo "UNKNOWN - could not read MemFree from /proc/meminfo"
    exit $ST_UK
    ;;
esac

TMP_MEM_USED=`expr $MEM_TOTAL - $TMP_MEM_FREE`

# Whole-number percentages, truncated toward zero. Integer arithmetic
# with expr yields the same values as the previous bc/sed pipeline
# without requiring bc to be installed.
P_MEM_FREE=`expr $TMP_MEM_FREE \* 100 / $MEM_TOTAL`
P_MEM_USED=`expr 100 - $P_MEM_FREE`

# Build the status line and exit with the matching Nagios status.
# Output text and perfdata are identical to the previous per-branch
# echo statements; only the duplication has been removed.

# Threshold suffix for the 'mem_used' perfdata entry. Thresholds are
# only active when both levels were supplied.
if [ -n "$lv_wr" ] && [ -n "$lv_cr" ]
then
    have_levels=1
    thresholds=";$lv_wr;$lv_cr"
else
    have_levels=0
    thresholds=""
fi

# Select the value to compare and the text to print for the chosen
# style. $style is quoted via case so an empty or multi-word value
# simply lands in the error branch.
case "$style" in
kb)
    mem_used=$TMP_MEM_USED
    summary="Total: $MEM_TOTAL kb, Used: $TMP_MEM_USED kb, Free: $TMP_MEM_FREE kb"
    perfdata="'mem_total'=$MEM_TOTAL 'mem_used'=$TMP_MEM_USED$thresholds 'mem_free'=$TMP_MEM_FREE"
    ;;
perc)
    mem_used=$P_MEM_USED
    summary="Used: $P_MEM_USED%, Free: $P_MEM_FREE%"
    perfdata="'mem_used'=$P_MEM_USED$thresholds 'mem_free'=$P_MEM_FREE"
    ;;
*)
    echo "Style (-s) must be either kb (kilobyte) or perc (for percent). kb is"
    echo "being used as the default behavior when you don't provide the -s option."
    echo ""
    echo "For more information try -h or --help!"
    exit $ST_UK
    ;;
esac

# CRITICAL at or above the critical level, WARNING at or above the
# warning level (and therefore below critical), OK otherwise.
status_text="OK"
status_code=$ST_OK
if [ "$have_levels" = 1 ]
then
    if [ "$mem_used" -ge "$lv_cr" ]
    then
        status_text="CRITICAL"
        status_code=$ST_CR
    elif [ "$mem_used" -ge "$lv_wr" ]
    then
        status_text="WARNING"
        status_code=$ST_WR
    fi
fi

echo "$status_text - $summary | $perfdata"
exit $status_code

PERL版本:
#!/usr/bin/perl -w
# $Id: check_mem.pl 2 2002-02-28 06:42:51Z egalstad $


# Original script stolen from:
# check_mem.pl Copyright (C) 2000 Dan Larsson <dl@tyfon.net>
# hacked by
# Justin Ellison <justin@techadvise.com>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# you should have received a copy of the GNU General Public License
# along with this program (or with Nagios);  if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA


# Tell Perl what we need to use
use strict;
use Getopt::Std;


#TODO - Convert to Nagios::Plugin
#TODO - Use an alarm


# Predefined exit codes for Nagios.
# The option variables are package globals because Getopt::Std's
# getopts() stores its results in $opt_<letter>; `our` replaces the
# obsolete `use vars` pragma.
# UNKNOWN is 3: an exit value of -1 reaches the parent process as 255,
# which is not one of the four Nagios plugin states.
our ($opt_c, $opt_f, $opt_u, $opt_w, $opt_C, $opt_v, %exit_codes);
%exit_codes = (
    'UNKNOWN'  => 3,
    'OK'       => 0,
    'WARNING'  => 1,
    'CRITICAL' => 2,
);


# Get our variables, do our checking:
init();

# Collect the raw figures (all in kB) and echo them when -v is given.
my ($free_memory_kb, $used_memory_kb, $caches_kb) = get_memory_info();
if ($opt_v) {
    print "$free_memory_kb Free\n$used_memory_kb Used\n$caches_kb Cache\n";
}

# With -C, memory held by OS caches is counted as free instead of used.
if ($opt_C) {
    $free_memory_kb += $caches_kb;
    $used_memory_kb -= $caches_kb;
}

# Reduce each figure to a whole number of kB. sprintf('%d') drops the
# fractional part; it does not round to nearest.
for my $figure ($free_memory_kb, $used_memory_kb, $caches_kb) {
    $figure = sprintf('%d', $figure);
}

# Report the result to Nagios; this call prints the status and exits.
tell_nagios($used_memory_kb, $free_memory_kb, $caches_kb);




# Print the Nagios status line with performance data and exit.
#
# Arguments: used, free and cache memory, all in kB.
# With -f the percentage of FREE memory is compared against -c/-w
# (critical/warning when at or below the level); with -u the percentage
# of USED memory is compared (critical/warning when at or above the
# level). -f takes precedence when both are set. If neither is set the
# sub returns without printing anything.
sub tell_nagios {
    my ($used, $free, $caches) = @_;

    # Calculate Total Memory
    my $total = $free + $used;
    print "$total Total\n" if ($opt_v);

    # A zero total means the platform probe produced no usable numbers;
    # report UNKNOWN rather than dying on a division by zero below.
    if ($total <= 0) {
        finish("UNKNOWN - unable to determine total memory", $exit_codes{'UNKNOWN'});
    }

    # $perfdata already carries the leading '|' separator, so the
    # messages below must not add another one.
    my $perfdata = "|TOTAL=${total}KB;;;; USED=${used}KB;;;; FREE=${free}KB;;;; CACHES=${caches}KB;;;;";

    if ($opt_f) {
        my $percent = sprintf "%.1f", ($free / $total * 100);
        if ($percent <= $opt_c) {
            finish("CRITICAL - $percent% ($free kB) free!$perfdata", $exit_codes{'CRITICAL'});
        }
        elsif ($percent <= $opt_w) {
            finish("WARNING - $percent% ($free kB) free!$perfdata", $exit_codes{'WARNING'});
        }
        else {
            finish("OK - $percent% ($free kB) free.$perfdata", $exit_codes{'OK'});
        }
    }
    elsif ($opt_u) {
        my $percent = sprintf "%.1f", ($used / $total * 100);
        if ($percent >= $opt_c) {
            finish("CRITICAL - $percent% ($used kB) used!$perfdata", $exit_codes{'CRITICAL'});
        }
        elsif ($percent >= $opt_w) {
            finish("WARNING - $percent% ($used kB) used!$perfdata", $exit_codes{'WARNING'});
        }
        else {
            finish("OK - $percent% ($used kB) used.$perfdata", $exit_codes{'OK'});
        }
    }
}


# Show usage
# Print the usage/help text and exit with the UNKNOWN status.
sub usage() {
    my @help_lines = (
        '',
        'check_mem.pl v1.0 - Nagios Plugin',
        '',
        'usage:',
        ' check_mem.pl -<f|u> -w <warnlevel> -c <critlevel>',
        '',
        'options:',
        ' -f           Check FREE memory',
        ' -u           Check USED memory',
        ' -C           Count OS caches as FREE memory',
        ' -w PERCENT   Percent free/used when to warn',
        ' -c PERCENT   Percent free/used when critical',
        '',
        'Copyright (C) 2000 Dan Larsson <dl@tyfon.net>',
        'check_mem.pl comes with absolutely NO WARRANTY either implied or explicit',
        'This program is licensed under the terms of the',
        'GNU General Public License (check source code for details)',
    );
    print join("\n", @help_lines), "\n";
    exit $exit_codes{'UNKNOWN'};
}


# Probe the operating system for memory figures.
#
# Returns the list (free, used, caches). The platform is chosen by
# matching the output of `uname -a`:
#   Linux - parsed from /proc/meminfo
#   SunOS - Sun::Solaris::Kstat when loadable, else vmstat + prtconf
#   other - last line of plain `vmstat`
# Dies if uname is found in neither /usr/bin nor /bin. Prints a message
# and exits UNKNOWN when -C is requested on a platform path that cannot
# report caches.
sub get_memory_info {
    my $used_memory_kb  = 0;
    my $free_memory_kb  = 0;
    my $total_memory_kb = 0;
    my $caches_kb       = 0;


    # Locate uname by absolute path rather than relying on PATH.
    my $uname;
    if ( -e '/usr/bin/uname') {
        $uname = `/usr/bin/uname -a`;
    }
    elsif ( -e '/bin/uname') {
        $uname = `/bin/uname -a`;
    }
    else {
        die "Unable to find uname in /usr/bin or /bin!\n";
    }
    print "uname returns $uname" if ($opt_v);
    if ( $uname =~ /Linux/ ) {
        # Linux: MemTotal and MemFree give total/free; Buffers and
        # Cached are summed into the cache figure.
        my @meminfo = `/bin/cat /proc/meminfo`;
        foreach (@meminfo) {
            chomp;
            if (/^Mem(Total|Free):\s+(\d+) kB/) {
                my $counter_name = $1;
                if ($counter_name eq 'Free') {
                    $free_memory_kb = $2;
                }
                elsif ($counter_name eq 'Total') {
                    $total_memory_kb = $2;
                }
            }
            elsif (/^(Buffers|Cached):\s+(\d+) kB/) {
                $caches_kb += $2;
            }
        }
        $used_memory_kb = $total_memory_kb - $free_memory_kb;
    }
    elsif ( $uname =~ /SunOS/ ) {
        # String eval so that a missing module is a runtime condition
        # reported in $@ instead of a compile-time failure.
        eval "use Sun::Solaris::Kstat";
        if ($@) { #Kstat not available
            if ($opt_C) {
                print "You can't report on Solaris caches without Sun::Solaris::Kstat available!\n";
                exit $exit_codes{UNKNOWN};
            }
            # Keep only the last line of the `vmstat 1 2` output.
            my @vmstat = `/usr/bin/vmstat 1 2`;
            my $line;
            foreach (@vmstat) {
              chomp;
              $line = $_;
            }
            # NOTE(review): splitting on a single space yields empty
            # fields for runs of spaces, so index 5 depends on the exact
            # column padding; the /1024 also assumes a particular unit
            # for the vmstat column - confirm both on a Solaris host.
            $free_memory_kb = (split(/ /,$line))[5] / 1024;
            # Total comes from prtconf's "Memory size: N Megabytes" line.
            my @prtconf = `/usr/sbin/prtconf`;
            foreach (@prtconf) {
                if (/^Memory size: (\d+) Megabytes/) {
                    $total_memory_kb = $1 * 1024;
                }
            }
            $used_memory_kb = $total_memory_kb - $free_memory_kb;
            
        }
        else { # We have kstat
            my $kstat = Sun::Solaris::Kstat->new();
            my $phys_pages = ${kstat}->{unix}->{0}->{system_pages}->{physmem};
            my $free_pages = ${kstat}->{unix}->{0}->{system_pages}->{freemem};
            # We probably should account for UFS caching here, but it's unclear
            # to me how to determine UFS's cache size.  There's inode_cache,
            # and maybe the physmem variable in the system_pages module??
            # In the real world, it looks to be so small as not to really matter,
            # so we don't grab it.  If someone can give me code that does this,
            # I'd be glad to put it in.
            # The ZFS ARC size, when the zfs kstat module is present and
            # the value is non-zero, is divided by 1024 and counted as cache.
            my $arc_size = (exists ${kstat}->{zfs} && ${kstat}->{zfs}->{0}->{arcstats}->{size}) ?
                 ${kstat}->{zfs}->{0}->{arcstats}->{size} / 1024 
                 : 0;
            $caches_kb += $arc_size;
            # Page counts are converted using the output of `pagesize`.
            my $pagesize = `pagesize`;
    
            $total_memory_kb = $phys_pages * $pagesize / 1024;
            $free_memory_kb = $free_pages * $pagesize / 1024;
            $used_memory_kb = $total_memory_kb - $free_memory_kb;
        }
    }
    else {
        # Any other platform: caches cannot be reported, and the figures
        # come from columns 4 and 5 of the last line of `vmstat`.
        if ($opt_C) {
            print "You can't report on $uname caches!\n";
            exit $exit_codes{UNKNOWN};
        }
    my $command_line = `vmstat | tail -1 | awk '{print \$4,\$5}'`;
    chomp $command_line;
        my @memlist      = split(/ /, $command_line);
    
        # Define the calculating scalars
        # NOTE(review): which vmstat columns these are, and their units,
        # vary by platform - confirm on each target OS.
        $used_memory_kb  = $memlist[0]/1024;
        $free_memory_kb = $memlist[1]/1024;
        $total_memory_kb = $used_memory_kb + $free_memory_kb;
    }
    return ($free_memory_kb,$used_memory_kb,$caches_kb);
}


# Parse and validate the command line.
#
# Populates the $opt_* globals via Getopt::Std. Any problem prints a
# message followed by the usage text; usage() exits, so this sub only
# returns when the options are acceptable.
sub init {
    # Fewer than two arguments can never form a valid invocation.
    # ($#ARGV is numeric, so it is compared with <= rather than le.)
    if ($#ARGV <= 0) {
        usage();
    }

    # getopts() returns false on an unknown switch or a missing value;
    # treat that as a usage error instead of continuing regardless.
    getopts('c:fuCvw:') or usage();

    # Shortcircuit the switches
    if (!$opt_w or $opt_w == 0 or !$opt_c or $opt_c == 0) {
        print "*** You must define WARN and CRITICAL levels!\n";
        usage();
    }
    elsif (!$opt_f and !$opt_u) {
        print "*** You must select to monitor either USED or FREE memory!\n";
        usage();
    }

    # Check if levels are sane: for FREE memory the warning level must
    # be above critical, for USED memory it must be below critical.
    if ($opt_w <= $opt_c and $opt_f) {
        print "*** WARN level must not be less than CRITICAL when checking FREE memory!\n";
        usage();
    }
    elsif ($opt_w >= $opt_c and $opt_u) {
        print "*** WARN level must not be greater than CRITICAL when checking USED memory!\n";
        usage();
    }
}


# Print the status message on its own line and terminate the program
# with the given exit status.
sub finish {
    my $msg   = shift;
    my $state = shift;
    print $msg, "\n";
    exit $state;
}

http://www.sysadminsjourney.com/content/2009/06/04/new-and-improved-checkmempl-nagios-plugin

Let’s show an example, run from a Solaris host with ZFS:

$ /usr/local/nagios/libexec/check_mem.pl -w 10 -c 5 -f 
WARNING - 9.9% (406520 kB) free!|TOTAL=4113824KB;;;; USED=3707304KB;;;; FREE=406520KB;;;; CACHES=816947KB;;;;

Uh oh! I have less than 10% free of the 4GB total. Wait, the ZFS ARC is using up 800MB of that! Let’s try again with the -C option:

# /usr/local/nagios/libexec/check_mem.pl -w 10 -c 5 -f -C
OK - 29.7% (1220611 kB) free.|TOTAL=4113823KB;;;; USED=2893212KB;;;; FREE=1220611KB;;;; CACHES=817075KB;;;;

That’s better!

You’ll see the same sort of thing on Linux.

 

发表回复