#!/bin/bash
#
# +------------------------------------------------------------------+
# |              _                         _                         |
# |         ___ | |__  ___  ___ _ ____   _(_)_   _ _ __ ___          |
# |        / _ \| '_ \/ __|/ _ \ '__\ \ / / | | | | '_ ` _ \         |
# |       | (_) | |_) \__ \  __/ |   \ V /| | |_| | | | | | |        |
# |        \___/|_.__/|___/\___|_|    \_/ |_|\__,_|_| |_| |_|        |
# |                                                                  |
# | Copyright Adam Armstrong 2012                adama@observium.org |
# +------------------------------------------------------------------+
#
# +------------------------------------------------------------------+
# | Copyright Mathias Kettner 2012             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file was originally part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.

# NOTE(review): this script arrived with its line breaks collapsed and
# with the section name stripped from every '<<<name>>>' header (they
# all read '<<>>'). The line structure has been restored and the section
# names re-inserted from the reviewer's recollection of the upstream
# Check_MK Linux agent -- confirm them against upstream before deploying.
# Runs of spaces inside quoted sed/grep patterns may have been collapsed
# as well; the patterns are reproduced exactly as found -- verify.

# Remove locale settings to eliminate localized outputs where possible
export LC_ALL=C
unset LANG

export MK_LIBDIR="/usr/lib/check_mk_agent"
export MK_CONFDIR="/etc/check_mk"

# Make sure, locally installed binaries are found
PATH=$PATH:/usr/local/bin

# All executables in PLUGINSDIR will simply be executed and their
# output appended to the output of the agent. Plugins define their own
# sections and must output headers with '<<<' and '>>>'
PLUGINSDIR=$MK_LIBDIR/plugins

# All executables in LOCALDIR will be executed and their
# output inserted into the section <<<local>>>. Please
# refer to online documentation for details about local checks.
LOCALDIR=$MK_LIBDIR/local

# close standard input (for security reasons) and stderr;
# with -d as first argument trace the script instead
if [ "$1" = -d ]
then
    set -xv
else
    exec <&- 2>/dev/null
fi

echo '<<<check_mk>>>'
echo Version: 1.2.0b1
echo AgentOS: linux
echo "PluginsDirectory: $PLUGINSDIR"
echo "LocalDirectory: $LOCALDIR"
echo "AgentDirectory: $MK_CONFDIR"

# If we are called via xinetd, try to find only_from configuration
if [ -n "$REMOTE_HOST" ]
then
    echo -n 'OnlyFrom: '
    # The substitution is deliberately left unquoted so that the shell
    # squeezes the whitespace of the matched only_from value.
    echo $(sed -n '/^service[[:space:]]*check_mk/,/}/s/^[[:space:]]*only_from[[:space:]]*=[[:space:]]*\(.*\)/\1/p' /etc/xinetd.d/* | head -n1)
fi

# Partitions (-P prevents line wrapping for long mount points).
# Attention: NFS mounts are always excluded in order to avoid
# hangs. They are better monitored on the server than on the
# client anyway.
echo '<<<df>>>'
df -PTlk -x smbfs -x tmpfs -x cifs -x iso9660 -x udf -x nfsv4 | sed 1d

# VMWare shows its own filesystems with 'vdf'. Just one
# problem: it outputs not 7 but only 6 columns
if command -v vdf > /dev/null
then
    vdf -P | grep ^/vmfs/volumes | sed 's/ / vmfs /'
fi

# Check NFS mounts by accessing them with stat -f (System
# call statfs()). If this lasts more than 2 seconds we
# consider it as hanging. We need waitmax.
if command -v waitmax >/dev/null
then
    STAT_VERSION=$(stat --version | head -1 | cut -d" " -f4)
    STAT_BROKE="5.3.0"
    echo '<<<nfsmounts>>>'
    sed -n '/ nfs /s/[^ ]* \([^ ]*\) .*/\1/p' < /proc/mounts |
    while read -r MP
    do
        # Quoted on purpose: an empty STAT_VERSION (stat --version failed)
        # must not turn the test into a syntax error.
        if [ "$STAT_VERSION" != "$STAT_BROKE" ]; then
            waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP" ||
                echo "$MP hanging 0 0 0 0"
        else
            # For the version named in STAT_BROKE a newline is appended
            # after a successful stat (presumably that version does not
            # terminate its -c output -- TODO confirm).
            if waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP"; then
                printf '\n'
            else
                echo "$MP hanging 0 0 0 0"
            fi
        fi
    done
fi

# Check mount options. Filesystems may switch to 'ro' in case
# of a read error.
# NOTE(review): this part of the script arrived with its line breaks
# collapsed and with the section name stripped from every '<<<name>>>'
# header (they all read '<<>>'; only the 3ware headers survived). The
# line structure has been restored and the names re-inserted from the
# reviewer's recollection of the upstream Check_MK Linux agent -- confirm
# them against upstream before deploying. Runs of spaces inside quoted
# sed/grep patterns may have been collapsed too; the patterns are
# reproduced exactly as found -- verify.
echo '<<<mounts>>>'
grep ^/dev < /proc/mounts

# processes including username, without kernel processes
echo '<<<ps>>>'
ps ax -o user,vsz,rss,pcpu,command --columns 10000 | sed -e 1d -e 's/ *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) *\([^ ]*\) */(\1,\2,\3,\4) /'

# Memory usage
echo '<<<mem>>>'
grep -E -v '^Swap:|^Mem:|total:' < /proc/meminfo

# Load and number of processes (plus number of CPUs)
echo '<<<cpu>>>'
echo "$(cat /proc/loadavg) $(grep -cE '^CPU|^processor' < /proc/cpuinfo)"

# Uptime
echo '<<<uptime>>>'
cat /proc/uptime

# Network interfaces (Link, Autoneg, Speed)
# This requires ethtool
if command -v ethtool > /dev/null
then
    echo '<<<netif>>>'
    for eth in $(sed -rn -e 's/[[:space:]]*//g' -e '/ *([^:]):.*/s//\1/p' /proc/net/dev | grep -E -vx '(lo|sit.*)')
    do
        # The substitution is deliberately unquoted: the values of all
        # matching ethtool lines are joined onto one line.
        echo "$eth" $(ethtool "$eth" | grep -E '(Speed|Duplex|Link detected|Auto-negotiation):' | cut -d: -f2 | sed 's/ *//g')
    done
fi

# New variant: Information about speed and state in one section
# NOTE(review): the ':sep(58)' option is what the reviewer remembers from
# upstream (the data lines are colon separated) -- confirm.
echo '<<<lnx_if:sep(58)>>>'
sed 1,2d /proc/net/dev
if command -v ethtool > /dev/null
then
    for eth in $(sed -e 1,2d < /proc/net/dev | cut -d':' -f1)
    do
        echo "[$eth]"
        ethtool "$eth" | grep -E '(Speed|Duplex|Link detected|Auto-negotiation):'
    done
fi

# Number of TCP connections in the various states
echo '<<<tcp_conn_stats>>>'
netstat -nt | awk ' /^tcp/ { c[$6]++; } END { for (x in c) { print x, c[x]; } }'

# Disk and RAID status of LSI controllers, if present
if command -v cfggen > /dev/null ; then
    echo '<<<lsi>>>'
    cfggen 0 DISPLAY | grep -E '(Target ID|State|Volume ID|Status of volume)[[:space:]]*:' | sed -e 's/ *//g' -e 's/:/ /'
fi

# Multipath devices
if command -v multipath >/dev/null ; then
    echo '<<<multipath>>>'
    multipath -l
fi

# Soft-RAID
echo '<<<md>>>'
cat /proc/mdstat

# Performance counters of disks
echo '<<<diskstat>>>'
date +%s
grep -E ' (x?[shv]d[a-z]*|cciss/c[0-9]+d[0-9]+) ' < /proc/diskstats

# Performance counters of the kernel
echo '<<<kernel>>>'
date +%s
cat /proc/vmstat /proc/stat

# Statistics of the network devices (packets, collisions, etc.)
echo '<<<netctr>>>'
# Insert an exact timestamp, because the counters depend on time
date +%s
sed -e 1,2d -e 's/:/ /g' < /proc/net/dev

# IPMI sensors via ipmitool. The sensor list is produced by a detached
# background job and served from a cache file.
if command -v ipmitool >/dev/null
then
    echo '<<<ipmi>>>'
    IPMI_FILE=$MK_CONFDIR/ipmitool_sensors.cache
    if [ ! -d "$MK_CONFDIR" ]; then
        mkdir -p "$MK_CONFDIR"
    fi

    # Do not use cache file after 20 minutes
    IPMI_MAXAGE=1200

    # Start from a known state so that a USE_IPMI_FILE inherited from the
    # environment cannot suppress the refresh below.
    USE_IPMI_FILE=

    # Check if file exists and is recent enough
    if [ -s "$IPMI_FILE" ]
    then
        NOW=$(date +%s)
        MTIME=$(stat -c %Y "$IPMI_FILE")
        if [ $((NOW - MTIME)) -le "$IPMI_MAXAGE" ] ; then
            USE_IPMI_FILE=1
        fi
    fi

    # A non-empty cache file is printed regardless of its age.
    if [ -s "$IPMI_FILE" ]
    then
        grep -v 'command failed' "$IPMI_FILE" \
            | sed -e 's/ *| */|/g' -e "s/ /_/g" -e 's/_*$//' -e 's/|/ /g' \
            | grep -E -v '^[^ ]+ na ' \
            | grep -v ' discrete '
    fi

    # Refresh the cache in the background unless it is recent enough or
    # a refresh is already running (its .new file exists).
    if [ -z "$USE_IPMI_FILE" ] && [ ! -e "$IPMI_FILE.new" ]
    then
        setsid bash -c "set -o noclobber ; ipmitool sensor list > $IPMI_FILE.new && mv $IPMI_FILE.new $IPMI_FILE || rm -f $IPMI_FILE*" &
    fi
fi

# IPMI data via ipmi-sensors (of freeipmi). Please make sure, that if you
# have installed freeipmi that IPMI is really supported by your hardware.
# The agent tries to avoid hanging forever by setting a limit of 300 seconds
# for the first run (where the cache is created). If ipmi-sensors runs into
# that timeout, it leaves an empty cache file. We skip this check forever
# if we find that empty cache file.
sdrcache=/var/cache/.freeipmi/sdr-cache/sdr-cache-$(hostname).127.0.0.1
if command -v ipmi-sensors >/dev/null && { [ ! -e "$sdrcache" ] || [ -s "$sdrcache" ]; }
then
    echo '<<<ipmi_sensors>>>'
    # No cache file existing? => Impose a high time limit. If we do not
    # succeed in creating the cache we most probably run on a hardware where
    # this tool is hanging forever. We make sure that we never try again in
    # that case!
    if [ ! -e "$sdrcache" ]
    then
        WAITMAX="waitmax 300"
    elif tail --bytes 2 < "$sdrcache" | od -t x2 | grep -q 0a0a
    then
        WAITMAX="waitmax 3"
    else
        # Cache file corrupt. Must end with two linefeeds.
        rm -f "$sdrcache"
        WAITMAX=
    fi

    # Newer ipmi-sensors versions have a new output format; the legacy
    # format can be requested
    if ipmi-sensors --help | grep -q legacy-output; then
        IPMI_FORMAT="--legacy-output"
    else
        IPMI_FORMAT=""
    fi

    # Acquire lock with flock in order to avoid multiple runs of ipmi-sensors
    # in case of parallel or overlapping calls of the agent.
    (
        # NOTE(review): -n (fail immediately) and --wait 60 look
        # contradictory and the result of flock is not checked -- left
        # unchanged, confirm the intended behaviour.
        flock -n 200 --wait 60
        # At least with ipmi-sensors 0.7.16 this group is Power_Unit instead of "Power Unit"
        for class in Temperature Power_Unit Fan
        do
            # $WAITMAX and $IPMI_FORMAT are deliberately unquoted: they hold
            # zero or more words.
            $WAITMAX ipmi-sensors $IPMI_FORMAT --sdr-cache-directory /var/cache -g "$class" |
                sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@'
            # In case of a timeout immediately leave loop. The status of the
            # first pipeline stage is needed here; $? would be that of sed.
            if [ "${PIPESTATUS[0]}" = 255 ] ; then break ; fi
            WAITMAX="waitmax 3"
        done
    ) 200>>"$sdrcache"
fi

# State of LSI MegaRAID controller via MegaCli. You can download that tool from:
# http://www.lsi.com/downloads/Public/MegaRAID%20Common%20Files/8.02.16_MegaCLI.zip
if command -v MegaCli >/dev/null ; then
    echo '<<<megaraid_pdisks>>>'
    # Word splitting of the substitution is intended: every word of the
    # filtered output is handled separately.
    for part in $(MegaCli -EncInfo -aALL -NoLog < /dev/null \
        | sed -rn 's/:/ /g; s/[[:space:]]+/ /g; s/^ //; s/ $//; s/Number of enclosures on adapter ([0-9]+).*/adapter \1/g; /^(Enclosure|Device ID|adapter) [0-9]+$/ p'); do
        [ "$part" = adapter ] && echo ""
        [ "$part" = 'Enclosure' ] && echo -ne "\ndev2enc"
        echo -n " $part"
    done
    echo
    MegaCli -PDList -aALL -NoLog < /dev/null | grep -E 'Enclosure|Raw Size|Slot Number|Device Id|Firmware state|Inquiry|Adapter'
    echo '<<<megaraid_ldisks>>>'
    MegaCli -LDInfo -Lall -aALL -NoLog < /dev/null | grep -E 'Size|State|Number|Adapter|Virtual'
    echo '<<<megaraid_bbu>>>'
    MegaCli -AdpBbuCmd -GetBbuStatus -aALL -NoLog < /dev/null | grep -v Exit
fi

# 3WARE disk controller (by Radoslaw Bak)
if command -v tw_cli > /dev/null ; then
    for C in $(tw_cli show | awk 'NR < 4 { next } { print $1 }'); do
        echo '<<<3ware_info>>>'
        tw_cli "/$C" show all | grep -E 'Model =|Firmware|Serial'
        echo '<<<3ware_disks>>>'
        tw_cli "/$C" show drivestatus | grep -E 'p[0-9]' | sed "s/^/$C\//"
        echo '<<<3ware_units>>>'
        tw_cli "/$C" show unitstatus | grep -E 'u[0-9]' | sed "s/^/$C\//"
    done
fi

# NOTE(review): section name restored from memory and the least certain
# one in this file -- confirm against upstream.
if command -v vcbVmName > /dev/null 2>&1 ; then
    echo '<<<vmware_state>>>'
    vcbVmName -s any
fi

# VirtualBox Guests. Section must always be output. Otherwise the
# check would not be executed in case no guest additions are installed.
# And that is something the check wants to detect
echo '<<<vbox_guest>>>'
if command -v VBoxControl > /dev/null 2>&1 ; then
    VBoxControl -nologo guestproperty enumerate | cut -d, -f1,2
    [ "${PIPESTATUS[0]}" = 0 ] || echo "ERROR"
fi

if command -v ntpq > /dev/null 2>&1 ; then
    echo '<<<ntp>>>'
    # remove heading, make first column space separated
    waitmax 2 ntpq -p | sed -e 1,2d -e 's/^\(.\)/\1 /' -e 's/^ /%/'
fi

if command -v nvidia-settings >/dev/null && [ -S /tmp/.X11-unix/X0 ]
then
    echo '<<<nvidia>>>'
    for var in GPUErrors GPUCoreTemp
    do
        DISPLAY=:0 waitmax 2 nvidia-settings -t -q $var | sed "s/^/$var: /"
    done
fi

if [ -e /proc/drbd ]; then
    echo '<<<drbd>>>'
    cat /proc/drbd
fi

# Status of CUPS printer queues
if command -v lpstat > /dev/null 2>&1; then
    if pgrep cups > /dev/null 2>&1; then
        echo '<<<cups_queues>>>'
        waitmax 3 lpstat -p
        echo '---'
        waitmax 3 lpstat -o | sort
    fi
fi

# Heartbeat monitoring
if command -v cl_status > /dev/null 2>&1; then
    # Different handling for heartbeat clusters with and without CRM
    # for the resource state
    if [ -S /var/run/heartbeat/crm/cib_ro ] || [ -S /var/run/crm/cib_ro ]; then
        echo '<<<heartbeat_crm>>>'
        crm_mon -1 -r | grep -v ^$ | sed '/^\sResource Group:/,$ s/^\s//; s/^\s/_/g'
    else
        echo '<<<heartbeat_rscstatus>>>'
        cl_status rscstatus
    fi

    echo '<<<heartbeat_nodes>>>'
    # Lower-cased name of this host; the local node is left out of the list.
    own_node=$(echo $HOSTNAME | tr 'A-Z' 'a-z')
    for NODE in $(cl_status listnodes); do
        if [ "$NODE" != "$own_node" ]; then
            STATUS=$(cl_status nodestatus "$NODE")
            echo -n "$NODE $STATUS"
            for LINK in $(cl_status listhblinks "$NODE" 2>/dev/null); do
                echo -n " $LINK $(cl_status hblinkstatus "$NODE" "$LINK")"
            done
            echo
        fi
    done
fi

# Postfix mailqueue monitoring
#
# Only handle mailq when postfix user is present. The mailq command is also
# available when postfix is not installed. But it produces different outputs
# which are not handled by the check at the moment. So try to filter out the
# systems not using postfix by searching for the postfix user.
#
# Cannot take the whole output. This could produce several MB of agent output
# on blocking queues.
# Only handle the last 6 lines (includes the summary line at the bottom and
# the last message in the queue). The last message is not used at the moment
# but it could be used to get the timestamp of the last message.
if command -v mailq >/dev/null 2>&1 && getent passwd postfix >/dev/null 2>&1; then
    echo '<<<postfix_mailq>>>'
    mailq | tail -n 6
fi

# Check status of OMD sites
if command -v omd >/dev/null
then
    echo '<<<omd_status>>>'
    omd status --bare --auto
fi

# Run local plugins that output a section of their own.
# The directory must be named: an empty variable would otherwise make
# 'cd' go to $HOME and execute whatever is found there.
if [ -n "$PLUGINSDIR" ] && cd "$PLUGINSDIR"
then
    for skript in *
    do
        if [ -x "$skript" ] ; then
            "./$skript"
        fi
    done
fi

# Local single checks
echo '<<<local>>>'
if [ -n "$LOCALDIR" ] && cd "$LOCALDIR"
then
    for skript in *
    do
        if [ -x "$skript" ] ; then
            "./$skript"
        fi
    done
fi

# MK's Remote Plugin Executor
if [ -e "$MK_CONFDIR/mrpe.cfg" ]
then
    echo '<<<mrpe>>>'
    # Every non-empty, non-comment line is "<description> <command line>".
    # 'read' is used without -r on purpose so that existing configuration
    # lines keep being interpreted as before.
    grep -Ev '^[[:space:]]*($|#)' "$MK_CONFDIR/mrpe.cfg" | \
    while read descr cmdline
    do
        PLUGIN=${cmdline%% *}
        # NOTE(review): the command line is eval'ed verbatim; mrpe.cfg must
        # be writable by trusted users only.
        OUTPUT=$(eval "$cmdline")
        rc=$?
        # Newlines inside the plugin output are replaced by \1 so that the
        # result stays on a single line.
        echo -n "(${PLUGIN##*/}) $descr $rc $OUTPUT" | tr \\n \\1
        echo
    done
fi