Module: check_mk
Branch: master
Commit: 9a266f88d2a0c7aa742c424bc0c2607245e40876
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=9a266f88d2a0c7…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Tue Jul 26 09:13:40 2011 +0200
Added some preflight checks to --scan-parents code to prevent strange behaviour
---
.bugs/189 | 16 ++++++++++++++++
.bugs/190 | 16 ++++++++++++++++
.bugs/191 | 12 ++++++++++++
ChangeLog | 1 +
modules/check_mk.py | 21 +++++++++++++++++++--
5 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/.bugs/189 b/.bugs/189
new file mode 100644
index 0000000..dd7b21a
--- /dev/null
+++ b/.bugs/189
@@ -0,0 +1,16 @@
+Title: scan parents overwrites parents.mk which has not been created by scan
+Component: core
+State: done
+Class: nastiness
+Date: 2011-07-25 15:57:55
+Benefit: 3
+Cost: 3
+Fun: 0
+
+The scan-parents function should check whether parents.mk has been created by the
+scan-parents code and only overwrite the file if it has verified that
+overwriting the file would not overwrite a manually configured parents.mk file.
+
+2011-07-26 08:48:27: changed state open -> done
+Now checking the first line of the file for the "automatically created by --scan-parents" string
+and terminating when it cannot be found.
diff --git a/.bugs/190 b/.bugs/190
new file mode 100644
index 0000000..2370a0f
--- /dev/null
+++ b/.bugs/190
@@ -0,0 +1,16 @@
+Title: scan parent should terminate when no traceroute bin in path
+Component: core
+State: done
+Class: cleanup
+Date: 2011-07-25 15:59:55
+Benefit: 2
+Cost: 2
+Fun: 0
+
+At the moment scanparents executes traceroute for each host and
+shows up an error sh: traceroute: command not found
+
+This should be checked once at the start and show up an error.
+
+2011-07-26 08:49:03: changed state open -> done
+Added preflight check to verify that traceroute is installed
diff --git a/.bugs/191 b/.bugs/191
new file mode 100644
index 0000000..d13d832
--- /dev/null
+++ b/.bugs/191
@@ -0,0 +1,12 @@
+Title: scan parents: documentation about output missing
+Component: doku
+Benefit: 3
+State: open
+Cost: 3
+Date: 2011-07-25 16:08:41
+Class: todo
+
+There is no documentation about the meanings of the output of the scan parents
+feature.
+
+The single chars like o, L, G are not mentioned in the docs.
diff --git a/ChangeLog b/ChangeLog
index 0fd749d..e9d34c3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -29,6 +29,7 @@
* j4p_performance agent plugin: Supports basic/digest auth now
* New checks j4p_performance.threads and j4p_performance.uptime which
track the number of threads and the uptime of a JMX process
+ * Added some preflight checks to --scan-parents code
Multisite:
* FIX: make non-Ascii characters in services names work again
diff --git a/modules/check_mk.py b/modules/check_mk.py
index d023305..3a5b468 100755
--- a/modules/check_mk.py
+++ b/modules/check_mk.py
@@ -3369,6 +3369,12 @@ def do_cleanup_autochecks():
sys.stdout.write("Deleting %s\n" % f)
os.remove(f)
+def check_bin_in_path(prog):
+ for path in os.environ['PATH'].split(os.pathsep):
+ f = path + '/' + prog
+ if os.path.exists(f) and os.access(f, os.X_OK):
+ return True
+
def do_scan_parents(hosts):
global max_num_processes
if len(hosts) == 0:
@@ -3383,6 +3389,19 @@ def do_scan_parents(hosts):
if max_num_processes < 1:
max_num_processes = 1
+ outfilename = check_mk_configdir + "/parents.mk"
+
+ if not check_bin_in_path('traceroute'):
+ raise MKGeneralException('The traceroute command can not be found in PATH?')
+
+ if os.path.exists(outfilename):
+ first_line = file(outfilename, "r").readline()
+ if not first_line.startswith('# Automatically created by --scan-parents at'):
+ raise MKGeneralException("conf.d/parents.mk seems to be created manually.\n\n"
+ "The --scan-parents function would overwrite this file.\n"
+ "Please rename it to keep the configuration or delete "
+ "the file and try again.")
+
sys.stdout.write("Scanning for parents (%d processes)..." % max_num_processes)
sys.stdout.flush()
while len(hosts) > 0:
@@ -3416,7 +3435,6 @@ def do_scan_parents(hosts):
parent_rules.append( (monitoring_host, [host]) )
import pprint
- outfilename = check_mk_configdir + "/parents.mk"
out = file(outfilename, "w")
out.write("# Automatically created by --scan-parents at %s\n\n" % time.asctime())
out.write("# Do not edit this file. If you want to convert an\n")
@@ -3433,7 +3451,6 @@ def do_scan_parents(hosts):
out.write("parents += %s\n\n" % pprint.pformat(parent_rules))
sys.stdout.write("\nWrote %s\n" % outfilename)
-
def scan_parents_of(hosts):
nagios_ip = lookup_ipaddress(monitoring_host)
os.putenv("LANG", "")
Module: check_mk
Branch: master
Commit: 888177cc4e8461374698027659f7303ead508354
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=888177cc4e8461…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Tue Jul 26 17:07:54 2011 +0200
Added manpages for some new j4p checks
---
checkman/j4p_performance.threads | 40 ++++++++++++++++++++++++++++++++++++++
checkman/j4p_performance.uptime | 22 ++++++++++++++++++++
2 files changed, 62 insertions(+), 0 deletions(-)
diff --git a/checkman/j4p_performance.threads b/checkman/j4p_performance.threads
new file mode 100644
index 0000000..75c203c
--- /dev/null
+++ b/checkman/j4p_performance.threads
@@ -0,0 +1,40 @@
+title: Check threads of Java application server via JMX and j4p.war
+agents: linux
+author: Lars Michelsen <lm(a)mathias-kettner.de>
+license: GPL
+distribution: check_mk
+description:
+ This check uses the JSON HTTP service of j4p (from jmx4perl) to
+ monitor the number of threads created by a Java application server like
+ Tomcat or JBoss. You can define levels against the number of threads
+ of the Java machine.
+
+ Take a look at the {j4p_performance.mem} manpage for instructions
+ on how to set up the needed agent plugin.
+
+item:
+ The instance name as configured in {j4p.conf} with the variable
+ {instance}.
+
+examples:
+ # Set default levels for number of threads to (WARN 50, CRIT 80)
+ j4p_performance_threads_default_levels = (50, 80)
+
+perfdata:
+ Several variables:
+ ThreadRate: Number of threads created per interval.
+ ThreadCount: Number of threads.
+ DeamonThreadCount: Number of daemon threads.
+ PeakThreadCount: Maximum number of threads since program start.
+ TotalStartedThreadCount: total number of started threads since program start.
+
+inventory:
+ One check is created for each JMX instance sent by the agent.
+
+[parameters]
+warning (int): The warning level for number of threads
+critical (int): The critical level for number of threads
+
+[configuration]
+j4p_performance_threads_default_levels (int, int): Default levels for number of threads.
+ This is preset to {(80, 100)}.
diff --git a/checkman/j4p_performance.uptime b/checkman/j4p_performance.uptime
new file mode 100644
index 0000000..8575de9
--- /dev/null
+++ b/checkman/j4p_performance.uptime
@@ -0,0 +1,22 @@
+title: Check uptime of Java application server via JMX and j4p.war
+agents: linux
+author: Lars Michelsen <lm(a)mathias-kettner.de>
+license: GPL
+distribution: check_mk
+description:
+ This check uses the JSON HTTP service of j4p (from jmx4perl) to
+ monitor the uptime of a Java application server like
+ Tomcat or JBoss.
+
+ Take a look at the {j4p_performance.mem} manpage for instructions
+ on how to set up the needed agent plugin.
+
+item:
+ The instance name as configured in {j4p.conf} with the variable
+ {instance}.
+
+perfdata:
+ One variable {uptime} with the uptime in seconds since program start.
+
+inventory:
+ One check is created for each JMX instance sent by the agent.
Module: check_mk
Branch: master
Commit: 1b8f307a7e231eb0d04e8f5348b3ea2815876a6e
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=1b8f307a7e231e…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Mon Jul 25 11:47:51 2011 +0200
Improved mk_oracle agent plugin
* Added caching of results to prevent problems with long
running SQL queries. Cache is controlled by CACHE_MAXAGE var which
is preset to
120 seconds
* EXCLUDE_<sid>=ALL or EXCLUDE_<sid>=oracle_sessions can be
used to exclude specific checks now
* Added optional configuration file to configure the new options
---
ChangeLog | 6 ++
agents/plugins/mk_oracle | 122 +++++++++++++++++++++++++++++++--------------
2 files changed, 90 insertions(+), 38 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 93f605f..7e1403e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -20,6 +20,12 @@
* vbox_guest: new check for checking guest additions of Linux virtual box hosts
* if/if64: Fixed bug in operstate detection when using old tuple based params
* if/if64: Fixed bug in operstate detection when using tuple of valid operstates
+ * mk_oracle: Added caching of results to prevent problems with long
+ running SQL queries. Cache is controlled by CACHE_MAXAGE var which is preset to
+ 120 seconds
+ * mk_oracle: EXCLUDE_<sid>=ALL or EXCLUDE_<sid>=oracle_sessions can be
+ used to exclude specific checks now
+ * mk_oracle: Added optional configuration file to configure the new options
Multisite:
* FIX: make non-Ascii characters in services names work again
diff --git a/agents/plugins/mk_oracle b/agents/plugins/mk_oracle
old mode 100644
new mode 100755
index 18e7bcf..ad0f114
--- a/agents/plugins/mk_oracle
+++ b/agents/plugins/mk_oracle
@@ -26,52 +26,98 @@
# Check_MK agent plugin for monitoring ORACLE databases
# Get list of all running databases
-if [ $(uname -s) = Linux ]
+SIDS=$(UNIX95=true ps ax -o args | sed -n '/^ora_pmon_\([^ ]*\)/s//\1/p')
+if [ -z "$SIDS" ] ; then exit 0 ; fi
+
+CACHE_MAXAGE=120
+
+# Source the optional configuration file for this agent plugin
+if [ -e "$MK_CONFDIR/mk_oracle.cfg" ]
then
- PS="ps ax -o args"
-else
- PS="ps -ef -o args"
+ . $MK_CONFDIR/mk_oracle.cfg
fi
-SIDS=$(UNIX95=true $PS | sed -n '/^ora_pmon_\([^ ]*\)/s//\1/p')
-if [ -z "$SIDS" ] ; then exit 0 ; fi
+# It is possible to filter SIDS totally. Just add the following to
+# the mk_oracle.cfg file:
+#
+# EXCLUDE_<sid>="ALL"
+#
+# Another option is to filter single checks for SIDS. Just add
+# lines as follows to the mk_oracle.cfg file. One service per
+# line:
+#
+# EXCLUDE_<sid>="<service>"
+#
+# For example skip oracle_sessions and oracle_logswitches checks
+# for the instance "mysid".
+#
+# EXCLUDE_mysid="oracle_sessions oracle_logswitches"
+#
+#
+# This check uses a cache file to prevent problems with long running
+# SQL queries. It starts building a cache when
+# a) no cache is present or the cache is too old and
+# b) the cache is not currently being built
+# The cache is used for $CACHE_MAXAGE seconds. The CACHE_MAXAGE
+# option is pre-set to 120 seconds but can be changed in mk_oracle.cfg.
-# Make an sqlplus query, clean up the result and add an artificial
-# column with the SID at the front of each line. If the query fails,
-# no output at all is made.
-function sqlplus
-{
- OUTPUT=$({ echo 'set pages 0' ; echo 'set lines 8000' ; echo 'set feedback off'; cat ; } | $MK_CONFDIR/sqlplus.sh $1) || return 1
- echo "${OUTPUT}" | sed -e 's/[[:space:]][[:space:]]*/ /g' -e '/^[[:space:]]*$/d' -e "s/^/$1 /"
-}
+for SID in $SIDS; do
+ EXCLUDE=EXCLUDE_$SID
+ EXCLUDE=${!EXCLUDE}
+ # SID filtered totally?
+ if [ "$EXCLUDE" = "ALL" ]; then
+ continue
+ fi
-# Sessions
-echo '<<<oracle_sessions>>>'
-for SID in $SIDS
-do
- echo "select count(*) from v"'$'"session where status = 'ACTIVE';" | sqlplus "$SID"
-done
+ CACHE_FILE=$MK_CONFDIR/oracle_$SID.cache
-echo '<<<oracle_logswitches>>>'
-for SID in $SIDS
-do
- sqlplus "$SID" <<EOF
-select count(*) from v\$loghist where first_time > sysdate - 1/24;
-EOF
-done
+ # Check if file exists and recent enough
+ if [ -s $CACHE_FILE ]; then
+ NOW=$(date +%s)
+ MTIME=$(stat -c %Y $CACHE_FILE)
+ if [ $(($NOW - $MTIME)) -le $CACHE_MAXAGE ]; then
+ USE_CACHE_FILE=1
+ fi
+ fi
+
+ # Outputs the contents of the cache file when it exists and is recent enough.
+ # Otherwise it checks whether another task that updates the cache is currently
+ # running (a *.new file is present).
+ # When the cache is too old and there is no *.new file present, a query is started
+ # to update the information for this instance.
+ if [ -z "$USE_CACHE_FILE" -a ! -e "$CACHE_FILE.new" ]
+ then
+ setsid bash -c "
+ set -o noclobber
+ function sqlplus ()
+ {
+ OUTPUT=\$({ echo 'set pages 0' ; echo 'set lines 8000' ; echo 'set feedback off'; cat ; } | $MK_CONFDIR/sqlplus.sh \$1) || return 1
+ echo \"\${OUTPUT}\" | sed -e 's/[[:space:]]\+/ /g' -e '/^[[:space:]]*$/d' -e \"s/^/\$1 /\"
+ }
+
+ {
+ # Only execute checks when not filtered
+ if [ "$EXCLUDE" = "${EXCLUDE/oracle_sessions/}" ]; then
+ echo '<<<oracle_sessions>>>'
+ echo \"select count(*) from v\\\$session where status = 'ACTIVE';\" | sqlplus \"$SID\"
+ fi
+ if [ "$EXCLUDE" = "${EXCLUDE/oracle_logswitches/}" ]; then
+ echo '<<<oracle_logswitches>>>'
+ echo \"select count(*) from v\\\$loghist where first_time > sysdate - 1/24;\" | sqlplus \"$SID\"
+ fi
-# Tablespaces
-echo '<<<oracle_tablespaces>>>'
-for SID in $SIDS
-do
- sqlplus "$SID" <<EOF | sed 's/READ ONLY/READONLY/g'
-select f.file_name, f.tablespace_name, f.status, f.AUTOEXTENSIBLE, f.blocks, f.maxblocks, f.blocks - b.free_blocks as used_blocks, f.INCREMENT_BY, f.ONLINE_STATUS, t.BLOCK_SIZE, t.status
-from dba_data_files f, dba_tablespaces t ,(SELECT file_id, SUM(blocks) free_blocks FROM dba_free_space b GROUP BY file_id) b
-where f.tablespace_name = t.tablespace_name and f.file_id=b.file_id
+ if [ "$EXCLUDE" = "${EXCLUDE/oracle_tablespaces/}" ]; then
+ echo '<<<oracle_tablespaces>>>'
+ sqlplus "$SID" <<EOF | sed 's/READ ONLY/READONLY/g'
+select f.file_name, f.tablespace_name, f.status, f.AUTOEXTENSIBLE, f.blocks, f.maxblocks, f.USER_BLOCKS, f.INCREMENT_BY, f.ONLINE_STATUS, t.BLOCK_SIZE, t.status from dba_data_files f, dba_tablespaces t where f.tablespace_name = t.tablespace_name
UNION
-select f.file_name, f.tablespace_name, f.status, f.AUTOEXTENSIBLE, f.blocks, f.maxblocks, b.blocks_used as used_blocks, f.INCREMENT_BY, 'TEMP', t.BLOCK_SIZE, t.status
-from dba_temp_files f, dba_tablespaces t ,V\$TEMP_SPACE_HEADER b
-where f.tablespace_name = t.tablespace_name and f.file_id=b.file_id ;
+select f.file_name, f.tablespace_name, f.status, f.AUTOEXTENSIBLE, f.blocks, f.maxblocks, f.USER_BLOCKS, f.INCREMENT_BY, 'TEMP', t.BLOCK_SIZE, t.status from dba_temp_files f, dba_tablespaces t where f.tablespace_name = t.tablespace_name;
EOF
+ fi
+ } > $CACHE_FILE.new && mv $CACHE_FILE.new $CACHE_FILE || rm -f $CACHE_FILE*
+ "
+ else
+ cat "$CACHE_FILE"
+ fi
done
Module: check_mk
Branch: master
Commit: f3f3a7cf152f85223add20b2fb5d4a2728e8bd40
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=f3f3a7cf152f85…
Author: Florian Heigl <fh(a)mathias-kettner.de>
Date: Fri Jul 22 11:05:41 2011 +0200
FreeBSD Agent: support netctr output for older versions that don't have extra counters for idrop and similar. (Thanks to Mark McCorke)
---
agents/check_mk_agent.freebsd | 67 ++++++++++++++--------------------------
1 files changed, 24 insertions(+), 43 deletions(-)
diff --git a/agents/check_mk_agent.freebsd b/agents/check_mk_agent.freebsd
index 421690e..d6ca648 100755
--- a/agents/check_mk_agent.freebsd
+++ b/agents/check_mk_agent.freebsd
@@ -69,13 +69,19 @@ echo '<<<check_mk>>>'
echo Version: 1.1.11i2
echo AgentOS: freebsd
+
+
+osver="$(uname -r)"
+
+
# Partitionen (-P verhindert Zeilenumbruch bei langen Mountpunkten)
# Achtung: NFS-Mounts werden grundsaetzlich ausgeblendet, um
# Haenger zu vermeiden. Diese sollten ohnehin besser auf dem
# Server, als auf dem Client ueberwacht werden.
echo '<<<df>>>'
-# no special zfs handling so far, actually you won't even SEE zfs fs in this check. Need testers.
+# no special zfs handling so far, the ZFS.pools plugin has been tested to
+# work on FreeBSD
df -kTP -t ufs | egrep -v '(Filesystem|devfs|procfs|fdescfs|basejail)'
# Check NFS mounts by accessing them with stat -f (System
@@ -146,13 +152,20 @@ echo `date +%s` - `sysctl -n kern.boottime | cut -d' ' -f 4,7 | tr ',' '.' | tr
# cfggen 0 DISPLAY | egrep '(Target ID|State|Volume ID|Status of volume)[[:space:]]*:' | sed -e 's/ *//g' -e 's/:/ /'
#fi
-# Multipathgeraete
-# remove, CAM doesn't speak multipath so far.
+
+# Multipathing is supported in FreeBSD by now
+# see "gmultipath" or
+# http://www.mywushublog.com/2010/06/freebsd-and-multipath/
+# it's queried using "gmultipath status" and seems to be the 2nd-most
+# straightforward multipath implementation since AIX
+
+
# Soft-RAID
# need to replace with extra section for GEOM and ZFS because of larger /
# different featuresets.
+
# Performancecounter Platten
#echo '<<<diskstat>>>'
#date +%s
@@ -164,10 +177,16 @@ echo `date +%s` - `sysctl -n kern.boottime | cut -d' ' -f 4,7 | tr ',' '.' | tr
#cat /proc/vmstat /proc/stat
# Network device statistics (Packets, Collisions, etc)
-# picking the interface dubbed "Link/Num" gives the phys. interface, otherwise we don't see error counters.
+# only the "Link/Num" interface has all counters.
echo '<<<netctr>>>'
date +%s
-netstat -inb | egrep -v '(^Name|lo|plip)' | grep Link | awk '{print $1" "$8" "$5" "$6" "$7" 0 0 0 0 "$11" "$9" "$10" 0 0 0 0 0"}'
+if [ "$(echo $osver | cut -f1 -d\. )" -gt "8" ]; then
+ netstat -inb | egrep -v '(^Name|lo|plip)' | grep Link | awk '{print $1" "$8" "$5" "$6" "$7" 0 0 0 0 "$11" "$9" "$10" 0 0 0 0 0"}'
+else
+ # pad output for freebsd 7 and before
+ netstat -inb | egrep -v '(^Name|lo|plip)' | grep Link | awk '{print $1" "$7"
+"$5" "$6" 0 0 0 0 0 "$10" "$8" "$9" 0 0 "$11" 0 0"}'
+fi
# IPMI-Data (Fans, CPU, temperature, etc)
@@ -181,44 +200,6 @@ if which ipmitool >/dev/null ; then
| grep -v ' discrete '
fi
-# IPMI data via ipmi-sensors (of freeipmi). Please make sure, that if you
-# have installed freeipmi that IPMI is really support by your hardware.
-# The agent tries to avoid hanging forever by setting a limit of 300 seconds
-# for the first run (where the cache is created). If ipmi-sensors runs into
-# that timeout, it leaves and empty cache file. We skip this check forever
-# if we find that empty cache file.
-#sdrcache=/var/cache/.freeipmi/sdr-cache/sdr-cache-$(hostname).127.0.0.1
-#if which ipmi-sensors >/dev/null && [ ! -e "$sdrcache" -o -s "$sdrcache" ]
-#then
-# echo '<<<ipmi_sensors>>>'
-# # No cache file existing? => Impose a high time limit. We do not suffice
-# # in creating the cache we most probably run on a hardware where this tool
-# # is hanging forever. We make sure that we never try again in that case!
-# if [ ! -e "$sdrcache" ]
-# then
-# WAITMAX="waitmax 300"
-# elif tail --bytes 2 < "$sdrcache" | od -t x2 | grep -q 0a0a
-# then
-# WAITMAX="waitmax 3"
-# else
-# # Cache file corrupt. Must end with two linefeeds.
-# rm -f $sdrcache
-# WAITMAX=
-# fi
-# # Newer ipmi-sensors version have new output format; Legacy format can be used
-# if ipmi-sensors --help | grep -q legacy-output; then
-# IPMI_FORMAT="--legacy-output"
-# else
-# IPMI_FORMAT=""
-# fi
-# for class in Temperature 'Power Unit' Fan
-# do
-# $WAITMAX ipmi-sensors $IPMI_FORMAT --sdr-cache-directory /var/cache -g "$class" | sed -e 's/ /_/g' -e 's/:_\?/ /g' -e 's@ \([^(]*\)_(\([^)]*\))@ \2_\1@'
-# # In case of a timeout immediately leave loop.
-# if [ $? = 255 ] ; then break ; fi
-# WAITMAX="waitmax 3"
-# done
-#fi
# State of LSI MegaRAID controller via MegaCli. You can download that tool from:
# http://www.lsi.com/DistributionSystem/AssetDocument/support/downloads/megar…
Module: check_mk
Branch: master
Commit: f3c294f1e832e1c4b01efaca81fc21dd855bce67
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=f3c294f1e832e1…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Fri Jul 22 10:57:14 2011 +0200
Updated bug entries
---
.bugs/315 | 11 +++++++++++
1 files changed, 11 insertions(+), 0 deletions(-)
diff --git a/.bugs/315 b/.bugs/315
new file mode 100644
index 0000000..ca19841
--- /dev/null
+++ b/.bugs/315
@@ -0,0 +1,11 @@
+Title: embedded PNP-graphs broken if none are there
+Component: multisite
+Benefit: 1
+State: open
+Cost: 1
+Date: 2011-07-22 10:56:11
+Class: nastiness
+
+If you go to the service details of a service without performance
+data, then the embedded PNP-Graph area is not empty but shows
+seven broken images.