Module: check_mk
Branch: master
Commit: eaa67d79f3c0f46313e1300807aed7b9ea9eff64
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=eaa67d79f3c0f4…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Sun Nov 14 15:56:36 2010 +0100
kernel: convert perfdata to rates, perfometer
---
ChangeLog | 5 +++++
checkman/kernel | 11 ++++++-----
checks/kernel | 20 ++++++++++++++------
pnp-templates/check_mk-kernel.php | 8 ++++----
web/plugins/perfometer/check_mk.py | 6 ++++++
5 files changed, 35 insertions(+), 15 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 197dacc..1395a16 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -17,6 +17,11 @@
* Renamed check functions of imm_health check from test_imm to imm_health
to have valid function and check names. Please remove from
inventory and re-inventory those checks.
+ * Check kernel: converted performance data from counters to rates. This
+ fixes RRD problems (spikes) on reboots and also allows better access
+ to the performance data for the Perf-O-Meters. Also changed service
+ descriptions. You need to re-inventorize the kernel checks. Your old
+ RRDs will not be deleted, new ones will be created.
Core, Setup, etc.:
* Improve error handling: if hosts are monitored with SNMP *and* TCP,
diff --git a/checkman/kernel b/checkman/kernel
index 6270609..e1308d4 100644
--- a/checkman/kernel
+++ b/checkman/kernel
@@ -16,21 +16,23 @@ description:
be checked.
item:
- The name of the kernel counter as found in {/proc/stat} or
+ {"Context Switches"}, {"Process Creations"}, {"Major Page Faults"} or the name
+ of the kernel counter in question as found in {/proc/stat} or
{/proc/vmstat}.
examples:
# Monitor major page faults, set levels to 200 and 400 per second
# Do this on all hosts with the tag "lnx"
checks += [
- ( ["lnx"], ALL_HOSTS, "kernel", "pgmajfault", (200, 400)
)
+ ( ["lnx"], ALL_HOSTS, "kernel", "Major Page Faults",
(200, 400) )
]
perfdata:
One variable: the current value of the counter.
inventory:
- Per default one check for {pgmajfault}, {ctxt} and {processes}
+ Per default one check for {"Major Page Faults"},
+ {"Context Switches"} and {"Process Creations"}
will be created on each host providing that data. You can
override this list globally by setting {inventory_kernel_counters}
to a list of strings with those counters that should be inventorized.
@@ -39,7 +41,7 @@ inventory:
examples:
# Make inventory create only checks for pgmajfault
- inventory_kernel_counters = [ "pgmajfault" ]
+ inventory_kernel_counters = [ "pgmajfault", "pgpgin",
"pgpgout" ]
[parameters]
warning (int): The rate per second which triggers a warning. A value of {None} disables
the level.
@@ -51,4 +53,3 @@ inventory_kernel_counters (list of strings): List of performance
counters the in
kernel_default_levels (int, int): Default levels for newly inventorized checks. Default
is {(None, None)}, which disables the levels and makes the check always OK.
-
diff --git a/checks/kernel b/checks/kernel
index fb6aa9b..85acd25 100644
--- a/checks/kernel
+++ b/checks/kernel
@@ -28,12 +28,19 @@
inventory_kernel_counters = [ "pgmajfault", "ctxt",
"processes" ]
kernel_default_levels = (None, None)
+kernel_counter_names = {
+ "ctxt" : "Context Switches",
+ "processes" : "Process Creations",
+ "pgmajfault" : "Major Page Faults",
+}
+
def inventory_kernel(checktype, info):
inventory = []
for counter in inventory_kernel_counters:
hits = [ line[0] for line in info[1:] if line[0] == counter ]
if len(hits) == 1:
- inventory.append( (counter, "kernel_default_levels") )
+ countername = kernel_counter_names.get(counter, counter)
+ inventory.append( (countername, "kernel_default_levels") )
return inventory
@@ -41,21 +48,22 @@ def inventory_kernel(checktype, info):
def check_kernel(item, params, info):
this_time = int(info[0][0])
- hits = [ line[1] for line in info[1:] if line[0] == item ]
+ hits = [ (line[0], line[1]) for line in info[1:] if line[0] == item or
kernel_counter_names.get(line[0], line[0]) == item ]
if len(hits) == 0:
return (3, "UNKNOWN - item '%s' not found in agent output" %
item)
elif len(hits) > 1:
return (3, "UNKNOWN - item '%s' not unique (found %d times)" %
(item, len(hits)))
- this_val = int(hits[0])
+ counter = hits[0][0]
+ this_val = int(hits[0][1])
timedif, per_sec = get_counter("kernel." + item, this_time, this_val)
infotext = " - %.0f/s in last %d secs" % (per_sec, timedif)
+
if params == None:
- perfdata = [ (item, "%dc" % this_val, "", "", 0 )
]
- return (0, "OK" + infotext, perfdata)
+ return (0, "OK" + infotext, [ (counter, per_sec) ])
warn, crit = params
- perfdata = [ (item, "%dc" % this_val, warn, crit, 2) ]
+ perfdata = [ (counter, per_sec, warn, crit) ]
if warn == None and crit != None:
infotext += " (critical at %.0f/s)" % crit
elif warn != None and crit == None:
diff --git a/pnp-templates/check_mk-kernel.php b/pnp-templates/check_mk-kernel.php
index c6e9052..d2bf69d 100644
--- a/pnp-templates/check_mk-kernel.php
+++ b/pnp-templates/check_mk-kernel.php
@@ -24,7 +24,7 @@
# Boston, MA 02110-1301 USA.
$subtype = substr($servicedesc, 7);
-if ($subtype == "pgmajfault") {
+if ($subtype == "pgmajfault" || $subtype == "Major_Page_Faults") {
$title = "Major Page Faults";
$vertical = "faults / sec";
$format = "%5.1lf/s";
@@ -32,7 +32,7 @@ if ($subtype == "pgmajfault") {
$color = "20ff80";
$line = "10a040";
}
-else if ($subtype == "ctxt") {
+else if ($subtype == "ctxt" || $subtype == "Context_Switches") {
$title = "Context Switches";
$vertical = "switches / sec";
$format = "%5.1lf/s";
@@ -40,7 +40,7 @@ else if ($subtype == "ctxt") {
$color = "80ff20";
$line = "40a010";
}
-else if ($subtype == "processes") {
+else if ($subtype == "processes" || $subtype == "Process_Creations")
{
$title = "Process creation";
$vertical = "new processes / sec";
$format = "%5.1lf/s";
@@ -57,7 +57,7 @@ else {
$line = "90a010";
}
-$opt[1] = " --vertical-label \"$vertical\" -X0 -l 0 -u $upto --title
\"$title\" ";
+$opt[1] = " --vertical-label \"$vertical\" -X0 -l 0 -u $upto --title
\"$hostname: $title\" ";
$def[1] = "DEF:var1=$RRDFILE[1]:$DS[1]:MAX ";
$def[1] .= "AREA:var1#$color:\"$title\:\" ";
diff --git a/web/plugins/perfometer/check_mk.py b/web/plugins/perfometer/check_mk.py
index ce5cd4e..28cdc71 100644
--- a/web/plugins/perfometer/check_mk.py
+++ b/web/plugins/perfometer/check_mk.py
@@ -120,6 +120,12 @@ def perfometer_check_mk_cpu_threads(row, check_command, perf_data):
perfometers["check_mk-cpu.threads"] = perfometer_check_mk_cpu_threads
+def perfometer_check_mk_kernel(row, check_command, perf_data):
+ rate = float(perf_data[0][1])
+ return "%.1f/s" % rate, perfometer_logarithmic(rate, 1000, 2,
"#da6")
+
+perfometers["check_mk-kernel"] = perfometer_check_mk_kernel
+
def perfometer_check_mk_cpu_loads(row, check_command, perf_data):
color = { 0: "#68f", 1: "#ff2", 2: "#f22", 3:
"#fa2" }[row["service_state"]]