Module: check_mk
Branch: master
Commit: b71a121883f920eae77f3dab7a56dd6c4e391632
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=b71a121883f920…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Tue Jan 14 10:49:39 2014 +0100
mem.win: Allow time-averaging of values before applying levels
You can now set an averaging time horizon in minutes. If you do that,
the levels for memory and page file usage will be applied to their
average over time. That makes the check less reactive to small peaks
in the memory usage.
---
.werks/445 | 11 ++++++++
ChangeLog | 1 +
checkman/mem.win | 4 +++
checks/mem | 35 +++++++++++++++++-------
pnp-templates/check_mk-mem.win.php | 49 ++++++++++++++++++++++++----------
web/plugins/wato/check_parameters.py | 1 +
6 files changed, 78 insertions(+), 23 deletions(-)
diff --git a/.werks/445 b/.werks/445
new file mode 100644
index 0000000..13ce0ab
--- /dev/null
+++ b/.werks/445
@@ -0,0 +1,11 @@
+Title: mem.win: Allow time-averaging of values before applying levels
+Level: 1
+Component: checks
+Version: 1.2.5i1
+Date: 1389692900
+Class: feature
+
+You can now set an averaging time horizon in minutes. If you do that,
+the levels for memory and page file usage will be applied to their
+average over time. That makes the check less reactive to small peaks
+in the memory usage.
diff --git a/ChangeLog b/ChangeLog
index 66ec11d..537241b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,6 +40,7 @@
* 0079 f5_bigip_conns: new check to monitor number of current connections
* 0324 hitachi_hnas_cifs: new check for the number of users using a CIFS share
* 0455 hitachi_hnas_span: new check for Spans (Storage Pools) in Hitachi HNAS storage
systems
+ * 0445 mem.win: Allow time-averaging of values before applying levels...
* 0103 FIX: services: Fixed bug with service inventory defined in main.mk...
* 0299 FIX: borcade_mlx_fan: Prettified output, handling "other" state now
* 0300 FIX: cisco_fru_power: Trying not to inventorize not plugged in FRUs...
diff --git a/checkman/mem.win b/checkman/mem.win
index d9fc6a4..296e52c 100644
--- a/checkman/mem.win
+++ b/checkman/mem.win
@@ -21,6 +21,7 @@ examples:
memory_win_default_levels = {
"memory" : (80.0, 90.0), # alert at 80%/90% usage
"pagefile" : (2048, 1024), # alert, if less then 2/1 GB free
+ "average" : 60, # apply levels on 60-min average
}
# Disable memory levels for all hosts with the tag "test"
@@ -44,6 +45,9 @@ parameters (dict): This check uses a dictionary as parameter. The
following
{"pagefile"} Warning and critical levels for page file usage. The same
rules apply as for {memory}.
+ {"average"} This optional parameter sets a time horizon in minutes for averaging.
+ If it is set, all warn/crit levels are applied to the averaged values.
+
[configuration]
memory_win_default_levels (dict): Levels used by
diff --git a/checks/mem b/checks/mem
index e97e3d9..8c5736a 100644
--- a/checks/mem
+++ b/checks/mem
@@ -93,6 +93,8 @@ def check_mem_windows(item, params, info):
infotxts = []
MB = 1024.0 * 1024
worststate = 0
+ now = time.time()
+
for title, what, paramname in [
( "Memory", "Mem", "memory" ),
( "Page file", "Page", "pagefile" )]:
@@ -103,8 +105,30 @@ def check_mem_windows(item, params, info):
free_mb = free_kb / 1024.0
perc = 100.0 * used_kb / total_kb
- # Now check the levels
+ infotxts.append("%s usage: %.1f%% (%.1f/%.1f GB)" %
+ (title, perc, used_kb / MB, total_kb / MB))
+
warn, crit = params[paramname]
+
+ # In perfdata show warn/crit as absolute values
+ if type(warn) == float:
+ warn_kb = total_kb * warn / 100 / 1024
+ if type(crit) == float:
+ crit_kb = total_kb * crit / 100 / 1024
+ perfdata.append((paramname, used_kb / 1024.0, warn_kb, crit_kb, 0, total_kb /
1024.0))
+
+ # Do averaging, if configured, just for matching the levels
+ if "average" in params:
+ average_min = params["average"]
+ timedif, used_kb = get_average("mem.win.%s" % paramname,
+ now, used_kb, average_min, initialize_zero =
False)
+ used_mb = used_kb / 1024.0
+ free_mb = (total_kb / 1024.0) - used_mb
+ perc = 100.0 * used_kb / total_kb
+ infotxts[-1] += ", %d min average: %.1f%% (%.1f GB)" %
(average_min, perc, used_kb / MB)
+ perfdata.append((paramname + "_avg", used_kb / 1024.0))
+
+ # Now check the levels
if (type(crit) == int and free_mb <= crit) or \
(type(crit) == float and perc >= crit):
worststate = 2
@@ -116,15 +140,8 @@ def check_mem_windows(item, params, info):
else:
state_code = ""
- # Convert levels to absolute values (for perfdata)
- if type(warn) == float:
- warn = total_kb * warn / 100 / 1024
- if type(crit) == float:
- crit = total_kb * crit / 100 / 1024
+ infotxts[-1] += state_code
- infotxts.append("%s usage: %.1f%% (%.1f/%.1f GB)%s" %
- (title, perc, used_kb / MB, total_kb / MB, state_code))
- perfdata.append((paramname, used_kb / 1024.0, warn, crit, 0, total_kb / 1024.0))
return (worststate, ", ".join(infotxts), perfdata)
diff --git a/pnp-templates/check_mk-mem.win.php b/pnp-templates/check_mk-mem.win.php
index 46181f6..a7d3342 100644
--- a/pnp-templates/check_mk-mem.win.php
+++ b/pnp-templates/check_mk-mem.win.php
@@ -23,9 +23,19 @@
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
-$maxmem = $MAX[1] / 1024.0;
+// Make data sources available via names
+$RRD = array();
+foreach ($NAME as $i => $n) {
+ $RRD[$n] = "$RRDFILE[$i]:$DS[$i]:MAX";
+ $WARN[$n] = $WARN[$i];
+ $CRIT[$n] = $CRIT[$i];
+ $MIN[$n] = $MIN[$i];
+ $MAX[$n] = $MAX[$i];
+}
+
+$maxmem = $MAX["memory"] / 1024.0;
$maxmemprint = sprintf("%5.2f", $maxmem);
-$maxpage = $MAX[2] / 1024.0;
+$maxpage = $MAX["pagefile"] / 1024.0;
$maxpageprint = sprintf("%5.2f", $maxpage);
$opt[1] = " --vertical-label 'Gigabytes' -X0 "
@@ -34,29 +44,40 @@ $opt[1] = " --vertical-label 'Gigabytes' -X0 "
. " --title \"Memory and page file usage $hostname\" ";
-$def[1] = "DEF:mem=$RRDFILE[1]:$DS[1]:MAX "
+$def[1] = "DEF:mem=$RRD[memory] "
. "CDEF:memgb=mem,1024,/ "
- . "DEF:page=$RRDFILE[2]:$DS[2]:MAX "
+ . "DEF:page=$RRD[pagefile] "
. "CDEF:pagegb=page,1024,/ "
. "CDEF:mpagegb=pagegb,-1,* "
-
- . "AREA:$maxmem#a0f8c0:\"$maxmemprint GB RAM \" "
- . "AREA:memgb#20d060 "
+
+ . "AREA:$maxmem#b0ffe0:\"$maxmemprint GB RAM \" "
+ . "AREA:memgb#40f090 "
. "GPRINT:memgb:LAST:\"%5.2lf GB last\" "
. "GPRINT:memgb:AVERAGE:\"%5.2lf GB avg\" "
. "GPRINT:memgb:MAX:\"%5.2lf GB max\" "
- . "HRULE:".($WARN[1]/1024)."#FFFF00:\"Warn\" "
- . "HRULE:".($CRIT[1]/1024)."#FF0000:\"Crit\\n\" "
+ .
"HRULE:".($WARN["memory"]/1024)."#FFFF00:\"Warn\"
"
+ .
"HRULE:".($CRIT["memory"]/1024)."#FF0000:\"Crit\\n\"
"
- . "AREA:\"-$maxpage\"#a0d0e8:\"$maxpageprint GB page
file\" "
- . "AREA:mpagegb#3040d0 "
+ . "AREA:\"-$maxpage\"#b0e0f0:\"$maxpageprint GB page
file\" "
+ . "AREA:mpagegb#90b0ff "
. "GPRINT:pagegb:LAST:\"%5.2lf GB last\" "
. "GPRINT:pagegb:AVERAGE:\"%5.2lf GB avg\" "
. "GPRINT:pagegb:MAX:\"%5.2lf GB max\" "
- . "HRULE:".(-$WARN[2]/1024)."#FFFF00:\"Warn\" "
- . "HRULE:".(-$CRIT[2]/1024)."#FF0000:\"Crit\\n\" "
-
+ .
"HRULE:".(-$WARN["pagefile"]/1024)."#FFFF00:\"Warn\"
"
+ .
"HRULE:".(-$CRIT["pagefile"]/1024)."#FF0000:\"Crit\\n\"
"
+ ;
+# If averaging is enabled then we get two further metrics
+if (isset($RRD["memory_avg"])) {
+ $def[1] .= ""
+ . "DEF:memavg=$RRD[memory_avg] "
+ . "CDEF:memavggb=memavg,1024,/ "
+ . "LINE:memavggb#006000:\"Memory Average \" "
+ . "DEF:pageavg=$RRD[pagefile_avg] "
+ . "CDEF:pageavggb=pageavg,1024,/ "
+ . "CDEF:mpageavggb=pageavggb,-1,* "
+ . "LINE:mpageavggb#000060:\"Pagefile Average\\n\" "
;
+}
?>
diff --git a/web/plugins/wato/check_parameters.py b/web/plugins/wato/check_parameters.py
index 2d95917..408fbf1 100644
--- a/web/plugins/wato/check_parameters.py
+++ b/web/plugins/wato/check_parameters.py
@@ -1147,6 +1147,7 @@ register_check_parameters(
"default, averaging is turned off. "),
unit = _("minutes"),
minvalue = 1,
+ default_value = 60,
)
),