introduce averaging
Message-ID: <516bbec2.L4Ldid4nyDYj/UnB%mk(a)mathias-kettner.de>
User-Agent: Heirloom mailx 12.4 7/29/08
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Module: check_mk
Branch: master
Commit: caf4f1d640a5aaedfa10403ba32da205aa175596
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=caf4f1d640a5aaedfa10403ba32da205aa175596
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Mon Apr 15 10:47:58 2013 +0200
esx_vsphere_hostsystem.cpu_usage: introduce averaging
---
checkman/esx_vsphere_hostsystem.cpu_usage | 21 ++++---
checks/esx_vsphere_hostsystem | 43 ++++++-------
.../check_mk-esx_vsphere_hostsystem.cpu_usage.php | 63 ++++++++++++++------
web/plugins/perfometer/check_mk.py | 4 +-
4 files changed, 78 insertions(+), 53 deletions(-)
diff --git a/checkman/esx_vsphere_hostsystem.cpu_usage b/checkman/esx_vsphere_hostsystem.cpu_usage
index be957b4..d90f252 100644
--- a/checkman/esx_vsphere_hostsystem.cpu_usage
+++ b/checkman/esx_vsphere_hostsystem.cpu_usage
@@ -8,19 +8,24 @@ description:
It also shows you the number of sockets, cores and threads.
perfdata:
- One value: the current usage in terms of cores. The maximum value sent
- is the number of core. If you have 12 cores installed and the usage is 50%,
- then a value of 6 will be sent. That way the performance graph is able
- to show the number of cores correctly. The warn/crit levels are also
- converted in relation to the number of cores.
+ One or two values: the first value is current usage in percent - ranging from
+ 0 to 100. The "maximum" value is not 100, but the number of CPU threads. This
+ can be used for scaling the graph in terms of the number of used CPU threads.
+
+ If averaging is enabled then a second value is sent: the averaged CPU utilization
+ ranging from 0 to 100.
inventory:
One check per ESX Host System will be created.
[parameters]
-warning (float): A percentage for the warning level ranging from 0 to 100
-critical (float): A percentage for the critical level ranging from 0 to 100
+parameters(dict): A dictionary with the following keys:
+
+ {"levels"}: Either {None} for no levels, a tuple of warn and crit (in percent) or
+ a dictionary with predictive levels settings.
+
+ {"average"}: A number of minutes for enabling averaging.
[configuration]
-esx_host_cpu_default_levels(float, float): Default levels, preset to {None}, which means that no levels
+esx_host_cpu_default_levels(dict): Default levels, preset to an empty dictionary, which means that no levels
will be applied.
diff --git a/checks/esx_vsphere_hostsystem b/checks/esx_vsphere_hostsystem
index 65b4ab0..3f10e9c 100644
--- a/checks/esx_vsphere_hostsystem
+++ b/checks/esx_vsphere_hostsystem
@@ -41,7 +41,7 @@ def esx_vsphere_hostsystem_convert(info):
# | |
# +----------------------------------------------------------------------+
-esx_host_cpu_default_levels = None
+esx_host_cpu_default_levels = {}
# hardware.cpuInfo.numCpuCores 12
# hardware.cpuInfo.numCpuPackages 2
@@ -55,12 +55,12 @@ def inventory_esx_vsphere_hostsystem_cpu(info):
if 'summary.quickStats.overallCpuUsage' in data \
and 'hardware.cpuInfo.hz' in data\
and 'hardware.cpuInfo.numCpuCores' in data:
- return [(None, 'esx_host_cpu_default_levels')]
+ return [(None, {})]
def check_esx_vsphere_hostsystem_cpu(item, params, info):
data = esx_vsphere_hostsystem_convert(info)
- num_cores = int(data['hardware.cpuInfo.numCpuCores'][0])
num_sockets = int(data['hardware.cpuInfo.numCpuPackages'][0])
+ num_cores = int(data['hardware.cpuInfo.numCpuCores'][0])
num_threads = int(data['hardware.cpuInfo.numCpuThreads'][0])
used_mhz = float(data['summary.quickStats.overallCpuUsage'][0])
mhz_per_core = float(data['hardware.cpuInfo.hz'][0]) / 1024.0 / 1024.0
@@ -71,35 +71,30 @@ def check_esx_vsphere_hostsystem_cpu(item, params, info):
infotext = "%.1f%%" % usage
- state = 0
- if params:
- warn, crit = params
- if usage >= crit:
- state = 2
- infotext += " (levels at %d%%/%d%%)" % (warn, crit)
- elif usage >= warn:
- state = 1
- infotext += " (levels at %d%%/%d%%)" % (warn, crit)
- warn_perf = warn * per_core
- crit_perf = crit * per_core
- else:
- warn_perf, crit_perf = None, None
+ # Convert legacy parameters
+ this_time = time.time()
+ state, infotext, perfdata = check_cpu_util(usage, params)
infotext += ", %.2fGHz/%.2fGHz" % (used_mhz / 1024.0, total_mhz / 1024.0)
-
infotext += ", %d sockets, %d cores/socket, %d threads" % (
num_sockets, num_cores / num_sockets, num_threads)
- perf = [("usage", usage * per_core, warn_perf, crit_perf, 0, 100 *
per_core)]
- return (state, infotext, perf)
+ # put number of threads as MAX value for first perf-data. This
+ # is needed by the PNP template.
+ perfdata_cpu = list(perfdata[0])
+ perfdata_cpu[-1] = num_threads
+ perfdata = [ tuple(perfdata_cpu) ] + perfdata[1:]
+ return (state, infotext, perfdata)
check_info['esx_vsphere_hostsystem.cpu_usage'] = {
- "inventory_function" : inventory_esx_vsphere_hostsystem_cpu,
- "check_function" : check_esx_vsphere_hostsystem_cpu,
- "service_description" : "CPU utilization",
- "group" : "cpu_utilization",
- "has_perfdata" : True
+ "inventory_function" : inventory_esx_vsphere_hostsystem_cpu,
+ "check_function" : check_esx_vsphere_hostsystem_cpu,
+ "service_description" : "CPU utilization",
+ "group" : "cpu_utilization_os",
+ "has_perfdata" : True,
+ "default_levels_variable" : "esx_host_cpu_default_levels",
+ "includes" : [ "cpu_util.include" ],
}
diff --git a/pnp-templates/check_mk-esx_vsphere_hostsystem.cpu_usage.php b/pnp-templates/check_mk-esx_vsphere_hostsystem.cpu_usage.php
index f4c47e3..82c4d1d 100644
--- a/pnp-templates/check_mk-esx_vsphere_hostsystem.cpu_usage.php
+++ b/pnp-templates/check_mk-esx_vsphere_hostsystem.cpu_usage.php
@@ -23,30 +23,57 @@
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
-$num_cores = $MAX[1];
-$warnperc = $WARN[1] / $num_cores * 100.0;
-$critperc = $CRIT[1] / $num_cores * 100.0;
+# Do not depend on numbers, use names
+$RRD = array();
+foreach ($NAME as $i => $n) {
+ $RRD[$n] = "$RRDFILE[$i]:$DS[$i]:MAX";
+ $WARN[$n] = $WARN[$i];
+ $CRIT[$n] = $CRIT[$i];
+ $MIN[$n] = $MIN[$i];
+ $MAX[$n] = $MAX[$i];
+}
+
+$num_threads = $MAX[1];
+$warnthreads = $WARN[1] * $num_threads / 100.0;
+$critthreads = $CRIT[1] * $num_threads / 100.0;
+$rightscale = 100.0 / $num_threads;
+
+$opt[1] = "--vertical-label 'Used CPU threads' --right-axis $rightscale:0
--right-axis-format '%4.1lf%%' -l0 -ru $num_threads --title \"CPU
Utilization for $hostname ($num_threads CPU threads)\" ";
-$opt[1] = "--vertical-label 'Used cores' -l0 -ru $num_cores --title
\"CPU Utilization for $hostname\" ";
+$def[1] = "DEF:perc=$RRD[util] "
+ . "CDEF:util=perc,$num_threads,*,100,/ "
+ ;
+
+$def[1] .= "HRULE:$MAX[util]#0040d0:\"$num_threads CPU Threads\\n\"
"
+ ;
-$def[1] = "DEF:util=$RRDFILE[1]:$DS[1]:MAX "
- . "CDEF:perc=util,$num_cores,/,100,* "
- . "AREA:util#60f020:\"Utilization\:\" "
- . "LINE:util#308010 "
- . "GPRINT:perc:LAST:\"%0.1lf%% \" ";
+$def[1] .= "AREA:util#60f020:\"Utilization\:\" "
+ . "LINE:util#50b01a "
+ . "GPRINT:perc:LAST:\"%.1lf%%\" "
+ . "GPRINT:util:LAST:\"(%.1lf Threads) \" "
+ . "GPRINT:perc:MIN:\"min\: %.1lf%%,\" "
+ . "GPRINT:util:MIN:\"(%.1lf), \" "
+ . "GPRINT:perc:MAX:\"max\: %.1lf%%\" "
+ . "GPRINT:util:MAX:\"(%.1lf)\\n\" "
+ ;
-if ($WARN[1]) {
- $def[1] .= "HRULE:$WARN[1]#fff000:\"Warn at $warnperc% \" "
- . "HRULE:$CRIT[1]#ff0000:\"Critical at $critperc%\\n\"
";
+
+if (isset($RRD["avg"])) {
+ $def[1] .= "DEF:aperc=$RRD[avg] ".
+ "CDEF:avg=aperc,$num_threads,*,100,/ ".
+ "LINE:avg#004000:\"Averaged\: \" ".
+ "GPRINT:aperc:LAST:\"%.1lf%%,\" ".
+ "GPRINT:aperc:MIN:\"min\: %.1lf%%,\" ".
+ "GPRINT:aperc:MAX:\"max\: %.1lf%%\\n\" ".
+ "";
+}
+
+if ($WARN['util']) {
+ $def[1] .= "HRULE:$warnthreads#fff000:\"Warn at $WARN[util]% \"
"
+ . "HRULE:$critthreads#ff0000:\"Critical at $CRIT[util]%\\n\"
";
}
else {
$def[1] .= "COMMENT:\"\\n\" ";
}
-$def[1] .= "HRULE:$MAX[1]#0040d0:\"$num_cores Cores installed \" "
- . "GPRINT:util:MIN:\"Min\: %5.2lf Cores \" "
- . "GPRINT:util:MAX:\"Max\: %5.2lf Cores\" "
- . "GPRINT:util:LAST:\"Last\: %4.1lf Cores\\n\" "
- ;
-
?>
diff --git a/web/plugins/perfometer/check_mk.py b/web/plugins/perfometer/check_mk.py
index f959cf6..fd6290d 100644
--- a/web/plugins/perfometer/check_mk.py
+++ b/web/plugins/perfometer/check_mk.py
@@ -646,9 +646,7 @@ perfometers['check_mk-esx_vsphere_hostsystem.mem_usage'] =
perfometer_simple_mem
perfometers['check_mk-esx_vsphere_virtualmachine.mem_usage'] =
perfometer_simple_mem_usage
def perfometer_esx_vsphere_hostsystem_cpu(row, command, perf):
- cores = float(perf[0][6])
- used = float(perf[0][1])
- used_perc = used / cores * 100.0
+ used_perc = float(perf[0][1])
return "%d%%" % used_perc, perfometer_linear(used_perc,
"#60f020")
perfometers['check_mk-esx_vsphere_hostsystem.cpu_usage'] =
perfometer_esx_vsphere_hostsystem_cpu