Module: check_mk
Branch: master
Commit: cf692e0b75b77f31d60d23a54901f49a6f269500
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=cf692e0b75b77f31d60d23a54901f49a6f269500
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Fri Mar 18 08:58:08 2016 +0100
3292 livestatus_status: Added Microcore relevant metrics like helper usage and latency
The livestatus_status check was missing some performance indicators like the helper usage
and check latency which are only available in the Microcore. These metrics have now been
added to this check.
---
.werks/3292 | 12 +++++++++
ChangeLog | 1 +
checks/livestatus_status | 36 ++++++++++++++++++++-------
web/plugins/metrics/check_mk.py | 52 +++++++++++++++++++++++++++++++++++++++
4 files changed, 92 insertions(+), 9 deletions(-)
diff --git a/.werks/3292 b/.werks/3292
new file mode 100644
index 0000000..1c58a9a
--- /dev/null
+++ b/.werks/3292
@@ -0,0 +1,12 @@
+Title: livestatus_status: Added Microcore relevant metrics like helper usage and latency
+Level: 1
+Component: checks
+Class: feature
+Compatible: compat
+State: unknown
+Version: 1.2.9i1
+Date: 1458287805
+
+The livestatus_status check was missing some performance indicators like the helper usage
+and check latency which are only available in the Microcore. These metrics have now been
+added to this check.
diff --git a/ChangeLog b/ChangeLog
index c69076f..2b9b397 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -33,6 +33,7 @@
* 3303 cisco_fru_module_status: new check which monitors the operational status of FRU modules of Cisco devices which support the CISCO-ENTITY-FRU-CONTROL-MIB
* 3012 jolokia_metrics.threads: Improved check output, now showing the actual problem and levels
* 3272 oracle_tablespaces: severity of check for autoextension is now configurable...
+ * 3292 livestatus_status: Added Microcore relevant metrics like helper usage and latency...
* 3073 FIX: windows agent: relative paths to mrpe scripts are now treated as relative to the agent installation directory...
* 3061 FIX: mk_jolokia: Fixed debugging of the agent plugin
* 3074 FIX: windows agent: fixed incorrect values for 32-bit performance counters
diff --git a/checks/livestatus_status b/checks/livestatus_status
index 0ba088e..8d50d4c 100644
--- a/checks/livestatus_status
+++ b/checks/livestatus_status
@@ -84,17 +84,35 @@ def check_livestatus_status(item, params, parsed):
# Check Performance counters
this_time = time.time()
status_counters = [
- ( "host_checks", "Host Checks", ),
- ( "service_checks", "Service Checks", ),
- ( "forks", "Process Creations", ),
- ( "connections", "Livestatus Connects", ),
- ( "requests", "Livestatus Requests", ),
- ( "log_messages", "Log Messages", ),
+ # conv_func, factor, calc_rate, format, column, title
+ (int, 1, True, "%.1f/s", "host_checks", "Host Checks", ),
+ (int, 1, True, "%.1f/s", "service_checks", "Service Checks", ),
+ (int, 1, True, "%.1f/s", "forks", "Process Creations", ),
+ (int, 1, True, "%.1f/s", "connections", "Livestatus Connects", ),
+ (int, 1, True, "%.1f/s", "requests", "Livestatus Requests", ),
+ (int, 1, True, "%.1f/s", "log_messages", "Log Messages", ),
]
- for counter, title in status_counters:
- rate = get_rate("livestatus_status.%s.%s" % (item, counter), this_time, int(status[counter]))
- yield 0, ("%.1f %s/s" % (rate, title)), [(counter, rate)]
+ if status["program_version"].startswith("Check_MK"):
+ # We have a CMC here.
+
+ status_counters += [
+ # conv_func, factor, calc_rate, format, column, title
+ (float, 1, False, "%.3fs", "average_latency_generic", "Average check latency"),
+ (float, 1, False, "%.3fs", "average_latency_cmk", "Average Check_MK latency"),
+ (float, 100, False, "%.1f%%", "helper_usage_generic", "Check helper usage"),
+ (float, 100, False, "%.1f%%", "helper_usage_cmk", "Check_MK helper usage"),
+ (float, 100, False, "%.1f%%", "livestatus_usage", "Livestatus usage"),
+ (float, 1, False, "%.1f/s", "livestatus_overflows_rate", "Livestatus overflow rate"),
+ ]
+
+ for conv_func, factor, calc_rate, fmt, counter, title in status_counters:
+ value = factor * conv_func(status[counter])
+
+ if calc_rate:
+ value = get_rate("livestatus_status.%s.%s" % (item, counter), this_time, value)
+
+ yield 0, ("%s: %s" % (title, fmt % value)), [(counter, value)]
yield 0, "%d Hosts" % int(status["num_hosts"]), [("monitored_hosts", int(status["num_hosts"]))]
yield 0, "%d Services" % int(status["num_services"]), [("monitored_services", int(status["num_services"]))]
diff --git a/web/plugins/metrics/check_mk.py b/web/plugins/metrics/check_mk.py
index 8056665..45f0b02 100644
--- a/web/plugins/metrics/check_mk.py
+++ b/web/plugins/metrics/check_mk.py
@@ -2086,6 +2086,42 @@ metric_info["livestatus_request_rate"] = {
"color" : "#bbccdd",
}
+metric_info["helper_usage_cmk"] = {
+ "title" : _("Check_MK helper usage"),
+ "unit" : "%",
+ "color" : "15/a",
+}
+
+metric_info["helper_usage_generic"] = {
+ "title" : _("Generic helper usage"),
+ "unit" : "%",
+ "color" : "41/a",
+}
+
+metric_info["average_latency_cmk"] = {
+ "title" : _("Check_MK check latency"),
+ "unit" : "s",
+ "color" : "15/a",
+}
+
+metric_info["average_latency_generic"] = {
+ "title" : _("Check latency"),
+ "unit" : "s",
+ "color" : "41/a",
+}
+
+metric_info["livestatus_usage"] = {
+ "title" : _("Livestatus usage"),
+ "unit" : "%",
+ "color" : "12/a",
+}
+
+metric_info["livestatus_overflows_rate"] = {
+ "title" : _("Livestatus overflows"),
+ "unit" : "1/s",
+ "color" : "16/a",
+}
+
metric_info["log_message_rate"] = {
"title" : _("Log messages"),
"unit" : "1/s",
@@ -5825,6 +5861,22 @@ graph_info.append({
})
graph_info.append({
+ "title" : _("Check helper usage"),
+ "metrics" : [
+ ( "helper_usage_cmk", "area" ),
+ ( "helper_usage_generic", "area" ),
+ ],
+})
+
+graph_info.append({
+ "title" : _("Average check latency"),
+ "metrics" : [
+ ( "average_latency_cmk", "area" ),
+ ( "average_latency_generic", "area" ),
+ ],
+})
+
+graph_info.append({
"title" : _("Pending updates"),
"metrics" : [
( "normal_updates", "stack" ),