Module: check_mk
Branch: master
Commit: 688610983aaf6d29ea57de323a0c142967d16fec
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=688610983aaf6d29ea57de323a0c142967d16fec
Author: Óscar Nájera <on(a)mathias-kettner.de>
Date: Wed Jan 23 18:11:16 2019 +0100
WATO rules for k8s network interfaces
CMK-1542
Change-Id: I9dd74194a09e96822dbfa473b751bb21fc9c559d
---
checks/k8s_stats | 64 +++++++++++++++++++++-------
cmk/gui/plugins/wato/check_parameters/if.py | 47 ++++++++++++++++++++
tests/unit/cmk/gui/watolib/test_rulespecs.py | 33 +++++++++++++-
3 files changed, 128 insertions(+), 16 deletions(-)
diff --git a/checks/k8s_stats b/checks/k8s_stats
index d07b0c3..fe3c5ed 100644
--- a/checks/k8s_stats
+++ b/checks/k8s_stats
@@ -39,6 +39,29 @@ def discover_k8s_stats_fs(key, value):
return [x for x in devices if x not in ignore_fs]
+def k8s_network_err_pac(device, params, now):
+ for name, mway, pway in [('Input', 'rx', 'in'),
('Output', 'tx', 'out')]:
+ pac_rate = get_rate('if.%s.if_%s_pkts' % (device['name'], pway),
now,
+ device['%s_packets' % mway])
+ err_rate = get_rate('if.%s.if_%s_errors' % (device['name'],
pway), now,
+ device['%s_errors' % mway])
+ warn, crit = params.get('errors', (None, None))
+ perf_data = [('if_%s_pkts' % pway, pac_rate), ('if_%s_errors' %
pway, err_rate)]
+ if isinstance(warn, float):
+ try:
+ err_perc = 100.0 * err_rate / (err_rate + pac_rate)
+ except ZeroDivisionError:
+ err_perc = 0
+
+ status, infotext, _perf = check_levels(
+ err_perc, 'err_perc', (warn, crit), unit='%',
infoname=name + " errors percentage")
+ else: # absolute levels or no levels
+ status, infotext, _perf = check_levels(
+ err_rate, 'if_errors', (warn, crit), unit='/s',
infoname=name + " error rate")
+
+ yield status, infotext, perf_data
+
+
def check_k8s_stats_network(item, params, metrics):
def get_if():
for device in metrics['network']['interfaces']:
@@ -46,21 +69,31 @@ def check_k8s_stats_network(item, params, metrics):
return device
device = get_if()
-
- perf_names = [
- ('In', 'rx_bytes', 'in', lambda x: "%s/s" %
get_bytes_human_readable(x)),
- ('Out', 'tx_bytes', 'out', lambda x: "%s/s" %
get_bytes_human_readable(x)),
- ('Input Packets', 'rx_packets', 'if_in_pkts', None),
- ('Output Packets', 'tx_packets', 'if_out_pkts', None),
- ('Input Errors', 'rx_errors', 'if_in_errors', None),
- ('Output Errors', 'tx_errors', 'if_out_errors', None),
- ('Input Discards', 'rx_dropped', 'if_in_discards',
None),
- ('Output Discards', 'tx_dropped', 'if_out_discards',
None),
- ]
-
- for name, met, dsname, hrf in perf_names:
- rate = get_rate('if.%s.%s' % (item, dsname), time.time(), device[met])
- yield check_levels(rate, dsname, params, unit='/s',
human_readable_func=hrf, infoname=name)
+ now = time.strptime(metrics['timestamp'][:-4],
"%Y-%m-%dT%H:%M:%S.%f")
+ now = time.mktime(now) - time.timezone
+
+ # Bandwidth
+ for name, met, dsname in [('In', 'rx_bytes', 'in'),
('Out', 'tx_bytes', 'out')]:
+ rate = get_rate('if.%s.%s' % (item, dsname), now, device[met])
+ yield check_levels(
+ rate,
+ dsname,
+ None,
+ unit='/s',
+ human_readable_func=lambda x: "%s/s" %
get_bytes_human_readable(x),
+ infoname=name)
+
+ # Errors / Packets
+ for check_result in k8s_network_err_pac(device, params, now):
+ yield check_result
+
+ # Discards
+ for name, met, dsname in [
+ ('Input Discards', 'rx_dropped', 'if_in_discards'),
+ ('Output Discards', 'tx_dropped', 'if_out_discards'),
+ ]:
+ rate = get_rate('if.%s.%s' % (item, dsname), now, device[met])
+ yield check_levels(rate, dsname, params.get('discards'),
unit='/s', infoname=name)
def check_k8s_stats_fs(item, params, metrics):
@@ -91,6 +124,7 @@ check_info['k8s_stats.network'] = {
'check_function': check_k8s_stats_network,
'service_description': 'Interface %s',
"has_perfdata": True,
+ "group": "k8s_if",
'includes': ['k8s.include'],
}
diff --git a/cmk/gui/plugins/wato/check_parameters/if.py
b/cmk/gui/plugins/wato/check_parameters/if.py
index e11e369..7a9647d 100644
--- a/cmk/gui/plugins/wato/check_parameters/if.py
+++ b/cmk/gui/plugins/wato/check_parameters/if.py
@@ -583,3 +583,50 @@ register_check_parameters(
TextAscii(title=_("port specification"), allow_empty=False),
"dict",
)
+
+register_check_parameters(
+ RulespecGroupCheckParametersNetworking,
+ "k8s_if",
+ _("Kubernetes Network interfaces"),
+ Dictionary(elements=[
+ ("errors",
+ Alternative(
+ title=_("Levels for error rates"),
+ help=
+ _("These levels make the check go warning or critical whenever the
"
+ "<b>percentual error rate</b> or the <b>absolute
error rate</b> of the monitored interface reaches "
+ "the given bounds. The percentual error rate is computed by dividing
number of "
+ "errors by the total number of packets (successful plus
errors)."),
+ elements=[
+ Tuple(
+ title=_("Percentual levels for error rates"),
+ elements=[
+ Percentage(
+ title=_("Warning at"),
+ unit=_("percent errors"),
+ default_value=0.01,
+ display_format='%.3f'),
+ Percentage(
+ title=_("Critical at"),
+ unit=_("percent errors"),
+ default_value=0.1,
+ display_format='%.3f')
+ ]),
+ Tuple(
+ title=_("Absolute levels for error rates"),
+ elements=[
+ Integer(title=_("Warning at"),
unit=_("errors")),
+ Integer(title=_("Critical at"),
unit=_("errors"))
+ ])
+ ])),
+ ("discards",
+ Tuple(
+ title=_("Absolute levels for discards rates"),
+ elements=[
+ Integer(title=_("Warning at"), unit=_("discards")),
+ Integer(title=_("Critical at"), unit=_("discards"))
+ ])),
+ ]),
+ TextAscii(title=_("port specification"), allow_empty=False),
+ "dict",
+)
diff --git a/tests/unit/cmk/gui/watolib/test_rulespecs.py
b/tests/unit/cmk/gui/watolib/test_rulespecs.py
index fd35786..a6f662d 100644
--- a/tests/unit/cmk/gui/watolib/test_rulespecs.py
+++ b/tests/unit/cmk/gui/watolib/test_rulespecs.py
@@ -31,7 +31,6 @@ from cmk.gui.plugins.wato.utils import (
)
-
def test_rulespec_sub_group():
class TestGroup(RulespecGroup):
@property
@@ -143,6 +142,7 @@ def test_grouped_rulespecs():
'static_checks:bluecat_dns',
'static_checks:bluecat_ha',
'static_checks:steelhead_connections',
+ 'static_checks:k8s_if',
],
'eventconsole': [
'extra_host_conf:_ec_event_limit',
@@ -242,6 +242,7 @@ def test_grouped_rulespecs():
'checkgroup_parameters:cisco_qos',
'if_groups',
'if_disable_if64_hosts',
+ 'checkgroup_parameters:k8s_if',
'checkgroup_parameters:adva_ifs',
'checkgroup_parameters:bluecat_ntp',
'checkgroup_parameters:bluecat_dhcp',
@@ -6079,6 +6080,21 @@ expected_rulespecs = {
'title': u'JVM uptime (since last reboot)',
'valuespec_class_name': 'TimeperiodValuespec'
},
+ 'checkgroup_parameters:k8s_if': {
+ 'factory_default': [],
+ 'group_name': 'checkparams/networking',
+ 'help': None,
+ 'is_deprecated': False,
+ 'is_optional': False,
+ 'item_enum': None,
+ 'item_help': None,
+ 'item_name': u'port specification',
+ 'item_spec_class_name': 'TextAscii',
+ 'item_type': 'item',
+ 'match_type': 'dict',
+ 'title': u'Kubernetes Network interfaces',
+ 'valuespec_class_name': 'TimeperiodValuespec'
+ },
'checkgroup_parameters:k8s_pods_cpu': {
'factory_default': [],
'group_name': 'checkparams/applications',
@@ -14154,6 +14170,21 @@ expected_rulespecs = {
'title': u'JVM uptime (since last reboot)',
'valuespec_class_name': 'Tuple'
},
+ 'static_checks:k8s_if': {
+ 'factory_default': [],
+ 'group_name': 'static/networking',
+ 'help': None,
+ 'is_deprecated': False,
+ 'is_optional': False,
+ 'item_enum': None,
+ 'item_help': None,
+ 'item_name': None,
+ 'item_spec_class_name': 'TextAscii',
+ 'item_type': None,
+ 'match_type': 'all',
+ 'title': u'Kubernetes Network interfaces',
+ 'valuespec_class_name': 'Tuple'
+ },
'static_checks:k8s_pods_cpu': {
'factory_default': [],
'group_name': 'static/applications',