Module: check_mk
Branch: master
Commit: a76dacd3167f3388c5f177e6949ee9883b14b0e2
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=a76dacd3167f33…
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Tue Mar 26 14:50:45 2019 +0100
WK7250: Check_MK Discovery: Clusters were unable to automatically discover new services
Change-Id: Ie975f6998666c67ee598bface62457720fa34476
---
.werks/7250 | 22 ++++++++++++++
cmk_base/automations/check_mk.py | 4 ++-
cmk_base/discovery.py | 64 ++++++++++++++++++++++++----------------
3 files changed, 64 insertions(+), 26 deletions(-)
diff --git a/.werks/7250 b/.werks/7250
new file mode 100644
index 0000000..fb19dbf
--- /dev/null
+++ b/.werks/7250
@@ -0,0 +1,22 @@
+Title: Check_MK Discovery: Clusters were unable to automatically discover new services
+Level: 1
+Component: checks
+Class: fix
+Compatible: incomp
+Edition: cre
+State: unknown
+Version: 1.6.0i1
+Date: 1553243799
+
+The <tt>Check_MK Discovery</tt> check for a cluster was able to identify
new/vanished services.
+However, the automatic discovery mechanism in the background failed to integrate them
into the configuration.
+
+
+This has been fixed. There is still one constraint which makes this werk incompatible.
+The <tt>Periodic service discovery</tt> rule allows you to configure
<tt>Automatically activate changes</tt> for the specified host.
+If you want automatic activation of new services for cluster hosts, you have to
configure a rule on the nodes instead of the cluster hosts.
+So any <tt>Periodic service discovery</tt> rules of clusters should be
duplicated onto the nodes.
+
+
+This requirement is currently necessary because of the internal configuration.
+Clusters never have an explicit configuration file; the actual cluster's configuration is
computed on demand.
diff --git a/cmk_base/automations/check_mk.py b/cmk_base/automations/check_mk.py
index b883ba6..9c4d799 100644
--- a/cmk_base/automations/check_mk.py
+++ b/cmk_base/automations/check_mk.py
@@ -116,7 +116,9 @@ class AutomationDiscovery(DiscoveryAutomation):
for hostname in hostnames:
result, error = discovery.discover_on_host(how, hostname, do_snmp_scan,
use_caches,
on_error)
- counts[hostname] = result
+ counts[hostname] = result["self_new"],
result["self_removed"], result[
+ "self_kept"], result["self_total"]
+
if error is not None:
failed_hosts[hostname] = error
else:
diff --git a/cmk_base/discovery.py b/cmk_base/discovery.py
index f3a4abd..ae38c19 100644
--- a/cmk_base/discovery.py
+++ b/cmk_base/discovery.py
@@ -213,10 +213,17 @@ def discover_on_host(mode,
use_caches,
on_error="ignore",
service_filter=None):
- counts = {"added": 0, "removed": 0, "kept": 0}
+ counts = {
+ "self_new": 0,
+ "self_removed": 0,
+ "self_kept": 0,
+ "self_total": 0,
+ "clustered_new": 0,
+ "clustered_vanished": 0,
+ }
if hostname not in config.all_active_realhosts():
- return [0, 0, 0, 0], ""
+ return counts, ""
if service_filter is None:
service_filter = lambda hostname, check_plugin_name, item: True
@@ -228,7 +235,7 @@ def discover_on_host(mode,
# checks of the host, so that _get_host_services() does show us the
# new discovered check parameters.
if mode == "refresh":
- counts["removed"] += remove_autochecks_of(hostname) # this is
cluster-aware!
+ counts["self_removed"] += remove_autochecks_of(hostname) # this is
cluster-aware!
if config.is_cluster(hostname):
ipaddress = None
@@ -259,13 +266,13 @@ def discover_on_host(mode,
if check_source == "new":
if mode in ("new", "fixall", "refresh") and
service_filter(
hostname, check_plugin_name, item):
- counts["added"] += 1
+ counts["self_new"] += 1
new_items[(check_plugin_name, item)] = paramstring
elif check_source in ("old", "ignored"):
# keep currently existing valid services in any case
new_items[(check_plugin_name, item)] = paramstring
- counts["kept"] += 1
+ counts["self_kept"] += 1
elif check_source == "vanished":
# keep item, if we are currently only looking for new services
@@ -273,12 +280,13 @@ def discover_on_host(mode,
if mode not in ("fixall",
"remove") or not service_filter(hostname,
check_plugin_name, item):
new_items[(check_plugin_name, item)] = paramstring
- counts["kept"] += 1
+ counts["self_kept"] += 1
else:
- counts["removed"] += 1
+ counts["self_removed"] += 1
elif check_source.startswith("clustered_"):
# Silently keep clustered services
+ counts[check_source] += 1
new_items[(check_plugin_name, item)] = paramstring
else:
@@ -292,8 +300,9 @@ def discover_on_host(mode,
if cmk.utils.debug.enabled():
raise
err = str(e)
- return [counts["added"], counts["removed"],
counts["kept"],
- counts["added"] + counts["kept"]], err
+
+ counts["self_total"] = counts["self_new"] +
counts["self_kept"]
+ return counts, err
#.
@@ -397,8 +406,12 @@ def check_discovery(hostname, ipaddress):
"ignored: %s: %s" % (check_plugin_name,
config.service_description(hostname,
check_plugin_name, item)))
- _set_rediscovery_flag(hostname, need_rediscovery)
if need_rediscovery:
+ if config.is_cluster(hostname):
+ for nodename in config.nodes_of(hostname):
+ _set_rediscovery_flag(nodename)
+ else:
+ _set_rediscovery_flag(hostname)
infotexts.append("rediscovery scheduled")
# Add data source information to check results
@@ -436,7 +449,7 @@ def default_discovery_check_parameters():
}
-def _set_rediscovery_flag(hostname, need_rediscovery):
+def _set_rediscovery_flag(hostname):
def touch(filename):
if not os.path.exists(filename):
f = open(filename, "w")
@@ -444,16 +457,10 @@ def _set_rediscovery_flag(hostname, need_rediscovery):
autodiscovery_dir = cmk.utils.paths.var_dir + '/autodiscovery'
discovery_filename = os.path.join(autodiscovery_dir, hostname)
- if need_rediscovery:
- if not os.path.exists(autodiscovery_dir):
- os.makedirs(autodiscovery_dir)
- touch(discovery_filename)
- else:
- if os.path.exists(discovery_filename):
- try:
- os.remove(discovery_filename)
- except OSError:
- pass
+
+ if not os.path.exists(autodiscovery_dir):
+ os.makedirs(autodiscovery_dir)
+ touch(discovery_filename)
class DiscoveryTimeout(Exception):
@@ -595,12 +602,19 @@ def _discover_marked_host(hostname, all_hosts, now_ts,
oldest_queued):
# compatible with the automation code
console.verbose(" failed: host is offline\n")
else:
- new_services, removed_services, kept_services, total_services = result
- if new_services == 0 and removed_services == 0 and kept_services ==
total_services:
+ if result["self_new"] == 0 and\
+ result["self_removed"] == 0 and\
+ result["self_kept"] == result["self_total"] and\
+ result["clustered_new"] == 0 and\
+ result["clustered_vanished"] == 0:
console.verbose(" nothing changed.\n")
else:
- console.verbose(
- " %d new, %d removed, %d kept, %d total services.\n" %
(tuple(result)))
+ console.verbose(" %(self_new)s new, %(self_removed)s removed,
"\
+ "%(self_kept)s kept, %(self_total)s total services.
"\
+ "clustered new %(clustered_new)s, clustered vanished
%(clustered_vanished)s" % result)
+
+ # Note: Even if the actual mark-for-discovery flag may have been created
by a cluster host,
+ # the activation decision is based on the discovery configuration
of the node
if redisc_params["activation"]:
services_changed = True