Module: check_mk
Branch: master
Commit: d98ff03d7af390ff68226cb3bf3902372fcdd7b3
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=d98ff03d7af390ff68226cb3bf3902372fcdd7b3
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Sun Jun 18 16:23:56 2017 +0200
4761 FIX Fixed multisite setups with sites using Check_MK/Livestatus < 1.4
When updating the central site of a distributed setup to Check_MK 1.4.* and
leaving the slave sites on older versions, the other sites were marked as
dead sites while rendering the page.
This issue was caused by the tactical overview snapin, which tried to calculate
the numbers for the "Events" of the Event Console. The slave sites using
older Check_MK versions do not support that kind of query. Once this query
is made, the sites are marked as dead and are also not shown correctly in the
master control and site status snapins.
A possible workaround would be to update all involved sites to Check_MK
1.4.
Also affected are sites that use the Nagios core and have the Event Console
disabled. They still report an error to the GUI. This will be fixed soon.
Change-Id: I0de4cd282c1d06bb160396585ae5ed8e5e0ec959
---
.werks/4761 | 25 +++++++++++++++++++
livestatus/api/python/livestatus.py | 50 +++++++++++++++++++++++++++++++------
web/plugins/sidebar/shipped.py | 13 +++++++++-
3 files changed, 80 insertions(+), 8 deletions(-)
diff --git a/.werks/4761 b/.werks/4761
new file mode 100644
index 0000000..684646a
--- /dev/null
+++ b/.werks/4761
@@ -0,0 +1,25 @@
+Title: Fixed multisite setups with sites using Check_MK/Livestatus < 1.4
+Level: 2
+Component: multisite
+Class: fix
+Compatible: compat
+Edition: cre
+State: unknown
+Version: 1.5.0i1
+Date: 1497795538
+
+When updating the central site of a distributed setup to Check_MK 1.4.* and
+leaving the slave sites on older versions the other sites were marked as
+dead sites while rendering the page.
+
+This issue was caused by the tactical overview snapin that tried to calculate
+the numbers for the "Events" of the Event Console. The slave sites using
+older Check_MK versions do not support that kind of query. Once this query
+is made the sites are marked as dead and are also not shown correctly in the
+master control and site status snapin.
+
+A possible workaround would be to update all involved sites to Check_MK
+1.4.
+
+Also affected are sites that use the Nagios core and have the Event Console
+disabled. They still report an error to the GUI. This will be fixed soon.
diff --git a/livestatus/api/python/livestatus.py b/livestatus/api/python/livestatus.py
index fd51baf..7352ca4 100644
--- a/livestatus/api/python/livestatus.py
+++ b/livestatus/api/python/livestatus.py
@@ -176,6 +176,35 @@ class Helpers:
return result
+
+class Query(object):
+ """This object can be passed to all livestatus methods accepting a livestatus
+ query. The object can be used to hand over the handling code some flags, for
+ example to influence the error handling during query processing."""
+
+ def __init__(self, query, suppress_exceptions=None):
+ super(Query, self).__init__()
+
+ self._query = self._ensure_unicode(query)
+ self.suppress_exceptions = suppress_exceptions or []
+
+
+ def _ensure_unicode(self, thing):
+ try:
+ return unicode(thing)
+ except UnicodeDecodeError:
+ return thing.decode("utf-8")
+
+
+ def __unicode__(self):
+ return self._query
+
+
+ def __str__(self):
+ return self._query.encode("utf-8")
+
+
+
#.
# .--BaseConnection----------------------------------------------------------.
# | ____ ____ _ _ |
@@ -291,12 +320,16 @@ class BaseConnection:
self.send_query(query, add_headers)
return self.recv_response(query, add_headers)
- def send_query(self, query, add_headers = "", do_reconnect=True):
+ def send_query(self, query_obj, add_headers = "", do_reconnect=True):
+ orig_query = query_obj
+
+ query = "%s" % query_obj
if not self.allow_cache:
query = remove_cache_regex.sub("", query)
- orig_query = query
+
if self.socket == None:
self.connect()
+
if not query.endswith("\n"):
query += "\n"
query += self.auth_header + self.add_headers
@@ -707,6 +740,11 @@ class MultiSiteConnection(Helpers):
"site" : site,
}
+ if isinstance(query, Query):
+ suppress_exceptions = tuple(query.suppress_exceptions)
+ else:
+ suppress_exceptions = tuple()
+
# Then retrieve all answers. We will be as slow as the slowest of all
# connections.
result = []
@@ -721,11 +759,9 @@ class MultiSiteConnection(Helpers):
if self.prepend_site:
r = [ [sitename] + l for l in r ]
result += r
- #except MKLivestatusTableNotFoundError:
- # # In case of multi site queries it may happen that one site knows a table and
- # # another site does not have this table because it runs an older version.
- # # Don't mark the site as dead site in such a case.
- # pass
+ except suppress_exceptions:
+ stillalive.append( (sitename, site, connection) )
+ continue
except Exception, e:
self.deadsites[sitename] = {
diff --git a/web/plugins/sidebar/shipped.py b/web/plugins/sidebar/shipped.py
index 3b57db1..2c56c72 100644
--- a/web/plugins/sidebar/shipped.py
+++ b/web/plugins/sidebar/shipped.py
@@ -638,9 +638,20 @@ def get_tactical_overview_data(extra_filter_headers):
stat_only=True,
extra_headers=extra_filter_headers)
+ # Livestatus < 1.4.0 does not know this table. The API will raise a
+ # MKLivestatusTableNotFoundError exception in this case which will mark
+ # the site as dead by default. Adding the exception to the list of
+ # suppressed exception makes livestatus silently ignore this kind of
+ # error. This makes it possible to make livestatus connections with
+ # older Check_MK versions.
+ query = livestatus.Query(
+ event_query,
+ suppress_exceptions=[livestatus.MKLivestatusTableNotFoundError],
+ )
+
try:
sites.live().set_auth_domain("ec")
- event_data = sites.live().query_summed_stats(event_query)
+ event_data = sites.live().query_summed_stats(query)
except livestatus.MKLivestatusNotFoundError:
event_data = [0, 0, 0]
finally: