Module: check_mk
Branch: master
Commit: c4369905e1f7a372d632ebb18d30f59f7521e66a
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=c4369905e1f7a3…
Author: Simon Betz <si(a)mathias-kettner.de>
Date: Tue Jul 31 13:17:39 2018 +0200
6387 FIX Fixed staleness of services on piggybacked hosts
Change-Id: Ib8691b07ff601567b8977475a71e8fecd7e9c342
---
.werks/6387 | 21 +++++++++++++
cmk/store.py | 2 --
cmk_base/piggyback.py | 81 ++++++++++++++++++++++++++++++++++++---------------
3 files changed, 78 insertions(+), 26 deletions(-)
diff --git a/.werks/6387 b/.werks/6387
new file mode 100644
index 0000000..8b742af
--- /dev/null
+++ b/.werks/6387
@@ -0,0 +1,21 @@
+Title: Fixed staleness of services on piggybacked hosts
+Level: 1
+Component: checks
+Class: fix
+Compatible: compat
+Edition: cre
+State: unknown
+Version: 1.6.0i1
+Date: 1532692225
+
+The VSphere special agent may provide piggyback data for other hosts.
+Sometimes all piggybacked services on these hosts became stale because
+the piggyback files were incorrectly detected as outdated.
+Other piggyback hosts may also be affected.
+
+Internal notes:
+A piggyback host writes piggyback files and an additional status file.
+The age of each piggyback file is compared to the age of the status file
+in order to detect the validity of the piggyback file.
+Within the cachefile_age function the timestamp of 'now' is initialized
+every time for every file. This may result in disregarded time deltas.
diff --git a/cmk/store.py b/cmk/store.py
index 88f1e3a..e55b67e 100644
--- a/cmk/store.py
+++ b/cmk/store.py
@@ -206,8 +206,6 @@ def save_data_to_file(path, data, pretty=True):
def save_file(path, content, mode=0660):
tmp_path = None
try:
- tmp_path = None
-
# Normally the file is already locked (when data has been loaded before with
lock=True),
# but lock it just to be sure we have the lock on the file.
#
diff --git a/cmk_base/piggyback.py b/cmk_base/piggyback.py
index 1ab48c9..8f72fcd 100644
--- a/cmk_base/piggyback.py
+++ b/cmk_base/piggyback.py
@@ -25,6 +25,7 @@
# Boston, MA 02110-1301 USA.
import os
+import tempfile
import cmk.paths
import cmk.translations
@@ -41,8 +42,12 @@ def get_piggyback_raw_data(piggyback_max_cachefile_age, hostname):
return output
for source_host, piggyback_file_path in
_get_piggyback_files(piggyback_max_cachefile_age, hostname):
- console.verbose("Using piggyback raw data from host %s.\n" %
source_host)
- output += file(piggyback_file_path).read()
+ try:
+ output += file(piggyback_file_path).read()
+ console.verbose("Using piggyback raw data from host %s.\n" %
source_host)
+ except IOError, e:
+ console.verbose("Cannot read piggyback raw data from host %s: %s\n"
% (source_host, e))
+ continue
return output
@@ -88,18 +93,15 @@ def _get_piggyback_files(piggyback_max_cachefile_age, hostname):
# Skip piggyback files that are outdated at all
if file_age > piggyback_max_cachefile_age:
console.verbose("Piggyback file %s is outdated (%d seconds too old).
Skip processing.\n" %
- (piggyback_file_path, file_age - piggyback_max_cachefile_age))
+ (piggyback_file_path, file_age -
piggyback_max_cachefile_age))
continue
- # Skip piggyback files that have not been updated in the last contact
- # with the source host that is currently being handled.
- try:
- source_update_age = _piggyback_source_host_update_age(source_host)
- except MKGeneralException, e:
+ status_file_path = _piggyback_source_status_path(source_host)
+ if not os.path.exists(status_file_path):
console.verbose("Piggyback file %s is outdated (Source not sending
piggyback). Skip processing.\n" % piggyback_file_path)
- continue # No source_status_file exists -> ignore data from this source
+ continue
- if file_age > source_update_age:
+ if _is_piggyback_file_outdated(status_file_path, piggyback_file_path):
console.verbose("Piggyback file %s is outdated (Not updated by source).
Skip processing.\n" % piggyback_file_path)
continue
@@ -108,12 +110,21 @@ def _get_piggyback_files(piggyback_max_cachefile_age, hostname):
return files
-def _piggyback_source_status_path(source_host):
- return os.path.join(cmk.paths.tmp_dir, "piggyback_sources", source_host)
+def _is_piggyback_file_outdated(status_file_path, piggyback_file_path):
+ try:
+ # On POSIX platforms Python reads atime and mtime at nanosecond resolution
+ # but only writes them at microsecond resolution.
+ # (We're using os.utime() in _store_status_file_of())
+ return os.stat(status_file_path)[8] > os.stat(piggyback_file_path)[8]
+ except OSError, e:
+ if e.errno == 2: # No such file or directory
+ return True
+ else:
+ raise
-def _piggyback_source_host_update_age(source_host):
- return cmk_base.utils.cachefile_age(_piggyback_source_status_path(source_host))
+def _piggyback_source_status_path(source_host):
+ return os.path.join(cmk.paths.tmp_dir, "piggyback_sources", source_host)
def _remove_piggyback_file(piggyback_file_path):
@@ -135,21 +146,46 @@ def remove_source_status_file(source_host):
def store_piggyback_raw_data(source_host, piggybacked_raw_data):
- for backedhost, lines in piggybacked_raw_data.items():
- console.verbose("Storing piggyback data for: %s\n" % backedhost)
+ piggyback_file_paths = []
+ for piggybacked_host, lines in piggybacked_raw_data.items():
+ piggyback_file_path = os.path.join(cmk.paths.tmp_dir, "piggyback",
piggybacked_host, source_host)
+ console.verbose("Storing piggyback data for: %s\n" % piggybacked_host)
content = "\n".join(lines) + "\n"
- store.save_file(os.path.join(cmk.paths.tmp_dir, "piggyback",
backedhost, source_host), content)
+ store.save_file(piggyback_file_path, content)
+ piggyback_file_paths.append(piggyback_file_path)
# Store the last contact with this piggyback source to be able to filter outdated
data later
# We use the mtime of this file later for comparision.
# Only do this for hosts that sent piggyback data this turn, cleanup the status file
when no
# piggyback data was sent this turn.
if piggybacked_raw_data:
- store.save_file(_piggyback_source_status_path(source_host), "")
+ status_file_path = _piggyback_source_status_path(source_host)
+ _store_status_file_of(status_file_path, piggyback_file_paths)
else:
remove_source_status_file(source_host)
+def _store_status_file_of(status_file_path, piggyback_file_paths):
+ with tempfile.NamedTemporaryFile("w",
dir=os.path.dirname(status_file_path),
+ prefix=".%s.new" %
os.path.basename(status_file_path),
+ delete=False) as tmp:
+ tmp_path = tmp.name
+ os.chmod(tmp_path, 0660)
+ tmp.write("")
+
+ tmp_stats = os.stat(tmp_path)
+ status_file_times = (tmp_stats.st_atime, tmp_stats.st_mtime)
+ for piggyback_file_path in piggyback_file_paths:
+ try:
+ os.utime(piggyback_file_path, status_file_times)
+ except OSError, e:
+ if e.errno == 2: # No such file or directory
+ continue
+ else:
+ raise
+ os.rename(tmp_path, status_file_path)
+
+
def cleanup_piggyback_files(piggyback_max_cachefile_age):
"""This is a housekeeping job to clean up different old files from
the
piggyback directories.
@@ -242,14 +278,11 @@ def _shall_cleanup_piggyback_file(piggyback_max_cachefile_age,
piggyback_file_pa
if file_age > piggyback_max_cachefile_age:
return "%d seconds too old" % (file_age - piggyback_max_cachefile_age)
- # Skip piggyback files that have not been updated in the last contact
- # with the source host that is currently being handled.
- try:
- source_update_age = _piggyback_source_host_update_age(source_host_name)
- except MKGeneralException, e:
+ status_file_path = _piggyback_source_status_path(source_host_name)
+ if not os.path.exists(status_file_path):
return "Source not sending piggyback"
- if file_age > source_update_age:
+ if _is_piggyback_file_outdated(status_file_path, piggyback_file_path):
return "Not updated by source"
return None