Module: check_mk
Branch: master
Commit: 6ebb235b65b99074173e9749b881edbf16943bd8
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=6ebb235b65b99074173e9749b881edbf16943bd8
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Thu Feb 11 14:58:17 2016 +0100
#2991 Snapshot restore: Improved robustness and logging
If the restore operation encounters a critical error during the crucial extract
phase, it still tries to complete the snapshot extraction with the remaining data.
The error information is logged to the file <tt>~/var/log/web.log</tt>.
Such errors should never occur anyway - there are several mechanisms to prevent them.
---
.werks/2991 | 12 ++++++++++++
ChangeLog | 1 +
web/htdocs/multitar.py | 25 ++++++++++++++++++++-----
3 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/.werks/2991 b/.werks/2991
new file mode 100644
index 0000000..f3186ae
--- /dev/null
+++ b/.werks/2991
@@ -0,0 +1,12 @@
+Title: Snapshot restore: Improved robustness and logging
+Level: 1
+Component: wato
+Compatible: compat
+Version: 1.2.7i4
+Date: 1455198375
+Class: feature
+
+The restore operation tries to complete the snapshot extraction with the rest of
+the data when it encounters critical errors in the crucial extract phase.
+Error information is logged in the file <tt>~/var/log/web.log</tt>.
+This should never happen anyway - there are several mechanisms to prevent this.
diff --git a/ChangeLog b/ChangeLog
index c717b67..2aaafc9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -334,6 +334,7 @@
* 2923 SNMP community in WATO is now always displayed with ******...
* 2924 WATO can now add contact groups from hosts also to services...
* 2902 Hosts can now be added by regular network scans of configured IP ranges...
+ * 2991 Snapshot restore: Improved robustness and logging...
* 2666 FIX: Fix search for global configuration variables: ignore case now
* 2715 FIX: Fixed visibility of BI aggregations in editor
* 2716 FIX: Fixed error reporting on disabled checks page in case of broken man pages
diff --git a/web/htdocs/multitar.py b/web/htdocs/multitar.py
index d635965..d020186 100644
--- a/web/htdocs/multitar.py
+++ b/web/htdocs/multitar.py
@@ -197,7 +197,7 @@ def extract_domains(tar, domains):
try:
target_dir = domain.get("prefix")
if not target_dir:
- return
+ return []
# The complete tar.gz file never fits in stringIO buffer..
tar.extract(tar_member, restore_dir)
@@ -217,6 +217,8 @@ def extract_domains(tar, domains):
except Exception, e:
return [ "%s - %s" % (domain["title"], str(e)) ]
+ return []
+
def execute_restore(domain, is_pre_restore = True):
if is_pre_restore:
@@ -227,8 +229,9 @@ def extract_domains(tar, domains):
return domain["post_restore"]()
return []
-
total_errors = []
+ logger(LOG_INFO, "Restoring snapshot: %s" % tar.name)
+ logger(LOG_INFO, "Domains: %s" % ", ".join(tar_domains.keys()))
for what, abort_on_error, handler in [
("Permissions", True, lambda domain, tar_member:
check_domain(domain, tar_member)),
("Pre-Restore", True, lambda domain, tar_member:
execute_restore(domain, is_pre_restore = True)),
@@ -239,9 +242,21 @@ def extract_domains(tar, domains):
errors = []
for name, tar_member in tar_domains.items():
if name in domains:
- dom_errors = handler(domains[name], tar_member)
- if dom_errors:
- errors.extend(dom_errors)
+ try:
+ dom_errors = handler(domains[name], tar_member)
+ errors.extend(dom_errors or [])
+ except Exception, e:
+ # This should NEVER happen
+ err_info = "Restore-Phase: %s, Domain: %s\nError: %s" % (what, name, format_exception())
+ errors.append(err_info)
+ logger(LOG_CRIT, err_info)
+ if abort_on_error == False:
+ # At this state, the restored data is broken.
+ # We still try to apply the rest of the snapshot
+ # Hopefully the log entry helps in identifying the problem..
+ logger(LOG_ALERT, "Snapshot restore FAILED! (possible loss of snapshot data)")
+ continue
+ break
if errors:
if what == "Permissions":