Module: check_mk
Branch: master
Commit: 449a6410d10f45d1ed0fd18f96cf1ddd405dabc8
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=449a6410d10f45d1ed0fd18f96cf1ddd405dabc8
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Thu Nov 19 14:18:37 2015 +0100
#2843 FIX Hanging notification scripts could prevent all other notifications
Notification scripts are assumed to finish within a short time, such as a
couple of seconds. If a notification script takes longer or hangs forever,
it prevents Check_MK from sending out other notifications. So one hanging
notification could block all others.
This has been fixed by introducing a global notification plugin timeout.
Any notification plugin that runs for more than 60 seconds (the default)
is killed so that other notifications can be processed again. If you need
a different timeout, you can configure it using the global setting
"Notification plugin timeout".
---
.werks/2843 | 17 ++++++++++
ChangeLog | 1 +
modules/notify.py | 54 +++++++++++++++++++++++-------
web/plugins/wato/globals_notification.py | 10 ++++++
4 files changed, 70 insertions(+), 12 deletions(-)
diff --git a/.werks/2843 b/.werks/2843
new file mode 100644
index 0000000..ce1739d
--- /dev/null
+++ b/.werks/2843
@@ -0,0 +1,17 @@
+Title: Hanging notification scripts could prevent all other notifications
+Level: 2
+Component: notifications
+Compatible: compat
+Version: 1.2.7i4
+Date: 1447938942
+Class: fix
+
+Notification scripts are assumed to finish within short time like a couple
+of seconds. If a notification script takes longer or is hanging for ever,
+it prevent Check_MK from sending out other notifications. So one hanging
+notification could prevent all others.
+
+This has been fixed by introducing a global notification plugin timeout.
+All notification plugins which take more than 60 seconds will be killed
+so that other notifications can be done again. If you need a different
+timeout, you can configure it using the global setting "Notification plugin timeout".
diff --git a/ChangeLog b/ChangeLog
index d8bccc8..f1e39c5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -136,6 +136,7 @@
Notifications:
* 2728 FIX: Removed debug notification script (was not executable)...
+ * 2843 FIX: Hanging notification scripts could prevent all other notifications...
Event Console:
* 2733 Visibility of Event Console events more flexible now...
diff --git a/modules/notify.py b/modules/notify.py
index 5be18a5..1acb97c 100644
--- a/modules/notify.py
+++ b/modules/notify.py
@@ -62,6 +62,7 @@ enable_rulebased_notifications = False
notification_fallback_email = ""
notification_rules = []
notification_bulk_interval = 10 # Check every 10 seconds for ripe bulks
+notification_plugin_timeout = 60
# Notification Spooling.
@@ -1116,6 +1117,9 @@ def path_to_notification_script(plugin):
def call_notification_script(plugin, plugin_context):
core_notification_log(plugin, plugin_context)
+ def plugin_log(s):
+ notify_log(" %s" % s)
+
# The "Pseudo"-Plugin None means builtin plain email
if not plugin:
return notify_via_email(plugin_context)
@@ -1125,26 +1129,52 @@ def call_notification_script(plugin, plugin_context):
if not path:
return 2
- # Export complete context to have all vars in environment.
- # Existing vars are replaced, some already existing might remain
- add_context_to_environment(plugin_context, "NOTIFY_")
+ plugin_log("executing %s" % path)
+ try:
+ set_notification_timeout()
+ p = subprocess.Popen([path], shell=False, stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
env=notification_script_env(plugin_context))
+
+ for line in p.stdout:
+ plugin_log("Output: %s" % line.rstrip().decode('utf-8'))
+
+ exitcode = p.returncode
+ clear_notification_timeout()
+ except NotificationTimeout:
+ plugin_log("Notification plugin did not finish within %d seconds.
Terminating." %
+
notification_plugin_timeout)
+ p.kill()
+ exitcode = 1
- notify_log(" executing %s" % path)
- out = os.popen(path + " 2>&1 </dev/null")
- for line in out:
- notify_log("Output: %s" % line.rstrip().decode('utf-8'))
- exitcode = out.close()
if exitcode:
- notify_log("Plugin exited with code %d" % (exitcode >> 8))
+ plugin_log("Plugin exited with code %d" % exitcode)
else:
exitcode = 0
- # Clear environment again.
- remove_context_from_environment(plugin_context, "NOTIFY_")
-
return exitcode
+# Construct the environment for the notification script
+def notification_script_env(plugin_context):
+ return dict([ ("NOTIFY_" + k, v.encode("utf-8"))
+ for k, v in plugin_context.items() ])
+
+
+class NotificationTimeout(Exception):
+ pass
+
+
+def handle_notification_timeout(signum, frame):
+ raise NotificationTimeout()
+
+
+def set_notification_timeout():
+ signal.signal(signal.SIGALRM, handle_notification_timeout)
+ signal.alarm(notification_plugin_timeout)
+
+
+def clear_notification_timeout():
+ signal.alarm(0)
#.
# .--Spooling------------------------------------------------------------.
diff --git a/web/plugins/wato/globals_notification.py b/web/plugins/wato/globals_notification.py
index ad05e57..f63e8d0 100644
--- a/web/plugins/wato/globals_notification.py
+++ b/web/plugins/wato/globals_notification.py
@@ -87,6 +87,16 @@ register_configvar(group,
need_restart = True)
register_configvar(group,
+ "notification_plugin_timeout",
+ Age(
+ title = _("Notification plugin timeout"),
+ help = _("After the configured time notification plugins are being
interrupted."),
+ default_value = 60,
+ minvalue = 1,
+ ),
+ domain = "check_mk")
+
+register_configvar(group,
"notification_logging",
Transform(
DropdownChoice(