Module: check_mk
Branch: master
Commit: be74ef90838ca92a010615470ce5d80b1b631766
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=be74ef90838ca92a010615470ce5d80b1b631766
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Tue Jan 2 15:31:24 2018 +0100
5590 FIX Fixed possible deadlock in shutdown/restart procedure
During the shutdown/restart procedure of the livestatus proxy
daemon a deadlock could occur. It seems this could rarely happen
when the system running the process is under high load together
with a large number of sites (>100) configured.
When this situation occurs, you can see several leftover
liveproxyd processes (master + several site processes) which
can only be terminated with SIGKILL. When looking at the processes
with strace, you can see hanging futex() calls.
Workaround: In such a situation it is safe to simply kill all those
processes using SIGKILL and start a clean liveproxyd process after
that.
Change-Id: I56dd9bb0d0f01bbe7a394a74bc9d0d047f95265f
---
.werks/5590 | 22 ++++++++++++++++++++++
tests/livestatus/test_liveproxyd.py | 6 ++++--
2 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/.werks/5590 b/.werks/5590
new file mode 100644
index 0000000..ee69425
--- /dev/null
+++ b/.werks/5590
@@ -0,0 +1,22 @@
+Title: Fixed possible deadlock in shutdown/restart procedure
+Level: 1
+Component: liveproxy
+Compatible: compat
+Edition: cee
+Version: 1.5.0i3
+Date: 1514903032
+Class: fix
+
+During the shutdown/restart procedure of the livestatus proxy
+daemon a deadlock could occur. It seems this could rarely happen
+when the system running the process is under high load together
+with a large number of sites (>100) configured.
+
+When this situation occurs, you can see several leftover
+liveproxyd processes (master + several site processes) which
+can only be terminated with SIGKILL. When looking at the processes
+with strace, you can see hanging futex() calls.
+
+Workaround: In such a situation it is safe to simply kill all those
+processes using SIGKILL and start a clean liveproxyd process after
+that.
diff --git a/tests/livestatus/test_liveproxyd.py b/tests/livestatus/test_liveproxyd.py
index b451b2b..ea0662e 100644
--- a/tests/livestatus/test_liveproxyd.py
+++ b/tests/livestatus/test_liveproxyd.py
@@ -6,8 +6,6 @@ import pytest
import signal
from testlib import web
-pytestmark = pytest.mark.skipif(True, reason="liveproxyd remains in an unkillable state with tons of child processes")
-
@pytest.fixture(scope="module")
def default_cfg(web):
print "Applying default config"
@@ -182,6 +180,9 @@ def test_large_number_of_sites(default_cfg, site):
site.write_file("etc/check_mk/conf.d/liveproxyd-test.mk",
"cmc_livestatus_threads = %d\n" % (num_channels*num_sites + 20))
+ site.write_file("etc/check_mk/multisite.d/sites.mk",
+ "sites.update(%r)\n" % livestatus_api_sites)
+
site.execute(["cmk", "-O"])
# Disable limits of livestatus xinetd service
@@ -218,3 +219,4 @@ def test_large_number_of_sites(default_cfg, site):
finally:
_use_liveproxyd_for_local_site(site, proto="unix")
site.delete_file("etc/check_mk/conf.d/liveproxyd-test.mk")
+ site.delete_file("etc/check_mk/multisite.d/sites.mk")