Module: check_mk
Branch: master
Commit: 7d48e639d05fa0f6c560df88efda394060f68e9a
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=7d48e639d05fa0f6c560df88efda394060f68e9a
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Thu Dec 13 11:55:43 2018 +0100
added WK6695: User synchronization: Fixed scenario where an update thread could get stuck indefinitely
Change-Id: I692e9d62f4fc3fea82942119ce726a14c4572a19
---
.werks/6695 | 11 ++++++++++
cmk/gui/plugins/userdb/ldap_connector.py | 35 +++++++++++++++++++++++++-------
cmk/gui/watolib.py | 17 ++++++++++------
3 files changed, 50 insertions(+), 13 deletions(-)
diff --git a/.werks/6695 b/.werks/6695
new file mode 100644
index 0000000..57a65e9
--- /dev/null
+++ b/.werks/6695
@@ -0,0 +1,11 @@
+Title: User synchronization: Fixed scenario where an update thread could get stuck indefinitely
+Level: 1
+Component: wato
+Class: fix
+Compatible: compat
+Edition: cre
+State: unknown
+Version: 1.6.0i1
+Date: 1543835110
+
+In this scenario the background job never finished, preventing any new updates.
diff --git a/cmk/gui/plugins/userdb/ldap_connector.py b/cmk/gui/plugins/userdb/ldap_connector.py
index c9773c9..8773ddd 100644
--- a/cmk/gui/plugins/userdb/ldap_connector.py
+++ b/cmk/gui/plugins/userdb/ldap_connector.py
@@ -49,6 +49,7 @@
import copy
from multiprocessing.pool import ThreadPool
+from multiprocessing.pool import TimeoutError
import os
import re
import shutil
@@ -2582,16 +2583,33 @@ def synchronize_profiles_to_sites(logger, profiles_to_synchronize):
logger.info('Credentials changed for %s. Trying to sync to %d sites' %
(", ".join(
profiles_to_synchronize.keys()), len(remote_sites)))
- synchronization_jobs = []
states = sites.states()
- for site_id, site in remote_sites:
- synchronization_jobs.append((states, site_id, site, profiles_to_synchronize))
-
pool = ThreadPool()
- results = pool.map(_sychronize_profile_worker, synchronization_jobs)
- pool.close()
- pool.join()
+ jobs = []
+ for site_id, site in remote_sites:
+ jobs.append(
+ pool.apply_async(_sychronize_profile_worker,
+ ((states, site_id, site, user_id, profile),)))
+
+ results = []
+ start_time = time.time()
+ while time.time() - start_time < 10:
+ for job in jobs[:]:
+ try:
+ results.append(job.get(timeout=0.5))
+ jobs.remove(job)
+ except TimeoutError:
+ pass
+ if not jobs:
+ break
+
+ contacted_sites = set([x[0] for x in remote_sites])
+ working_sites = set([result.site_id for result in results])
+ for site_id in contacted_sites - working_sites:
+ results.append(
+ SynchronizationResult(
+ site_id, error_text=_("No response from update thread"),
failed=True))
for result in results:
if result.error_text:
@@ -2604,6 +2622,9 @@ def synchronize_profiles_to_sites(logger, profiles_to_synchronize):
sites=[result.site_id],
need_restart=False)
+ pool.terminate()
+ pool.join()
+
num_failed = sum([1 for result in results if result.failed])
num_disabled = sum([1 for result in results if result.disabled])
num_succeeded = sum([1 for result in results if result.succeeded])
diff --git a/cmk/gui/watolib.py b/cmk/gui/watolib.py
index 7889ff0..659895b 100644
--- a/cmk/gui/watolib.py
+++ b/cmk/gui/watolib.py
@@ -4570,13 +4570,14 @@ def do_site_login(site_id, name, password):
raise MKAutomationException(response)
-def get_url(url, insecure, auth=None, data=None, files=None):
+def get_url(url, insecure, auth=None, data=None, files=None, timeout=None):
response = requests.post(
url,
data=data,
verify=not insecure,
auth=auth,
files=files,
+ timeout=timeout,
)
response.encoding = "utf-8" # Always decode with utf-8
@@ -4653,7 +4654,7 @@ def sync_changes_before_remote_automation(site_id):
state.get("_status_details"))
-def do_remote_automation(site, command, vars_):
+def do_remote_automation(site, command, vars_, timeout=None):
base_url = site["multisiteurl"]
secret = site.get("secret")
if not secret:
@@ -4666,7 +4667,7 @@ def do_remote_automation(site, command, vars_):
("debug", config.debug and '1' or '')
])
- response = get_url(url, site.get('insecure', False), data=dict(vars_))
+ response = get_url(url, site.get('insecure', False), data=dict(vars_),
timeout=timeout)
if not response:
raise MKAutomationException(_("Empty output from remote site."))
@@ -4856,10 +4857,13 @@ def legacy_push_user_profile_to_site(site, user_id, profile):
])
response = get_url(
- url, site.get('insecure', False), data={
+ url,
+ site.get('insecure', False),
+ data={
'user_id': user_id,
'profile': mk_repr(profile),
- })
+ },
+ timeout=60)
if not response:
raise MKAutomationException(_("Empty output from remote site."))
@@ -4873,7 +4877,8 @@ def legacy_push_user_profile_to_site(site, user_id, profile):
def push_user_profiles_to_site(site, user_profiles):
- return do_remote_automation(site, "push-profiles", [("profiles",
repr(user_profiles))])
+ return do_remote_automation(
+ site, "push-profiles", [("profiles", repr(user_profiles))],
timeout=60)
PushUserProfilesRequest = NamedTuple("PushUserProfilesRequest",
[("user_profiles", dict)])