Module: check_mk
Branch: master
Commit: 2b9dcb80bc286b74f355d31c804e7781fce1277d
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=2b9dcb80bc286b…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Fri Mar 1 13:39:22 2019 +0100
7193 FIX Background jobs: Fixed possible "pid" KeyError exception
When background jobs terminated during initialization, the status
information of the job was missing necessary information which
led to exceptions in the status overview of the jobs and the
job cleanup code.
In case you experienced such an issue in previous versions, you
could remove the state files from
<tt>var/check_mk/background_jobs/[job]/jobstatus.mk</tt>
to temporarily fix the problem.
Change-Id: I78c7b176eeb3a7fd9438eca28b979f3b1372f877
---
.werks/7193 | 18 ++++++++++++++
cmk/gui/background_job.py | 41 +++++++++++++++++--------------
cmk/gui/gui_background_job.py | 4 +--
tests/unit/cmk/gui/test_background_job.py | 2 +-
4 files changed, 43 insertions(+), 22 deletions(-)
diff --git a/.werks/7193 b/.werks/7193
new file mode 100644
index 0000000..91e31f3
--- /dev/null
+++ b/.werks/7193
@@ -0,0 +1,18 @@
+Title: Background jobs: Fixed possible "pid" KeyError exception
+Level: 1
+Component: multisite
+Class: fix
+Compatible: compat
+Edition: cre
+State: unknown
+Version: 1.6.0i1
+Date: 1551445523
+
+When background jobs terminated during initialization, the status
+information of the job was missing necessary information which
+led to exceptions in the status overview of the jobs and the
+job cleanup code.
+
+In case you experienced such an issue in previous versions, you
+could remove the state files from
<tt>var/check_mk/background_jobs/[job]/jobstatus.mk</tt>
+to temporarily fix the problem.
diff --git a/cmk/gui/background_job.py b/cmk/gui/background_job.py
index b493c57..7a6307c 100644
--- a/cmk/gui/background_job.py
+++ b/cmk/gui/background_job.py
@@ -162,6 +162,7 @@ class BackgroundProcess(BackgroundProcessInterface,
multiprocessing.Process):
self._logger.verbose(
"Initialized background job (Job ID: %s)" %
self._job_parameters["job_id"])
self._jobstatus.update_status({
+ "pid": self.pid,
"state": JobStatus.state_running,
})
@@ -309,21 +310,23 @@ class BackgroundJob(object):
]:
return False
- if "pid" in job_status:
- try:
- p = psutil.Process(job_status["pid"])
- if job_status["state"] == JobStatus.state_initialized:
- # The process was just created, but has/may not been renamed yet
- # Additionally it has no open file handle to the status file
- # The _is_correct_process check will fail in this gray area
- # We consider this scenario as OK, if the start time was recent
enough
- if time.time() - job_status["started"] < 5: # 5
seconds
- return True
-
- if self._is_correct_process(job_status, p):
+ if job_status["pid"] is None:
+ return False
+
+ try:
+ p = psutil.Process(job_status["pid"])
+ if job_status["state"] == JobStatus.state_initialized:
+ # The process was just created, but has/may not been renamed yet
+ # Additionally it has no open file handle to the status file
+ # The _is_correct_process check will fail in this gray area
+ # We consider this scenario as OK, if the start time was recent enough
+ if time.time() - job_status["started"] < 5: # 5 seconds
return True
- except (psutil.NoSuchProcess, psutil.AccessDenied):
- return False
+
+ if self._is_correct_process(job_status, p):
+ return True
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
+ return False
return False
@@ -365,11 +368,11 @@ class BackgroundJob(object):
def _terminate_processes(self):
job_status = self.get_status()
- if not job_status.get("pid"):
+ if job_status["pid"] is None:
return
# Send SIGTERM
- self._logger.debug("Stopping job using SIGTERM \"%s\" (PID:
%d)", self._job_id,
+ self._logger.debug("Stopping job using SIGTERM \"%s\" (PID:
%s)", self._job_id,
job_status["pid"])
try:
process = psutil.Process(job_status["pid"])
@@ -414,7 +417,7 @@ class BackgroundJob(object):
status = self._jobstatus.get_status()
# Some dynamic stuff
- if status.get("state", "") == JobStatus.state_running:
+ if status.get("state", "") == JobStatus.state_running and
status["pid"] is not None:
try:
p = psutil.Process(status["pid"])
if not self._is_correct_process(status, p):
@@ -464,7 +467,7 @@ class BackgroundJob(object):
if p.exitcode == 0:
job_status = self.get_status()
- self._logger.debug("Started job \"%s\" (PID: %s)",
self._job_id, job_status.get("pid"))
+ self._logger.debug("Started job \"%s\" (PID: %s)",
self._job_id, job_status["pid"])
def _prepare_work_dir(self):
self._delete_work_dir()
@@ -474,7 +477,6 @@ class BackgroundJob(object):
try:
p = self._background_process_class(job_parameters)
p.start()
- self._jobstatus.update_status({"pid": p.pid})
except Exception as e:
self._logger.error("Error while starting subprocess: %s", e,
exc_info=True)
os._exit(1)
@@ -502,6 +504,7 @@ class JobStatus(object):
data.setdefault("state", JobStatus.state_initialized)
data.setdefault("duration", 0.0)
+ data.setdefault("pid", None)
data["loginfo"] = {}
for field_id, field_path in [("JobProgressUpdate",
self._progress_update_path),
diff --git a/cmk/gui/gui_background_job.py b/cmk/gui/gui_background_job.py
index 489c933..6f73b32 100644
--- a/cmk/gui/gui_background_job.py
+++ b/cmk/gui/gui_background_job.py
@@ -499,7 +499,7 @@ class JobRenderer(object):
job_status["estimated_duration"]))
for left, right in [
(_("Runtime"), runtime_info),
- (_("PID"), job_status.get("pid", "")),
+ (_("PID"), job_status["pid"] or ""),
(_("Result"),
"<br>".join(loginfo["JobResult"])),
]:
if right is None:
@@ -622,7 +622,7 @@ class JobRenderer(object):
html.td(job_status.get("user", _("Unknown user")),
css="job_owner")
# PID
- html.td(job_status.get("pid", ""), css="job_pid")
+ html.td(job_status["pid"] or "", css="job_pid")
# Druation
html.td(cmk.utils.render.timespan(job_status.get("duration", 0)),
css="job_runtime")
diff --git a/tests/unit/cmk/gui/test_background_job.py
b/tests/unit/cmk/gui/test_background_job.py
index fb09401..a3f51d2 100644
--- a/tests/unit/cmk/gui/test_background_job.py
+++ b/tests/unit/cmk/gui/test_background_job.py
@@ -95,7 +95,7 @@ def test_start_job():
assert status["state"] == background_job.JobStatus.state_initialized
job.start()
- assert job.is_running()
+ testlib.wait_until(job.is_running, timeout=5, interval=0.1)
with pytest.raises(background_job.BackgroundJobAlreadyRunning):
job.start()