Module: check_mk
Branch: master
Commit: 83fa80cf16f593a3b7b76c8231316e8acd4f2ec2
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=83fa80cf16f593…
Author: Óscar Nájera <on(a)mathias-kettner.de>
Date: Wed Dec 5 17:07:39 2018 +0100
6655 FIX PS: Crash when zombie processes are found on Solaris
The Solaris agent reports "-" as the cputime of zombie processes. This caused
a crash because the value cannot be converted to a time. This Werk takes such
input into consideration and transforms it into a cputime of zero, letting the
PS check work normally.
Change-Id: I4c75b829f9dc53843c8e90ae2cbb12cb3cb4c04b
---
.werks/6655 | 13 +++++++++++++
checks/ps | 3 ++-
checks/ps.include | 7 +++++--
tests/unit/checks/test_ps.py | 29 ++++++++++++++++++++++++++---
4 files changed, 46 insertions(+), 6 deletions(-)
diff --git a/.werks/6655 b/.werks/6655
new file mode 100644
index 0000000..31e39df
--- /dev/null
+++ b/.werks/6655
@@ -0,0 +1,13 @@
+Title: PS: Crash when zombie processes are found on Solaris
+Level: 1
+Component: checks
+Compatible: compat
+Edition: cre
+Version: 1.6.0i1
+Date: 1544025714
+Class: fix
+
+Solaris agent returns "-" as cputime on zombie processes. This produced a
+crash since it cannot be converted to a time. This Werk put such input into
+consideration and transforms it to zero cputime, letting the PS check work
+normally.
diff --git a/checks/ps b/checks/ps
index 620813d..e9eb3cc 100644
--- a/checks/ps
+++ b/checks/ps
@@ -230,7 +230,8 @@ def ps_parse_process_entries(parsed):
# Filter out any lines where no process command line is available, e.g.
# [None, u'(<defunct>,,,)']
- parsed = [x for x in parsed if len(x) > 2]
+ # [None, u'(<defunct>,,,)', u'']
+ parsed = [x for x in parsed if len(x) > 2 and x[2]]
return parsed
diff --git a/checks/ps.include b/checks/ps.include
index 6c8acdc..f993407 100644
--- a/checks/ps.include
+++ b/checks/ps.include
@@ -296,12 +296,13 @@ def format_process_list(processes, html_output):
def parse_ps_time(text):
if "-" in text:
tokens = text.split("-")
- days = int(tokens[0])
+ days = int(tokens[0] or 0)
text = tokens[1]
else:
days = 0
- day_secs = sum([factor * int(v) for factor, v in zip([1, 60, 3600],
reversed(text.split(":")))])
+ day_secs = sum(
+ [factor * int(v or 0) for factor, v in zip([1, 60, 3600],
reversed(text.split(":")))])
return 86400 * days + day_secs
@@ -636,6 +637,8 @@ class ProcessAggregator(object):
process.append(("pid", (pid, "")))
else: # Solaris, BSD, aix cpu times
+ if pcpu_text == '-': # Solaris defunct
+ pcpu_text = 0.0
pcpu = float(pcpu_text) * self.core_weight(is_win=False)
self.percent_cpu += pcpu
diff --git a/tests/unit/checks/test_ps.py b/tests/unit/checks/test_ps.py
index 849325f..38646cb 100644
--- a/tests/unit/checks/test_ps.py
+++ b/tests/unit/checks/test_ps.py
@@ -1,3 +1,4 @@
+from itertools import izip_longest
from collections import namedtuple
import pytest
from cmk_base.check_api import MKGeneralException
@@ -21,7 +22,12 @@ def generate_inputs():
(on,1050360,303252,00:14:59/1-03:59:39,9902) emacs
(on,2924232,472252,00:12:05/07:24:15,7912) /usr/lib/firefox/firefox"""),
# solaris (5 entry cmk>=1.5)
- splitter("(root,4056,1512,0.0/52-04:56:05,5689) /usr/lib/ssh/sshd",
node="solaris"),
+ splitter(
+ """(root,4056,1512,0.0/52-04:56:05,5689) /usr/lib/ssh/sshd
+(zombie,0,0,-/-,1952) <defunct>
+(zombie,0,0,-/-,3952)
+(zombie,0,0,-/-,4952) """,
+ node="solaris"),
# windows agent
splitter(
"""(SYSTEM,0,0,0,0,0,0,0,0,1,0) System Idle Process
@@ -80,7 +86,8 @@ result_parse = [
"emacs"],
[None, ("on", "2924232", "472252",
"00:12:05/07:24:15", "7912"),
"/usr/lib/firefox/firefox"]]),
- (1, [["solaris", ("root", "4056", "1512",
"0.0/52-04:56:05", "5689"), "/usr/lib/ssh/sshd"]]),
+ (1, [["solaris", ("root", "4056", "1512",
"0.0/52-04:56:05", "5689"), "/usr/lib/ssh/sshd"],
+ ["solaris", ("zombie", "0", "0",
"-/-", "1952"), "<defunct>"]]),
(1,
[[None, ("SYSTEM", "0", "0", "0",
"0", "0", "0", "0", "0", "1",
"0"), "System Idle Process"],
[
@@ -168,7 +175,7 @@ def test_parse_ps(check_manager, capture, result):
parsed = check.run_parse(capture)
assert parsed[0] == result[0] # cpu_cores
- for out, ref in zip(parsed[1], result[1]):
+ for out, ref in izip_longest(parsed[1], result[1]):
assert out[0] == ref[0]
assert out[1] == check.context["ps_info"](*ref[1])
assert out[2:] == ref[2:]
@@ -252,6 +259,11 @@ PS_DISCOVERY_WATO_RULES = [
"disabled": True,
"description": u"sshd"
}),
+ ({
+ 'default_params': {},
+ 'descr': 'PS counter',
+ 'user': 'zombie',
+ }, [], ["@all"], {}),
]
PS_DISCOVERY_SPECS = [
@@ -291,6 +303,9 @@ PS_DISCOVERY_SPECS = [
("sshd", "~.*sshd", None, None, {
'cpu_rescale_max': None
}),
+ ('PS counter', None, 'zombie', None, {
+ 'cpu_rescale_max': None
+ }),
]
@@ -379,6 +394,11 @@ PS_DISCOVERED_ITEMS = [
"user": None,
'cpu_rescale_max': None,
}),
+ ("PS counter", {
+ 'cpu_rescale_max': None,
+ 'process': None,
+ 'user': 'zombie'
+ }),
("svchost", {
"cpulevels": (90.0, 98.0),
"handle_count": (1000, 2000),
@@ -457,6 +477,9 @@ check_results = [
(0, "0.0% CPU", [("pcpu", 0.0, None, None, None, None)]),
(0, "running for 52 d", []),
]),
+ CheckResult([(0, '1 process [running on solaris]', [('count', 1,
100000, 100000, 0, None)]),
+ (0, '0.0% CPU', [('pcpu', 0.0, None, None, None,
None)]),
+ (0, 'running for 0.00 s', [])]),
CheckResult([
(0, "3 processes", [("count", 3, 100000, 100000, 0, None)]),
(0, "136.26 MB virtual", [("vsz", 139532, 1073741824000,
2147483648000, None, None)]),