Module: check_mk
Branch: master
Commit: 6fb0b9e1c104913d5031091b2c3981a2ff9eb54b
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=6fb0b9e1c10491…
Author: Simon Betz <si(a)mathias-kettner.de>
Date: Mon Nov 27 11:08:14 2017 +0100
5444 FIX SNMP commands: Prevent zombie processes in case of timeouts
Change-Id: I7366ceaefa9b68ddb85388c943a87a4e8b6aac1e
---
.werks/5444 | 20 +++++++++++
cmk_base/classic_snmp.py | 87 +++++++++++++++++++++++++++++++-----------------
2 files changed, 76 insertions(+), 31 deletions(-)
diff --git a/.werks/5444 b/.werks/5444
new file mode 100644
index 0000000..0c3852f
--- /dev/null
+++ b/.werks/5444
@@ -0,0 +1,20 @@
+Title: SNMP commands: Prevent zombie processes in case of timeouts
+Level: 1
+Component: core
+Class: fix
+Compatible: compat
+Edition: cre
+State: unknown
+Version: 1.5.0i2
+Date: 1510582108
+
+This concerns users of the RAW edition and users of the CEE/CME editions of
+Check_MK who have deactivated inline SNMP.
+
+When executing SNMP commands like get, getnext, walk or bulkwalk to get agent data
+from hosts, it could happen that these commands were left behind as zombie processes.
+Check_MK has been extended to deal with this situation and to clean up these processes.
+
+Details: When the command execution takes too long, Check_MK sends a SIGTERM to the
+process group of the executed program. After sending the signal, Check_MK now
+waits for the process to finish.
diff --git a/cmk_base/classic_snmp.py b/cmk_base/classic_snmp.py
index fa4a5d9..f24f404 100644
--- a/cmk_base/classic_snmp.py
+++ b/cmk_base/classic_snmp.py
@@ -57,9 +57,40 @@ def walk(hostname, ip, oid, hex_plain=False, context_name=None):
debug_cmd = [ "''" if a == "" else a for a in command ]
console.vverbose("Running '%s'\n" % " ".join(debug_cmd))
- snmp_process = subprocess.Popen(command, close_fds=True, stdin=open(os.devnull),
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ snmp_process = None
+ exitstatus = None
+ rowinfo = []
+ try:
+ snmp_process = subprocess.Popen(command, close_fds=True, stdin=open(os.devnull),
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ rowinfo = _get_rowinfo_from_snmp_process(snmp_process, hex_plain)
+
+ except MKTimeout:
+ # On timeout exception try to stop the process to prevent child process "leakage"
+ if snmp_process:
+ os.kill(snmp_process.pid, signal.SIGTERM)
+ snmp_process.wait()
+ raise
+
+ finally:
+ # The stdout and stderr pipes are not closed correctly on an MKTimeout.
+ # Normally these pipes get closed after p.communicate finishes.
+ # Closing them a second time in an OK scenario won't hurt either.
+ if snmp_process:
+ exitstatus = snmp_process.wait()
+ error = snmp_process.stderr.read()
+ snmp_process.stdout.close()
+ snmp_process.stderr.close()
+
+ if exitstatus:
+ console.verbose(tty.red + tty.bold + "ERROR: " + tty.normal +
"SNMP error: %s\n" % error.strip())
+ raise MKSNMPError("SNMP Error on %s: %s (Exit-Code: %d)" % (ip,
error.strip(), exitstatus))
+ return rowinfo
+
+def _get_rowinfo_from_snmp_process(snmp_process, hex_plain):
+ line_iter = snmp_process.stdout.xreadlines()
# Ugly(1): in some cases snmpwalk inserts line feed within one
# dataset. This happens for example on hexdump outputs longer
# than a few bytes. Those dumps are enclosed in double quotes.
@@ -67,36 +98,30 @@ def walk(hostname, ip, oid, hex_plain=False, context_name=None):
# does not end with a double quote, we take the next line(s) as
# a continuation line.
rowinfo = []
- try:
- line_iter = snmp_process.stdout.xreadlines()
- while True:
+ while True:
+ try:
line = line_iter.next().strip()
- parts = line.split('=', 1)
- if len(parts) < 2:
- continue # broken line, must contain =
- oid = parts[0].strip()
- value = parts[1].strip()
- # Filter out silly error messages from snmpwalk >:-P
- if value.startswith('No more variables') or value.startswith('End
of MIB') \
- or value.startswith('No Such Object available') or
value.startswith('No Such Instance currently exists'):
- continue
-
- if value == '"' or (len(value) > 1 and value[0] ==
'"' and (value[-1] != '"')): # to be continued
- while True: # scan for end of this dataset
- nextline = line_iter.next().strip()
- value += " " + nextline
- if value[-1] == '"':
- break
- rowinfo.append((oid, strip_snmp_value(value, hex_plain)))
-
- except StopIteration:
- pass
-
- error = snmp_process.stderr.read()
- exitstatus = snmp_process.wait()
- if exitstatus:
- console.verbose(tty.red + tty.bold + "ERROR: " + tty.normal +
"SNMP error: %s\n" % error.strip())
- raise MKSNMPError("SNMP Error on %s: %s (Exit-Code: %d)" % (ip,
error.strip(), exitstatus))
+ except StopIteration:
+ break
+
+ parts = line.split('=', 1)
+ if len(parts) < 2:
+ continue # broken line, must contain =
+ oid = parts[0].strip()
+ value = parts[1].strip()
+ # Filter out silly error messages from snmpwalk >:-P
+ if value.startswith('No more variables') or value.startswith('End of
MIB') \
+ or value.startswith('No Such Object available') \
+ or value.startswith('No Such Instance currently exists'):
+ continue
+
+ if value == '"' or (len(value) > 1 and value[0] ==
'"' and (value[-1] != '"')): # to be continued
+ while True: # scan for end of this dataset
+ nextline = line_iter.next().strip()
+ value += " " + nextline
+ if value[-1] == '"':
+ break
+ rowinfo.append((oid, strip_snmp_value(value, hex_plain)))
return rowinfo