Module: check_mk
Branch: master
Commit: dff254ba429aaf596a0dd968e5651ef1c1b158b6
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=dff254ba429aaf…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Thu Apr 2 10:43:57 2015 +0200
#2187 FIX Avoid CLOSE_WAIT sockets for agent connection in case of timeouts
When an agent connection times out (the default timeout is 60 seconds) while
using keep alive mode (as the CMC does), socket connections could be left behind
in the <tt>CLOSE_WAIT</tt> state. This was caused by a file descriptor not being
closed correctly by Python. This has been fixed.
---
.werks/2187 | 13 +++++++++++++
ChangeLog | 1 +
modules/check_mk_base.py | 22 ++++++++++++++++------
3 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/.werks/2187 b/.werks/2187
new file mode 100644
index 0000000..4f24a4a
--- /dev/null
+++ b/.werks/2187
@@ -0,0 +1,13 @@
+Title: Avoid CLOSE_WAIT sockets for agent connection in case of timeouts
+Level: 1
+Component: core
+Class: fix
+Compatible: compat
+State: unknown
+Version: 1.2.7i1
+Date: 1427964139
+
+When an agent connection times out (the default timeout is 60 seconds) while
+using keep alive mode (as the CMC does), socket connections could be left behind
+in the <tt>CLOSE_WAIT</tt> state. This was caused by a file descriptor not being
+closed correctly by Python. This has been fixed.
diff --git a/ChangeLog b/ChangeLog
index d56ee58..6f11239 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -21,6 +21,7 @@
* 2016 FIX: Fixed service discovery / monitoring on hosts which have only piggyback
data (e.g. ESX VMs)...
* 2089 FIX: Debug mode shows details about errors in autochecks as expected now
* 2093 FIX: Fixed handling of check_mk commandline parameter "-c"
+ * 2187 FIX: Avoid CLOSE_WAIT sockets for agent connection in case of timeouts...
Checks & Agents:
* 1665 agent_netapp: New special agent for NetApp monitoring via Web-API...
diff --git a/modules/check_mk_base.py b/modules/check_mk_base.py
index 94d52c1..90100a2 100644
--- a/modules/check_mk_base.py
+++ b/modules/check_mk_base.py
@@ -764,17 +764,27 @@ def get_agent_info_tcp(hostname, ipaddress, port = None):
pass # some old Python versions lack settimeout(). Better ignore than fail
vverbose("Connecting via TCP to %s:%d.\n" % (ipaddress, port))
s.connect((ipaddress, port))
+ # Immediately close sending direction. We do not send any data
+ # s.shutdown(socket.SHUT_WR)
try:
s.setblocking(1)
except:
pass
output = ""
- while True:
- out = s.recv(4096, socket.MSG_WAITALL)
- if out and len(out) > 0:
- output += out
- else:
- break
+ try:
+ while True:
+ out = s.recv(4096, socket.MSG_WAITALL)
+ if out and len(out) > 0:
+ output += out
+ else:
+ break
+ except Exception, e:
+ # Python seems to skip closing the socket under certain
+ # conditions, leaving open filedescriptors and sockets in
+ # CLOSE_WAIT. This happens on a timeout (ALERT signal)
+ s.close()
+ raise
+
s.close()
if len(output) == 0: # may be caused by xinetd not allowing our address
raise MKAgentError("Empty output from agent at TCP port %d" %
port)