Module: check_mk
Branch: master
Commit: 1b0384c853c7a1d05543d597835c1b5c39898daf
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=1b0384c853c7a1d05543d597835c1b5c39898daf
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Wed Mar 6 17:23:47 2013 +0100
FIX: tcp_conn_stats: use /proc/net/tcp instead of netstat -tn
---
ChangeLog | 3 +++
agents/check_mk_agent.linux | 4 +++-
checks/tcp_conn_stats | 26 +++++++++++++-------------
3 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 1d82059..4f9c7aa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -106,6 +106,9 @@
* FIX: hr_mem: fix max value in performance data (thanks to Michaël COQUARD)
* FIX: f5_bigip_psu: fix inventory function (returned list instead of tuple)
* FIX: mysql.connections: avoid crash on legacy agent output
+ * FIX: tcp_conn_stats: use /proc/net/tcp instead of netstat -tn. This
+ should avoid massive performance problems on systems with many
+ connections
* Linux agent: limit netstat to 10 seconds
* ps: Allow %1, %2, .. instead of %s in process_inventory. That allows
reordering of matched groups
diff --git a/agents/check_mk_agent.linux b/agents/check_mk_agent.linux
index 5eb5e6f..b5e10aa 100755
--- a/agents/check_mk_agent.linux
+++ b/agents/check_mk_agent.linux
@@ -207,7 +207,9 @@ fi
# Number of TCP connections in the various states
echo '<<<tcp_conn_stats>>>'
-waitmax 10 netstat -nt | awk ' /^tcp/ { c[$6]++; } END { for (x in c) { print x,
c[x]; } }'
+# waitmax 10 netstat -nt | awk ' /^tcp/ { c[$6]++; } END { for (x in c) { print x,
c[x]; } }'
+# New implementation: netstat is very slow for large TCP tables
+awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }' <
/proc/net/tcp
# Platten- und RAID-Status von LSI-Controlleren, falls vorhanden
if type cfggen > /dev/null ; then
diff --git a/checks/tcp_conn_stats b/checks/tcp_conn_stats
index 5264154..d85b48a 100644
--- a/checks/tcp_conn_stats
+++ b/checks/tcp_conn_stats
@@ -25,17 +25,17 @@
# Boston, MA 02110-1301 USA.
tcp_conn_stats_states = [
-"ESTABLISHED", # connection up and passing data
-"SYN_SENT", # session has been requested by us; waiting for reply from
remote endpoint
-"SYN_RECV", # session has been requested by a remote endpoint for a socket
on which we were listening
-"LAST_ACK", # our socket is closed; remote endpoint has also shut down; we
are waiting for a final acknowledgement
-"CLOSE_WAIT", # remote endpoint has shut down; the kernel is waiting for the
application to close the socket
-"TIME_WAIT", # socket is waiting after closing for any packets left on the
network
-"CLOSED", # socket is not being used (FIXME. What does mean?)
-"CLOSING", # our socket is shut down; remote endpoint is shut down; not
all data has been sent
-"FIN_WAIT1", # our socket has closed; we are in the process of tearing down
the connection
-"FIN_WAIT2", # the connection has been closed; our socket is waiting for the
remote endpoint to shut down
-"BOUND", # Socket did a bound() but TCP stack not yet active (Solaris)
+ ( "ESTABLISHED", "01"), # connection up and passing data
+ ( "SYN_SENT", "02"), # session has been requested by us; waiting
for reply from remote endpoint
+ ( "SYN_RECV", "03"), # session has been requested by a remote
endpoint for a socket on which we were listening
+ ( "LAST_ACK", "09"), # our socket is closed; remote endpoint has
also shut down; we are waiting for a final acknowledgement
+ ( "CLOSE_WAIT", "08"), # remote endpoint has shut down; the kernel
is waiting for the application to close the socket
+ ( "TIME_WAIT", "06"), # socket is waiting after closing for any
packets left on the network
+ ( "CLOSED", "07"), # socket is not being used (FIXME: what
does this mean?)
+ ( "CLOSING", "0B"), # our socket is shut down; remote endpoint
is shut down; not all data has been sent
+ ( "FIN_WAIT1", "04"), # our socket has closed; we are in the
process of tearing down the connection
+ ( "FIN_WAIT2", "05"), # the connection has been closed; our
socket is waiting for the remote endpoint to shut down
+ ( "BOUND", None), # Socket did a bound() but TCP stack not yet active
(Solaris)
]
@@ -50,8 +50,8 @@ def check_tcp_conn_stats(item, params, info):
worst_state = 0
info = []
perfdata = []
- for state in tcp_conn_stats_states:
- num = int(stats.get(state, 0))
+ for state, hex in tcp_conn_stats_states:
+ num = int(stats.get(state, stats.get(hex, 0)))
if num > 0:
infotext = "%s: %d" % (state, num)
else: