Module: check_mk
Branch: master
Commit: d1a6470c9318da8c758302fcbf0f20da9308f8dc
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=d1a6470c9318da…
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Wed Mar 13 12:24:42 2013 +0100
move changelog entry
---
ChangeLog | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 1fc95a1..8ff68c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -48,7 +48,6 @@
when processing of current lines needs more time than max execution time
* mounts: ignore multiple occurrances of the same device
* Linux agent: allow cached local/plugins checks (see docu)
- * jolokia_metrics.mem: now able to handle negative/missing max heap values
Livestatus:
* Table statehist: Improved detection of vanished hosts and services.
@@ -74,6 +73,9 @@
* Checks can now omit the typical "OK - " or "WARN -". This text
will be added automatically if missing.
+ Checks & Agents:
+ * FIX: jolokia_metrics.mem - now able to handle negative/missing heap values
+
Livestatus:
* FIX: possible crash with VERY long downtime comments
Module: check_mk
Branch: master
Commit: 1b0384c853c7a1d05543d597835c1b5c39898daf
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=1b0384c853c7a1…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Wed Mar 6 17:23:47 2013 +0100
FIX: tcp_conn_stats: use /proc/net/tcp instead of netstat -tn
---
ChangeLog | 3 +++
agents/check_mk_agent.linux | 4 +++-
checks/tcp_conn_stats | 26 +++++++++++++-------------
3 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 1d82059..4f9c7aa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -106,6 +106,9 @@
* FIX: hr_mem: fix max value in performance data (thanks to Michaël COQUARD)
* FIX: f5_bigip_psu: fix inventory function (returned list instead of tuple)
* FIX: mysql.connections: avoid crash on legacy agent output
+ * FIX: tcp_conn_stats: use /proc/net/tcp instead of netstat -tn. This
+ should avoid massive performance problems on system with many
+ connections
* Linux agent: limit netstat to 10 seconds
* ps: Allow %1, %2, .. instead of %s in process_inventory. That allows
reordering of matched groups
diff --git a/agents/check_mk_agent.linux b/agents/check_mk_agent.linux
index 5eb5e6f..b5e10aa 100755
--- a/agents/check_mk_agent.linux
+++ b/agents/check_mk_agent.linux
@@ -207,7 +207,9 @@ fi
# Number of TCP connections in the various states
echo '<<<tcp_conn_stats>>>'
-waitmax 10 netstat -nt | awk ' /^tcp/ { c[$6]++; } END { for (x in c) { print x, c[x]; } }'
+# waitmax 10 netstat -nt | awk ' /^tcp/ { c[$6]++; } END { for (x in c) { print x, c[x]; } }'
+# New implementation: netstat is very slow for large TCP tables
+awk ' /:/ { c[$4]++; } END { for (x in c) { print x, c[x]; } }' < /proc/net/tcp
# Platten- und RAID-Status von LSI-Controlleren, falls vorhanden
if type cfggen > /dev/null ; then
diff --git a/checks/tcp_conn_stats b/checks/tcp_conn_stats
index 5264154..d85b48a 100644
--- a/checks/tcp_conn_stats
+++ b/checks/tcp_conn_stats
@@ -25,17 +25,17 @@
# Boston, MA 02110-1301 USA.
tcp_conn_stats_states = [
-"ESTABLISHED", # connection up and passing data
-"SYN_SENT", # session has been requested by us; waiting for reply from remote endpoint
-"SYN_RECV", # session has been requested by a remote endpoint for a socket on which we were listening
-"LAST_ACK", # our socket is closed; remote endpoint has also shut down; we are waiting for a final acknowledgement
-"CLOSE_WAIT", # remote endpoint has shut down; the kernel is waiting for the application to close the socket
-"TIME_WAIT", # socket is waiting after closing for any packets left on the network
-"CLOSED", # socket is not being used (FIXME. What does mean?)
-"CLOSING", # our socket is shut down; remote endpoint is shut down; not all data has been sent
-"FIN_WAIT1", # our socket has closed; we are in the process of tearing down the connection
-"FIN_WAIT2", # the connection has been closed; our socket is waiting for the remote endpoint to shut down
-"BOUND", # Socket did a bound() but TCP stack not yet active (Solaris)
+ ( "ESTABLISHED", "01"), # connection up and passing data
+ ( "SYN_SENT", "02"), # session has been requested by us; waiting for reply from remote endpoint
+ ( "SYN_RECV", "03"), # session has been requested by a remote endpoint for a socket on which we were listening
+ ( "LAST_ACK", "09"), # our socket is closed; remote endpoint has also shut down; we are waiting for a final acknowledgement
+ ( "CLOSE_WAIT", "08"), # remote endpoint has shut down; the kernel is waiting for the application to close the socket
+ ( "TIME_WAIT", "06"), # socket is waiting after closing for any packets left on the network
+ ( "CLOSED", "07"), # socket is not being used (FIXME. What does mean?)
+ ( "CLOSING", "0B"), # our socket is shut down; remote endpoint is shut down; not all data has been sent
+ ( "FIN_WAIT1", "04"), # our socket has closed; we are in the process of tearing down the connection
+ ( "FIN_WAIT2", "05"), # the connection has been closed; our socket is waiting for the remote endpoint to shut down
+ ( "BOUND", None), # Socket did a bound() but TCP stack not yet active (Solaris)
]
@@ -50,8 +50,8 @@ def check_tcp_conn_stats(item, params, info):
worst_state = 0
info = []
perfdata = []
- for state in tcp_conn_stats_states:
- num = int(stats.get(state, 0))
+ for state, hex in tcp_conn_stats_states:
+ num = int(stats.get(state, stats.get(hex, 0)))
if num > 0:
infotext = "%s: %d" % (state, num)
else:
Module: check_mk
Branch: master
Commit: e3ca1a68ea5e6ab054d771822e1263e4bd2175d9
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=e3ca1a68ea5e6a…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Tue Mar 12 16:32:11 2013 +0100
Fix three problems in flexible notifications
1. Notification filters from second channel used settings from first.
2. Do not defer notifications outside of the time period
3. Correctly return deferred-flag
---
modules/notify.py | 28 ++++++++++++++++------------
1 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/modules/notify.py b/modules/notify.py
index 79549ac..b0e5ab3 100644
--- a/modules/notify.py
+++ b/modules/notify.py
@@ -415,18 +415,18 @@ def notify_via_email(context, write_into_spoolfile):
# 0 : everything fine -> proceed
# 1 : currently not OK -> try to process later on
# >=2: invalid -> discard
-def check_prerequisite(context, entry):
+def should_notify(context, entry):
# Check disabling
if entry.get("disabled"):
notify_log("- Skipping: it is disabled for this user")
- return 2
+ return False
# Check host, if configured
if entry.get("only_hosts"):
hostname = context.get("HOSTNAME")
if hostname not in entry["only_hosts"]:
notify_log(" - Skipping: host '%s' matches non of %s" % (hostname, ", ".join(entry["only_hosts"])))
- return 2
+ return False
# Check service, if configured
if entry.get("only_services"):
@@ -439,14 +439,14 @@ def check_prerequisite(context, entry):
else:
notify_log(" - Skipping: service '%s' matches non of %s" % (
servicedesc, ", ".join(entry["only_services"])))
- return 2
+ return False
# Check notification type
event, allowed_events = check_notification_type(context, entry["host_events"], entry["service_events"])
if event not in allowed_events:
notify_log(" - Skipping: wrong notification type %s (%s), only %s are allowed" %
(event, context["NOTIFICATIONTYPE"], ",".join(allowed_events)) )
- return 2
+ return False
# Check notification number (in case of repeated notifications/escalations)
if "escalation" in entry:
@@ -458,34 +458,38 @@ def check_prerequisite(context, entry):
if notification_number < from_number or notification_number > to_number:
notify_log(" - Skipping: notification number %d does not lie in range %d ... %d" %
(notification_number, from_number, to_number))
- return 2
+ return False
if "timeperiod" in entry:
timeperiod = entry["timeperiod"]
if timeperiod and timeperiod != "24X7":
if not check_timeperiod(timeperiod):
notify_log(" - Skipping: time period %s is currently not active" % timeperiod)
- return 1
- return 0
+ return False
+ return True
def notify_flexible(context, notification_table, write_into_spoolfile):
- result = 2
+ should_retry = False
for entry in notification_table:
plugin = entry["plugin"]
notify_log("Plugin: %s" % plugin)
- result = check_prerequisite(context, notification_table[0])
- if result > 0:
+ if not should_notify(context, entry):
continue
if plugin is None:
result = notify_via_email(context, write_into_spoolfile)
else:
result = call_notification_script(plugin, entry.get("parameters", []), context, write_into_spoolfile)
+ if result == 1:
+ should_retry = True
# The exit_code is only relevant when processing spoolfiles
- return result
+ if should_retry:
+ return 1
+ else:
+ return 0