Module: check_mk
Branch: master
Commit: 8a0ad9a4312439da6ace6ef11e712d8aaf682d5d
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=8a0ad9a4312439…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Wed Jan 13 10:18:08 2016 +0100
CSV import: Cleanup old files from upload tmp dir
---
web/htdocs/wato.py | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/web/htdocs/wato.py b/web/htdocs/wato.py
index 90c2b98..65269ef 100644
--- a/web/htdocs/wato.py
+++ b/web/htdocs/wato.py
@@ -3001,11 +3001,12 @@ class ModeBulkImport(WatoMode):
# Upload the CSV file into a temporary directory to make it available not only
# for this request. It needs to be available during several potential "confirm"
# steps and then through the upload step.
- # FIXME: How and when to cleanup files which were not completely uploaded?
def _upload_csv_file(self):
if not os.path.exists(self._upload_tmp_path):
make_nagios_directories(self._upload_tmp_path)
+ self._cleanup_old_files()
+
upload_info = self._vs_upload().from_html_vars("_upload")
self._vs_upload().validate_value(upload_info, "_upload")
file_name, mime_type, content = upload_info["file"]
@@ -3022,6 +3023,14 @@ class ModeBulkImport(WatoMode):
html.set_var("do_service_detection", "1")
+ def _cleanup_old_files(self):
+ for f in os.listdir(self._upload_tmp_path):
+ path = self._upload_tmp_path + "/" + f
+ mtime = os.stat(path).st_mtime
+ if mtime < time.time() - 3600:
+ os.unlink(path)
+
+
def _read_csv_file(self):
try:
csv_file = file(self._file_path())
Module: check_mk
Branch: master
Commit: d8f8354cb626d8b7532eed9ef20e8ed1beaf1649
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=d8f8354cb626d8…
Author: Sebastian Herbord <sh(a)mathias-kettner.de>
Date: Tue Jan 12 10:41:49 2016 +0100
#2917 FIX cisco_asa_failover: Will no longer discover on devices where failover is disabled
If failover is disabled on a device, both primary and secondary units are reported as "down".
check_mk reported a warning for that even though there was no problem.
This change requires re-inventory of the affected devices.
---
.werks/2917 | 11 +++++++++++
ChangeLog | 1 +
checks/cisco_asa_failover | 5 +++++
3 files changed, 17 insertions(+)
diff --git a/.werks/2917 b/.werks/2917
new file mode 100644
index 0000000..2e3c75c
--- /dev/null
+++ b/.werks/2917
@@ -0,0 +1,11 @@
+Title: cisco_asa_failover: Will no longer discover on devices where failover is disabled
+Level: 1
+Component: checks
+Compatible: compat
+Version: 1.2.7i4
+Date: 1452591537
+Class: fix
+
+If failover is disabled on a device, both primary and secondary units are reported as "down".
+check_mk reported a warning for that even though there was no problem.
+This change requires re-inventory of the affected devices.
diff --git a/ChangeLog b/ChangeLog
index 5fc634a..e2c3aca 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -178,6 +178,7 @@
* 2898 FIX: smart: Using normalized value for determining reallocated events state...
* 2888 FIX: citrix_controller.licensing: ignoring double data from piggy backs
* 2899 FIX: ps: Finally improved performance impact of perfdata when having a lot of processes...
+ * 2917 FIX: cisco_asa_failover: Will no longer discover on devices where failover is disabled...
Multisite:
* 2684 Added icons for downloading agent data / walks of hosts...
diff --git a/checks/cisco_asa_failover b/checks/cisco_asa_failover
index 361bac2..fb69e36 100644
--- a/checks/cisco_asa_failover
+++ b/checks/cisco_asa_failover
@@ -37,6 +37,11 @@
# [['Failover LAN Interface', '2', 'LAN_FO GigabitEthernet0/0.777'], ['Primary unit', '9', 'Active unit'], ['Secondary unit (this device)', '10', 'Standby unit']]
def inventory_cisco_asa_failover(info):
+ for name, state, state_txt in info:
+ if name.lower() == "failover lan interface" and state == "3":
+ # clustering is disabled
+ return None
+
for deviceentry in info[-2:]:
if "this device" in deviceentry[0]:
return [ (None, None) ]
Module: check_mk
Branch: master
Commit: 0664973c5d123f7ff14b68d6c2e9c697523f7771
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=0664973c5d123f…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Mon Jan 11 09:27:19 2016 +0100
#2899 FIX ps: Finally improved performance impact of perfdata when having a lot of processes
When having a lot of processes, like several thousands, on a system and monitoring them,
this led to a lot of performance counters being created in Check_MK. The management of
these counters was not optimal, because counters that were no longer updated were only
deleted based on age. So systems with a lot of process creations were handling many
unneeded counters during every check interval. This affected CPU and memory usage when
checking such systems.
As we get the full process table, we have now changed the logic to automatically delete the
counters of all processes that no longer exist. This reduces the number of counters
significantly.
---
.werks/2899 | 17 +++++++++++
ChangeLog | 1 +
checks/ps | 73 ++++++++++++++++++++++++++++++++++++++++++++--
checks/ps.include | 2 +-
modules/check_mk_base.py | 44 +---------------------------
5 files changed, 91 insertions(+), 46 deletions(-)
diff --git a/.werks/2899 b/.werks/2899
new file mode 100644
index 0000000..9b73c9d
--- /dev/null
+++ b/.werks/2899
@@ -0,0 +1,17 @@
+Title: ps: Finally improved performance impact of perfdata when having a lot of processes
+Level: 1
+Component: checks
+Compatible: compat
+Version: 1.2.7i4
+Date: 1452500538
+Class: fix
+
+When having a lot of processes, like several thousands, on a system and monitoring them,
+this lead to a lot of performance counters being created in Check_MK. The management of
+these counters was not very optimal, because not updated counters were only deleted based
+on time. So systems with a lot of process creations were handling many non needed counters
+during every check interval. This affected CPU and memory usage during checking such systems.
+
+As we get the full process table, we have now changed the logic to automatically delete the
+counters of all processes that are not existant anymore. This reduces the number of counters
+significantly.
diff --git a/ChangeLog b/ChangeLog
index 9329466..a1a325a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -176,6 +176,7 @@
* 1320 FIX: fjdarye60_sum: Fixed bug in discovery function
* 2898 FIX: smart: Using normalized value for determining reallocated events state...
* 2888 FIX: citrix_controller.licensing: ignoring double data from piggy backs
+ * 2899 FIX: ps: Finally improved performance impact of perfdata when having a lot of processes...
Multisite:
* 2684 Added icons for downloading agent data / walks of hosts...
diff --git a/checks/ps b/checks/ps
index 78da52d..0721f78 100644
--- a/checks/ps
+++ b/checks/ps
@@ -78,8 +78,77 @@ ANY_USER = None
GRAB_USER = False
-def ps_cleanup_counters(lines):
- pass
+# FIXME: Direct access to g_item_state is not allowed in checks normally,
+# but there is no such API to access and modify it like it is needed here.
+def ps_cleanup_counters(parsed):
+ # remove legacy key used for some kind of caching
+ try:
+ del g_item_state["last.cleared.ps_"]
+ except KeyError:
+ pass
+
+ pids = ps_get_current_pids(parsed)
+
+ for ident in ps_get_counters_to_delete(pids):
+ del g_item_state[ident]
+
+
+# Get the idents of the counters which can be deleted because the process id of
+# the counter is not found anymore in the process table.
+#
+# Handle these formats of idents:
+# Old string based keys: 'ps_stat.pcpu.669': (1448634267.875281, 1),
+# New magic keys: ('ps', None, 'ps_wmic.kernel.692'): (1448633487.573496, 1092007),
+def ps_get_counters_to_delete(pids):
+ counters_to_delete = []
+ for ident, state in g_item_state.iteritems():
+ ident_type = type(ident)
+ if ident_type == tuple and ident[0] == "ps":
+ check_ident = ident[2]
+ elif ident_type != tuple and (ident.startswith("ps_stat") or ident.startswith("ps_wmic")):
+ check_ident = name
+ else:
+ continue
+
+ pid = check_ident.split(".")[-1]
+ if pid not in pids:
+ counters_to_delete.append(ident)
+ return counters_to_delete
+
+
+def ps_get_current_pids(parsed):
+ pids = []
+ for line in parsed:
+ process_info = line[1]
+ if ps_has_extended_perfdata(process_info):
+ pids.append(process_info[4])
+ return pids
+
+
+# Makes sure, that no counter with a give prefix is kept longer
+# than min_keep_seconds * 2. Counter is kept at least min_keep_seconds.
+def clear_counters(counter_name_prefix, min_keep_seconds):
+ global g_item_state
+
+ counters_to_delete = []
+
+ for name, state in g_item_state.iteritems():
+ if type(name) == tuple:
+ counter_name = name[0] # never needed, since only called by ps currently
+ else:
+ counter_name = name
+
+ if type(state) == tuple:
+ timestamp, value = state
+ else:
+ continue # unable to cleanup values without timestamp info, skip
+
+ if counter_name.startswith(counter_name_prefix):
+ if timestamp < remove_if_min_keep_seconds:
+ counters_to_delete.append(name)
+
+ for name in counters_to_delete:
+ del g_item_state[name]
# FIXME: Refactor this function for better readability
diff --git a/checks/ps.include b/checks/ps.include
index 86b3bf6..5577347 100644
--- a/checks/ps.include
+++ b/checks/ps.include
@@ -455,7 +455,7 @@ def check_ps_common(item, params, parsed, cpu_cores = 1, info_name = "processes"
# correct number of DSes. To just look at the first process
# in the agent output to make sure. We assume that at least
# one process is always present.
- extended_perfdata = ps_has_extended_perfdata(parsed[0])
+ extended_perfdata = ps_has_extended_perfdata(parsed[0][1])
if extended_perfdata:
perfdata += [ ("vsz", virtual_size),
diff --git a/modules/check_mk_base.py b/modules/check_mk_base.py
index a83cccb..25a4897 100644
--- a/modules/check_mk_base.py
+++ b/modules/check_mk_base.py
@@ -987,14 +987,7 @@ def load_item_state(hostname):
try:
g_item_state = eval(file(filename).read())
except:
- # Try old syntax
- try:
- lines = file(filename).readlines()
- for line in lines:
- line = line.split()
- g_item_state[' '.join(line[0:-2])] = ( int(line[-2]), int(line[-1]) )
- except:
- g_item_state = {}
+ g_item_state = {}
def save_item_state(hostname):
@@ -1096,41 +1089,6 @@ def last_counter_wrap():
return g_last_counter_wrap
-
-# Makes sure, that no counter with a give prefix is kept longer
-# than min_keep_seconds * 2. Counter is kept at least min_keep_seconds.
-def clear_counters(counter_name_prefix, min_keep_seconds):
- global g_item_state
-
- cleared_key = "last.cleared." + counter_name_prefix
- if cleared_key in g_item_state:
- last_cleared, none = g_item_state[cleared_key]
- if last_cleared + min_keep_seconds > time.time():
- return # recent enough
- g_item_state[cleared_key] = (time.time(), None)
-
- counters_to_delete = []
- remove_if_min_keep_seconds = time.time() - min_keep_seconds
-
- for name, state in g_item_state.iteritems():
- if type(name) == tuple:
- counter_name = name[0] # never needed, since only called by ps currently
- else:
- counter_name = name
-
- if type(state) == tuple:
- timestamp, value = state
- else:
- continue # unable to cleanup values without timestamp info, skip
-
- if counter_name.startswith(counter_name_prefix):
- if timestamp < remove_if_min_keep_seconds:
- counters_to_delete.append(name)
-
- for name in counters_to_delete:
- del g_item_state[name]
-
-
# Compute average by gliding exponential algorithm
# itemname : unique ID for storing this average until the next check
# this_time : timestamp of new value