Module: check_mk
Branch: master
Commit: fa4968a07aee9be6d74142eb10d9b7eb0d14a093
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=fa4968a07aee9b…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Mon Dec 19 15:12:30 2016 +0100
Another addon for werk 4156
Change-Id: Ib8bab97052a28188c0bfb12efcc50bad479a4c6c
---
bin/mkeventd | 32 ++++++++++++++++++++------------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/bin/mkeventd b/bin/mkeventd
index 978169a..8e94a9d 100755
--- a/bin/mkeventd
+++ b/bin/mkeventd
@@ -2725,26 +2725,35 @@ class EventServer:
# stop_overflow_notify Stop creating new events, create overflow event, notfy
# delete_oldest Delete oldest event, create new event
# protected by lock_eventstatus
+
+ # Returns False if the event has been created and actions should be
+ # performed on that event
def _handle_event_limit(self, ty, event):
assert ty in [ "overall", "by_rule", "by_host" ]
- num = g_event_status.get_num_open_events_by(ty, event)
+ num_already_open = g_event_status.get_num_open_events_by(ty, event)
limit = g_config["event_limit"][ty]["limit"]
action = g_config["event_limit"][ty]["action"]
- verbose(" Type: %s, Open events: %d, Limit: %d" % (ty, num, limit))
+ verbose(" Type: %s, already open events: %d, Limit: %d" % (ty, num_already_open, limit))
- # Check limit under assumption that one more event would be opened
- below_limit = num < (limit-1)
- above_limit = num >= limit
+ # Limit not reached: add new event
+ if num_already_open < limit:
+ num_already_open += 1 # after adding this event
- if below_limit:
- return False # Fine. Hand over to next check.
+ # Limit even then still not reached: we are fine
+ if num_already_open < limit:
+ return False
+ # Delete oldest messages if that is the configure method of keeping the limit
if action == "delete_oldest":
- g_event_status.remove_oldest_event(ty, event)
- return False # Should have solved the issue. Hand over to next check.
+ while num_already_open > limit:
+ g_perfcounters.count("overflows")
+ g_event_status.remove_oldest_event(ty, event)
+ num_already_open -= 1
+ return False
- if above_limit:
+ # Limit reached already in the past: Simply drop silently
+ if num_already_open > limit:
# Just log in verbose mode! Otherwise log file will be flooded
verbose(" Skip processing because limit is already in effect")
g_perfcounters.count("overflows")
@@ -2754,7 +2763,6 @@ class EventServer:
# This is the event which reached the limit, allow creation of it. Further
# events will be stopped.
- g_event_status.new_event(event)
# Perform one time actions
overflow_event = self._create_overflow_event(ty, event)
@@ -2767,7 +2775,7 @@ class EventServer:
log(" Creating overflow notification")
do_notify(overflow_event)
- return True # Just reached limit. Created this last event. Stop processing.
+ return False
def _create_overflow_event(self, ty, event):
Module: check_mk
Branch: master
Commit: 998ac872458d92aeaf38c4f2d2bfdf5cd5ae2797
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=998ac872458d92…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Mon Dec 19 14:12:47 2016 +0100
EC: small logging improvement in EC limit handling
Change-Id: I4110315cdb7f4db29c39d1406225fb768eb89f8c
---
bin/mkeventd | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/bin/mkeventd b/bin/mkeventd
index e6f7358..978169a 100755
--- a/bin/mkeventd
+++ b/bin/mkeventd
@@ -2746,11 +2746,11 @@ class EventServer:
if above_limit:
# Just log in verbose mode! Otherwise log file will be flooded
- verbose(" Skip processing because limit is in effect")
+ verbose(" Skip processing because limit is already in effect")
g_perfcounters.count("overflows")
return True # Prevent creation and prevent one time actions (below)
- log(" The limit has been reached")
+ log(" The %s limit has been reached" % ty)
# This is the event which reached the limit, allow creation of it. Further
# events will be stopped.
Module: check_mk
Branch: master
Commit: 4aeb412d483bfad1a96ee76ae135fb63b8ca69c0
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=4aeb412d483bfa…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Mon Dec 19 14:09:17 2016 +0100
4156 FIX Fix off-by-one error in EC limit handling
If the limit was e.g. 10, in fact 11 messages would have been
allowed (<b>plus</b> the overflow event). This has been fixed.
Change-Id: I0890b7899dd29056f717c115c889d74e17511421
---
.werks/4156 | 10 ++++++++++
ChangeLog | 1 +
bin/mkeventd | 5 +++--
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/.werks/4156 b/.werks/4156
new file mode 100644
index 0000000..eb02a8b
--- /dev/null
+++ b/.werks/4156
@@ -0,0 +1,10 @@
+Title: Fix off-by-one error in EC limit handling
+Level: 1
+Component: ec
+Compatible: compat
+Version: 1.4.0i4
+Date: 1482152918
+Class: fix
+
+If the limit was e.g. 10, in fact 11 messages would have been
+allowed (<b>plus</b> the overflow event). This has been fixed.
diff --git a/ChangeLog b/ChangeLog
index 23d9953..122a8dd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -15,6 +15,7 @@
* 4154 Do not send notifications if host found and currently in downtime...
NOTE: Please refer to the migration notes!
* 4155 FIX: Do not perform actions on events if overflow limit is active...
+ * 4156 FIX: Fix off-by-one error in EC limit handling...
1.4.0i3:
diff --git a/bin/mkeventd b/bin/mkeventd
index 4ee5a5d..e6f7358 100755
--- a/bin/mkeventd
+++ b/bin/mkeventd
@@ -2733,8 +2733,9 @@ class EventServer:
action = g_config["event_limit"][ty]["action"]
verbose(" Type: %s, Open events: %d, Limit: %d" % (ty, num, limit))
- below_limit = num < limit
- above_limit = num > limit
+ # Check limit under assumption that one more event would be opened
+ below_limit = num < (limit-1)
+ above_limit = num >= limit
if below_limit:
return False # Fine. Hand over to next check.
Module: check_mk
Branch: master
Commit: f7808527d309eafeb9da8be284cd0184a0fe6184
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=f7808527d309ea…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Mon Dec 19 14:06:49 2016 +0100
4155 FIX Do not perform actions on events if overflow limit is active
If the EC blocks a new event because of an active overflow limit
then it now correctly also omits any configured actions (e.g.
a notification) on that event.
Previously actions would be executed anyway - thereby undermining
the overflow protection. Furthermore the action "Send notification"
would run into an exception because of a missing event ID.
Change-Id: I5f4f3902a336f9c635544027933f0d53e51fb359
---
.werks/4155 | 15 +++++++++++++++
ChangeLog | 1 +
bin/mkeventd | 37 +++++++++++++++++++------------------
3 files changed, 35 insertions(+), 18 deletions(-)
diff --git a/.werks/4155 b/.werks/4155
new file mode 100644
index 0000000..1d61aa7
--- /dev/null
+++ b/.werks/4155
@@ -0,0 +1,15 @@
+Title: Do not perform actions on events if overflow limit is active
+Level: 2
+Component: ec
+Compatible: compat
+Version: 1.4.0i4
+Date: 1482152679
+Class: fix
+
+If the EC blocks a new event because of an active overflow limit
+then it now correctly also omits any configured actions (e.g.
+a notification) on that event.
+
+Previously actions would be executed anyway - thereby undermining
+the overflow protection. Furthermore the action "Send notification"
+would run into an exception because of a missing event ID.
diff --git a/ChangeLog b/ChangeLog
index eb242a5..23d9953 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -14,6 +14,7 @@
Event Console:
* 4154 Do not send notifications if host found and currently in downtime...
NOTE: Please refer to the migration notes!
+ * 4155 FIX: Do not perform actions on events if overflow limit is active...
1.4.0i3:
diff --git a/bin/mkeventd b/bin/mkeventd
index ae6bb11..4ee5a5d 100755
--- a/bin/mkeventd
+++ b/bin/mkeventd
@@ -2177,16 +2177,14 @@ class EventServer:
else:
event["phase"] = "open"
- with lock_eventstatus:
- self.new_event_respecting_limits(event)
-
- if event["phase"] == "open":
- event_has_opened(rule, event)
- if rule.get("autodelete"):
- event["phase"] = "closed"
- log_event_history(event, "AUTODELETE")
- with lock_eventstatus:
- g_event_status.remove_event(event)
+ if self.new_event_respecting_limits(event):
+ if event["phase"] == "open":
+ event_has_opened(rule, event)
+ if rule.get("autodelete"):
+ event["phase"] = "closed"
+ log_event_history(event, "AUTODELETE")
+ with lock_eventstatus:
+ g_event_status.remove_event(event)
return
# End of loop over rules.
@@ -2704,18 +2702,21 @@ class EventServer:
# protected by lock_eventstatus
def new_event_respecting_limits(self, event):
- verbose("Checking for event limits")
+ verbose("Checking limit for message from %s (rule '%s')" % (
+ event["host"], event["rule_id"]))
- if self._handle_event_limit("overall", event):
- return
+ with lock_eventstatus:
+ if self._handle_event_limit("overall", event):
+ return False
- if self._handle_event_limit("by_host", event):
- return
+ if self._handle_event_limit("by_host", event):
+ return False
- if self._handle_event_limit("by_rule", event):
- return
+ if self._handle_event_limit("by_rule", event):
+ return False
- g_event_status.new_event(event)
+ g_event_status.new_event(event)
+ return True
# The following actions can be configured:
Module: check_mk
Branch: master
Commit: 98968cdbbde6ba8f3a4cdc3f5373e47f844ab0b7
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=98968cdbbde6ba…
Author: Simon Betz <si(a)mathias-kettner.de>
Date: Mon Dec 19 08:05:34 2016 +0100
4170 local: local check is now clusteraware. Best or worst state in cluster mode can be configured
Change-Id: Iea613ba04877ed54fab98f4846c1015b5bf51142
---
.werks/4170 | 11 +++
ChangeLog | 1 +
checks/local | 138 +++++++++++++++++++++++------------
web/plugins/wato/check_parameters.py | 20 +++++
4 files changed, 125 insertions(+), 45 deletions(-)
diff --git a/.werks/4170 b/.werks/4170
new file mode 100644
index 0000000..1f0eb6d
--- /dev/null
+++ b/.werks/4170
@@ -0,0 +1,11 @@
+Title: local: local check is now clusteraware. Best or worst state in cluster mode can be configured
+Level: 1
+Component: checks
+Compatible: compat
+Version: 1.4.0i4
+Date: 1482131073
+Class: feature
+
+Now you're able to determine the state of local checks on clusters via {{Settings for local checks}}
+in {{Host & Service parameters}}. Within these settings you can choose between best or worst state.
+The default setting is worst state.
diff --git a/ChangeLog b/ChangeLog
index 598017a..eb242a5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,6 +40,7 @@
* 4098 ups_cps_battery, ups_cps_battery.temp, ups_cps_inphase, ups_cps_outphase: Several checks to monitor CPS UPS devices
* 4118 check_bi_aggr: changed check and WATO rule to support Kerberos auth...
* 4104 veeam_tapejobs: New check to monitor Veeam tape jobs
+ * 4170 local: local check is now clusteraware. Best or worst state in cluster mode can be configured
* 3987 FIX: Check_MK Agent Access: Windows agent reported incorrect only from value
* 3952 FIX: diskstat: fixed bug if multipath devices having an alias...
* 3939 FIX: f5_bigip_conns: readded performance data and graphs...
diff --git a/checks/local b/checks/local
index db71272..a73f548 100644
--- a/checks/local
+++ b/checks/local
@@ -99,61 +99,109 @@ def inventory_local(info):
inventory = []
# Lines with P do not need to supply a text
for line in info:
- if len(line) >= 4 or len(line) == 3 and line[0] == 'P':
- inventory.append( (line[1], None) )
+ nodename = line[0]
+ stripped_line = line[1:]
+ if len(stripped_line) >= 4 or len(stripped_line) == 3 and stripped_line[0] == 'P':
+ inventory.append( (stripped_line[1], None) )
else:
- raise MKGeneralException("Invalid line in agent section <<<local>>>: %s" % " ".join(line))
+ raise MKGeneralException("Invalid line in agent section <<<local>>>: %s" % " ".join(stripped_line))
return inventory
+# Some helper functions
+def _parse_local_line(line):
+ if not (len(line) >= 4 or (len(line) == 3 and line[0] == 'P')):
+ return 3, "Incomplete line in local check output: %s" % " ".join(line), []
+
+ statechar = line[0]
+ perftxt = line[2]
+
+ # convert eventually escaped newinfo_line chars to real newinfo_lines
+ # (will be converted back later individually for the different cores)
+ output = " ".join(line[3:]).replace("\\n", "\n")
+
+ perfdata = []
+ if perftxt != "-":
+ for entry in perftxt.split('|'):
+ try:
+ varname, valuetxt = entry.split('=')
+ values = valuetxt.split(';')
+ perfdata.append( tuple([varname] + values) )
+
+ except ValueError:
+ return 3, "Invalid performance data %s in local check output %s" % \
+ (perftxt, " ".join(line)), []
+
+ if statechar == 'P':
+ state, texts = local_compute_state(perfdata)
+ if output:
+ texts = [output] + texts
+ output = ", ".join(texts)
+
+ else:
+ try:
+ state = int(statechar)
+ except:
+ return 3, "Invalid state %s in local check output %s: must be P, 0, 1, 2 or 3" % \
+ (statechar, " ".join(line)), []
+
+ if state not in range(0, 4):
+ output += ", local check has sent invalid state %d" % state
+ state = 3
+
+ return state, output, perfdata
+
+
+def _calculate_local_best_state(collected_stats):
+ states = []
+ infotexts = []
+ perfdatas = []
+ for nodename, attrs in collected_stats.items():
+ for itemname, (state, output, perfdata) in attrs.items():
+ if nodename is not None:
+ output = "On node %s: %s" % (nodename, output)
+ states.append(state)
+ infotexts.append(output)
+ perfdatas += perfdata
+ return min(states), ", ".join(infotexts), perfdatas
+
+
+def _calculate_local_worst_state(collected_stats):
+ for nodename, attrs in collected_stats.items():
+ for itemname, (state, output, perfdata) in attrs.items():
+ if nodename is not None:
+ output = "On node %s: %s" % (nodename, output)
+ yield state, output, perfdata
+
+
def check_local(item, params, info):
+ collected_stats = {}
for line in info:
+ nodename = line[0]
+ stripped_line = line[1:]
# Ignore invalid lines, tolerate bugs in local checks
# of unexperienced users
- if len(line) >= 2 and line[1] == item:
- if not (len(line) >= 4 or (len(line) == 3 and line[0] == 'P')):
- return 3, "Incomplete line in local check output: %s" % " ".join(line)
-
- statechar = line[0]
- perftxt = line[2]
-
- output = " ".join(line[3:])
- # convert eventually escaped newline chars to real newlines
- # (will be converted back later individually for the different cores)
- output = output.replace("\\n", "\n")
-
- perfdata = []
- if perftxt != "-":
- # new: allow multiple perfdata by using | as separator
- for entry in perftxt.split('|'):
- try:
- varname, valuetxt = entry.split('=')
- values = valuetxt.split(';')
- perfdata.append(tuple( [varname] + values ))
- except ValueError:
- return 3, "Invalid performance data %s in local check output %s" % \
- (perftxt, " ".join(line))
- if statechar == 'P':
- state, texts = local_compute_state(perfdata)
- if output:
- texts = [output] + texts
- output = ", ".join(texts)
- else:
- try:
- state = int(statechar)
- except:
- return 3, "Invalid state %s in local check output %s: must be P, 0, 1, 2 or 3" % \
- (statechar, " ".join(line))
+ if len(stripped_line) >= 2 and stripped_line[1] == item:
+ collected_stats.setdefault(nodename, {})
+ collected_stats[nodename].setdefault(item, _parse_local_line(stripped_line))
+
+ if collected_stats == {}:
+ yield 3, "No data found in agent output"
+ return
- if state not in range(0, 4):
- output += ", local check has sent invalid state %d" % state
- state = 3
- return (state, output, perfdata)
+ if params is not None and params.get("outcome_on_cluster", "worst") == "best":
+ yield _calculate_local_best_state(collected_stats)
+ return
+ else:
+ for res in _calculate_local_worst_state(collected_stats):
+ yield res
check_info["local"] = {
- 'inventory_function' : inventory_local,
- 'check_function' : check_local,
- 'service_description' : '%s',
- 'has_perfdata' : True,
+ 'check_function' : check_local,
+ 'inventory_function' : inventory_local,
+ 'service_description' : '%s',
+ 'has_perfdata' : True,
+ 'node_info' : True,
+ 'group' : 'local',
}
diff --git a/web/plugins/wato/check_parameters.py b/web/plugins/wato/check_parameters.py
index 6cb567c..92a622e 100644
--- a/web/plugins/wato/check_parameters.py
+++ b/web/plugins/wato/check_parameters.py
@@ -1423,6 +1423,26 @@ register_rule(group + '/' + subgroup_applications,
)
+register_check_parameters(
+ subgroup_applications,
+ "local",
+ _("Settings for local checks"),
+ Dictionary(
+ elements = [
+ ("outcome_on_cluster", DropdownChoice(choices = [
+ ("worst", _("Worst state")),
+ ("best", _("Best state")),
+ ],
+ title = _("Clusters: Prefered check result of local checks"),
+ help = _("If you're running local checks on clusters via clustered services rule "
+ "you can influence the check result with this rule. You can choose between "
+ "best or worst state. Default setting is worst state."),
+ default_value = "worst"))
+ ]
+ ),
+ TextAscii(title = _("Name of local item")),
+ "dict"
+)
register_check_parameters(