Module: check_mk
Branch: master
Commit: 9256d8286d12041815459342d9b658e50f30bb9e
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=9256d8286d1204…
Author: Götz Golla <gg(a)mathias-kettner.de>
Date: Fri Jan 10 17:28:01 2014 +0100
job: check now monitors the time since last start of the job, limits can be configured in WATO
---
.werks/78 | 8 ++++++++
ChangeLog | 1 +
checkman/job | 35 ++++++++++++++++++++++------------
checks/job | 27 +++++++++++++++++++-------
web/plugins/wato/check_parameters.py | 22 +++++++++++++++++++++
5 files changed, 74 insertions(+), 19 deletions(-)
diff --git a/.werks/78 b/.werks/78
new file mode 100644
index 0000000..a66346a
--- /dev/null
+++ b/.werks/78
@@ -0,0 +1,8 @@
+Title: job: check now monitors the time since last start of the job, limits can be configured in WATO
+Level: 1
+Component: checks
+Version: 1.2.5i1
+Date: 1389371210
+Class: feature
+
+
diff --git a/ChangeLog b/ChangeLog
index 9a5addd..a411a03 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -36,6 +36,7 @@
* 0254 agent_vsphere: Make handling of spaces in hostnames of ESX configurable...
* 0077 cmciii.psm_current, cmciii_psm_plugs, cmciii_io, cmciii.access, cmciii.temp, cmciii.can_current, cmciii.sensor, cmciii.state: new sub checks included in one new check cmcmiii superseding and improving several previous checks of the Rittal CMCIII device...
NOTE: Please refer to the migration notes!
+ * 0078 job: check now monitors the time since last start of the job, limits can be configured in WATO
* 0103 FIX: services: Fixed bug with service inventory defined in main.mk...
* 0299 FIX: borcade_mlx_fan: Prettified output, handling "other" state now
* 0300 FIX: cisco_fru_power: Trying not to inventorize not plugged in FRUs...
diff --git a/checkman/job b/checkman/job
index 128b82c..1d46c5c 100644
--- a/checkman/job
+++ b/checkman/job
@@ -5,25 +5,27 @@ license: GPL
distribution: check_mk
description:
This check monitors state and performance information of any linux program
- call like for example regular running cronjobs.
+ call, for example regular running cronjobs.
The check uses information provided by the wrapper program {mk-job}. This
program is shipped with the linux agent and installed to {/usr/bin}.
- {mk-job} is a wrapper which is called instead of the normal program. For
- example if you have a command line {nightly-backup >/dev/null} which gets
- executed by cronjob every night, you can change the command line to
+ {mk-job} is a wrapper which is called instead of the program. For
+ example, if you have a command line {nightly-backup >/dev/null} which gets
+ executed by a cronjob every night, you can change the command line to
{mk-job backup nightly-backup >/dev/null} to let mk-job collect information
- about the job during runtime while the string {backup} is the ident of the
- job to be executed. This ident must be an unique identifier for this job
- on each host. When the job finished, mk-job writes the collected data to
- {/var/lib/check_mk_agent/job/<job-id>}. The agent outputs
- all available data to the Check_MK server.
+ about the job during runtime. In this expression, the string {backup} is
+ the identifier of the job to be executed. It must be a unique identifier
+ for this job on each host. When the job is finished, {mk-job} writes the
+ collected data to {/var/lib/check_mk_agent/job/<identifier>}. The agent sends
+ all these data to the Check_MK server.
- At the moment this check has no parameters. It reports a {CRITICAL}
- state when the exit code of the job is not 0.
+ The check is {CRITICAL} if the exit code of the job is not {0} or if the
+ critical limit for the job age has been reached. It is {WARNING} if the warning limit has been reached.
+
+ Limits can be configured with WATO.
item:
- The ident of the job defined by the first argument to {mk-job}.
+ The identifier of the job defined by the first argument to {mk-job}.
inventory:
One check per job will be created.
@@ -36,3 +38,12 @@ perfdata:
{writes}: Number of file system outputs by the process.
{max_res_bytes}: Maximum resident set size of the process during its lifetime.
{avg_mem_bytes}: Average total (data+stack+text) memory use of the process.
+
+[parameters]
+parameters(dict): parameters is a dictionary with one key
+
+ {"age"}: contains a tuple for the warning and critical limits for the time
+ since last start of the job. Units are seconds.
+
+[configuration]
+job_default_levels(dict): This variable is preset to {{ "age": ( 0, 0 ) }}, which means that the limits are disabled
diff --git a/checks/job b/checks/job
index bf58809..046a4fd 100644
--- a/checks/job
+++ b/checks/job
@@ -48,12 +48,16 @@
#max_res_kbytes 1984
#avg_mem_kbytes 0
+factory_settings["job_default_levels"] = {
+ "age": ( 0, 0 ) # disabled as default
+}
+
def inventory_job(info):
inventory = []
for line in info:
if line[0] == '==>':
item = ' '.join(line[1:-1])
- inventory.append( (item, None) )
+ inventory.append( (item, {} ) )
return inventory
def job_parse_real_time(s):
@@ -91,7 +95,8 @@ def job_parse(item, info):
return data
-def check_job(item, _no_params, info):
+def check_job(item, params, info):
+ warn, crit = params.get('age')
job = job_parse(item, info)
if not job:
return 3, 'Got no information for this job'
@@ -124,6 +129,13 @@ def check_job(item, _no_params, info):
display_value = get_age_human_readable(value)
elif key == 'start_time':
display_value = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(value) )
+ job_age = time.time() - value
+ if crit > 0 and job_age >= crit:
+ state = max(state, 2)
+ display_value += "(!!) (more than %s ago)" % get_age_human_readable(crit)
+ elif warn > 0 and job_age >= warn:
+ state = max(state, 1)
+ display_value += "(!!) (more than %s ago)" % get_age_human_readable(warn)
else:
display_value = value
@@ -133,9 +145,10 @@ def check_job(item, _no_params, info):
return state, ', '.join(output), perfdata
check_info["job"] = {
- 'check_function': check_job,
- 'inventory_function': inventory_job,
- 'service_description': 'Job %s',
- 'group': 'job',
- 'has_perfdata': True,
+ 'check_function' : check_job,
+ 'inventory_function' : inventory_job,
+ 'service_description' : 'Job %s',
+ 'default_levels_variable' : 'job_default_levels',
+ 'group' : 'job',
+ 'has_perfdata' : True,
}
diff --git a/web/plugins/wato/check_parameters.py b/web/plugins/wato/check_parameters.py
index e43c7ba..72898f8 100644
--- a/web/plugins/wato/check_parameters.py
+++ b/web/plugins/wato/check_parameters.py
@@ -2483,6 +2483,28 @@ register_check_parameters(
register_check_parameters(
subgroup_applications,
+ "job",
+ _("Age of jobs controlled by mk-job"),
+ Dictionary(
+ elements = [
+ ("age",
+ Tuple(
+ title = _("Maximum time since last start of job execution"),
+ elements = [
+ Age(title = _("Warning if above"), default_value = 0),
+ Age(title = _("Critical if above"), default_value = 0)
+ ]
+ )
+ )]
+ ),
+ TextAscii(
+ title = _("Job name"),
+ ),
+ None
+)
+
+register_check_parameters(
+ subgroup_applications,
"mssql_counters_locks",
_("MSSQL Locks"),
Dictionary(
Module: check_mk
Branch: master
Commit: f3dcca2e2b26332fc2b07baed673637b093d8839
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=f3dcca2e2b2633…
Author: Bernd Stroessenreuther <bs(a)mathias-kettner.de>
Date: Fri Jan 10 14:30:59 2014 +0100
fixed typo in werk 442
---
.werks/442 | 2 +-
ChangeLog | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/.werks/442 b/.werks/442
index 0b9169c..eefc367 100644
--- a/.werks/442
+++ b/.werks/442
@@ -1,4 +1,4 @@
-Title: dell_om_disks: Treat global host spare disks as OK, instead of WARN
+Title: dell_om_disks: Treat global hot spare disks as OK, instead of WARN
Level: 1
Component: checks
Class: fix
diff --git a/ChangeLog b/ChangeLog
index 4f156a2..9cb3276 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -83,7 +83,7 @@
* 0440 FIX: heartbeat_crm: Inventory more gracefully handles case where agent output is invalid...
* 0113 FIX: blade_blades: Now only make inventory for blades that are powered on...
* 0441 FIX: megaraid_bbu: Fix several false alarms and cases where inventory failed
- * 0442 FIX: dell_om_disks: Treat global host spare disks as OK, instead of WARN...
+ * 0442 FIX: dell_om_disks: Treat global hot spare disks as OK, instead of WARN...
* 0443 FIX: brocade_fcport: cope with firmware that does not provide speed information...
* 0322 FIX: timemachine: Check now also works if there are spaces in the name of the backup volume or the hostname
* 0253 FIX: windows agent: fixed crash on processing eventlog records...
Module: check_mk
Branch: master
Commit: d35100dd87a4f08504c2dd9681c4a1275761487c
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=d35100dd87a4f0…
Author: Bernd Stroessenreuther <bs(a)mathias-kettner.de>
Date: Fri Jan 10 11:02:32 2014 +0100
emcvnx_hwstatus: FIX: the EMC VNX 7500 series has a new type of enclosure called SPE, which broke the check because of its unexpected output
---
checks/emcvnx_hwstatus | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/checks/emcvnx_hwstatus b/checks/emcvnx_hwstatus
index 0364b56..d2fbd9d 100644
--- a/checks/emcvnx_hwstatus
+++ b/checks/emcvnx_hwstatus
@@ -82,6 +82,9 @@
def parse_emcvnx_hwstatus(info):
parsed = {}
for line in info:
+ # recognize Enclosures by a line like
+ # DAE6S Bus 0 Enclosure 1
+ # with maybe an additional error message if Overall Status is not ok
if len(line) > 3 and line[1] == "Bus" and line[3] == "Enclosure":
encid = line[2] + "/" + line[4]
enc = {}
@@ -90,6 +93,19 @@ def parse_emcvnx_hwstatus(info):
enc["Overall Status"] = line[5].replace("*", "")
else:
enc["Overall Status"] = "No Errors Reported"
+ # recognize Enclosures by a line like
+ # SPE5 Enclosure SPE
+ # with maybe an additional error message if Overall Status is not ok
+ elif len(line) > 2 and line[1] == "Enclosure":
+ encid = line[2]
+ enc = {}
+ parsed[encid] = enc
+ if len(line) > 3:
+ enc["Overall Status"] = line[3].replace("*", "")
+ else:
+ enc["Overall Status"] = "No Errors Reported"
+ # gather additional information about an Enclosure found in one
+ # of the cases above
elif len(line) > 2 and line[-2] == "State:":
if line[0] == "SP":
device = line[0] + " " + line[1]
Module: check_mk
Branch: master
Commit: ccfea1adbeb7fff775ad9f2fe0adeb638e686032
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=ccfea1adbeb7ff…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Fri Jan 10 08:35:44 2014 +0100
Updated bug entries #2125
---
.bugs/2125 | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/.bugs/2125 b/.bugs/2125
new file mode 100644
index 0000000..e013c37
--- /dev/null
+++ b/.bugs/2125
@@ -0,0 +1,14 @@
+Title: EventFilterState() based view filters do not handle the case of all boxes unticked as expected
+Component: multisite
+State: open
+Date: 2014-01-10 08:32:57
+Targetversion: 1.2.5i1
+Class: nastiness
+
+The view filter EventFilterState renders a set of checkboxes, for example one per state. If all boxes
+are unchecked, the filter behaves as if all boxes were checked -> no filtering. The user expects that
+this setting makes the filter exclude all rows, i.e. produces an empty result.
+
+The current implementation might be confusing. Either make the filter work as expected or make the
+filter automatically check all boxes when the last checkbox is unchecked. This should also show the
+user that excluding all states is senseless.