Module: check_mk
Branch: master
Commit: cfe28fafad99f88e4fb5568f249d7aa103d58e34
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=cfe28fafad99f8…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Tue Aug 5 08:39:14 2014 +0200
#1152 FIX mk-job: The check now captures currently running jobs and their start time
Previously the check only got data for jobs which had already finished. This
was a bit weird, because the start time and duration of the jobs could be configured
to be monitored and checked, but these checks were not applied while a job was still running.
You need to update the agent plugin mk-job to make this work correctly.
---
.werks/1152 | 13 +++++++++++++
ChangeLog | 1 +
agents/mk-job | 6 +++---
checks/job | 45 ++++++++++++++++++++++++++++++++++++---------
4 files changed, 53 insertions(+), 12 deletions(-)
diff --git a/.werks/1152 b/.werks/1152
new file mode 100644
index 0000000..05950c8
--- /dev/null
+++ b/.werks/1152
@@ -0,0 +1,13 @@
+Title: mk-job: The check now captures currently running jobs and their start time
+Level: 1
+Component: checks
+Compatible: compat
+Version: 1.2.5i6
+Date: 1407220584
+Class: fix
+
+Previously the check did only get data of jobs which already had finished. This
+was a bit weird, because the start time and duration of the jobs could be configured
+to be monitored and checked, but was not applied during runtime.
+
+You need to update the agent plugin mk-job to make this work correctly.
diff --git a/ChangeLog b/ChangeLog
index 038136c..bdb0be4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -18,6 +18,7 @@
* 1071 FIX: oracle_rman_backups: Only inventorize ARCHIVELOG / DB FULL / DB INCR
entries...
* 0195 FIX: fc_port: Check temporary disabled cause of problems with automatic
detection...
NOTE: Please refer to the migration notes!
+ * 1152 FIX: mk-job: The check now captures currently running jobs and their start
time...
Multisite:
* 1066 Implemented Dashboard Designer...
diff --git a/agents/mk-job b/agents/mk-job
index 04da0c8..5f9ba96 100755
--- a/agents/mk-job
+++ b/agents/mk-job
@@ -59,9 +59,9 @@ if ! type $1 >/dev/null 2>&1; then
exit 1
fi
-date +"start_time %s" > "$OUTPUT_PATH/.$IDENT.running"
-/usr/bin/time -o "$OUTPUT_PATH/.$IDENT.running" --append \
+date +"start_time %s" > "$OUTPUT_PATH/$IDENT.running"
+/usr/bin/time -o "$OUTPUT_PATH/$IDENT.running" --append \
-f "exit_code %x\nreal_time %E\nuser_time %U\nsystem_time %S\nreads
%I\nwrites %O\nmax_res_kbytes %M\navg_mem_kbytes %K\ninvol_context_switches
%c\nvol_context_switches %w" $@
RC=$?
-mv "$OUTPUT_PATH/.$IDENT.running" "$OUTPUT_PATH/$IDENT"
+mv "$OUTPUT_PATH/$IDENT.running" "$OUTPUT_PATH/$IDENT"
exit $RC
diff --git a/checks/job b/checks/job
index 046a4fd..aa47ed7 100644
--- a/checks/job
+++ b/checks/job
@@ -57,7 +57,8 @@ def inventory_job(info):
for line in info:
if line[0] == '==>':
item = ' '.join(line[1:-1])
- inventory.append( (item, {} ) )
+ if not item.endswith('.running'):
+ inventory.append( (item, {} ) )
return inventory
def job_parse_real_time(s):
@@ -71,11 +72,26 @@ def job_parse_real_time(s):
def job_parse(item, info):
found = False
+ found_running = False
data = {}
for line in info:
if ' '.join(line[1:-1]) == item:
found = True
+ elif ' '.join(line[1:-1]) == item + '.running':
+ # There might be a second section per job, the contents of the
+ # <ident>.running file which exists during execution of the job.
+ # We use the start_time from this file.
+ found_running = True
+
+ elif found_running and line[0] == '==>':
+ continue # simply skip over this line
+
+ elif found_running and len(line) == 2:
+ key, val = line
+ if key == 'start_time':
+ data['running_start_time'] = int(val)
+
elif found and line[0] == '==>':
break # Stop at next section
@@ -101,10 +117,28 @@ def check_job(item, params, info):
if not job:
return 3, 'Got no information for this job'
+ def process_start_time(value, state, warn, crit):
+ display_value = time.strftime("%Y-%m-%d %H:%M:%S",
time.localtime(value) )
+ job_age = time.time() - value
+ if crit > 0 and job_age >= crit:
+ state = max(state, 2)
+ display_value += "(!!) (more than %s ago)" %
get_age_human_readable(crit)
+ elif warn > 0 and job_age >= warn:
+ state = max(state, 1)
+ display_value += "(!!) (more than %s ago)" %
get_age_human_readable(warn)
+ return state, display_value
+
state = 0
output = []
perfdata = []
+ if 'running_start_time' in job:
+ output.append('Currently running')
+ state, display_value = process_start_time(job['running_start_time'],
state, warn, crit)
+ output.append('(Started: %s)' % display_value)
+ return state, ' '.join(output)
+
+
txt = 'Exit-Code: %d' % job['exit_code']
if job['exit_code'] != 0:
state = max(state, 2)
@@ -128,14 +162,7 @@ def check_job(item, params, info):
elif key in [ 'real_time', 'user_time', 'system_time' ]:
display_value = get_age_human_readable(value)
elif key == 'start_time':
- display_value = time.strftime("%Y-%m-%d %H:%M:%S",
time.localtime(value) )
- job_age = time.time() - value
- if crit > 0 and job_age >= crit:
- state = max(state, 2)
- display_value += "(!!) (more than %s ago)" %
get_age_human_readable(crit)
- elif warn > 0 and job_age >= warn:
- state = max(state, 1)
- display_value += "(!!) (more than %s ago)" %
get_age_human_readable(warn)
+ state, display_value = process_start_time(value, state, warn, crit)
else:
display_value = value