Module: check_mk
Branch: master
Commit: 7d9cb8e4d7d214446e978c9e82ab20e39922c092
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=7d9cb8e4d7d214446e978c9e82ab20e39922c092
Author: Bastian Kuhn <bk(a)mathias-kettner.de>
Date: Fri Oct 24 15:51:23 2014 +0200
#1190 statgrab_cpu: Check can now handle parameters
---
.werks/1190 | 9 +++++++
ChangeLog | 1 +
checks/cpu_util.include | 58 +++++++++++++++++++++++++++++++++++++++
checks/kernel | 69 +++++++----------------------------------------
checks/statgrab_cpu | 51 ++++++++++++++---------------------
5 files changed, 98 insertions(+), 90 deletions(-)
diff --git a/.werks/1190 b/.werks/1190
new file mode 100644
index 0000000..3cda189
--- /dev/null
+++ b/.werks/1190
@@ -0,0 +1,9 @@
+Title: statgrab_cpu: Check can now handle parameters
+Level: 1
+Component: checks
+Compatible: compat
+Version: 1.2.5i6
+Date: 1414158659
+Class: feature
+
+
diff --git a/ChangeLog b/ChangeLog
index 1c45197..59f6890 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -74,6 +74,7 @@
* 1442 ups_socomec_out_source: New check for checking the power source of out phases for Socomec UPSs
* 0662 domino_mailqueues: new check to monitor mail queues in Lotus Domino
* 1188 veeam_client: Check now also outputs ReadSize and TransferedSize...
+ * 1190 statgrab_cpu: Check can now handle parameters
* 1051 FIX: tcp_conn_stats: fix missing performance data...
* 1142 FIX: winperf_ts_sessions: fix computation, check has never really worked
* 1090 FIX: zfsget: fixed exception which happened on incomplete zfs entries
diff --git a/checks/cpu_util.include b/checks/cpu_util.include
index 410d24a..0ef034b 100644
--- a/checks/cpu_util.include
+++ b/checks/cpu_util.include
@@ -77,3 +77,61 @@ def check_cpu_util(util, params, this_time = None):
perfdata += extraperf # reference curve for predictive levels
return state, infotext, perfdata
+# This one can handle user, system and wait
+def check_cpu_util_unix(values, params):
+ this_time = int(time.time())
+ # Compute jiffi-differences of all relevant counters
+ diff_values = []
+ n = 0
+ global g_counters
+ for v in values:
+ n += 1
+ countername = "cpu.util.%d" % n
+ last_time, last_val = g_counters.get(countername, (0, 0))
+ diff_values.append(v - last_val)
+ g_counters[countername] = (this_time, v)
+
+ sum_jiffies = sum(diff_values) # do not account for steal!
+ if sum_jiffies == 0:
+ raise MKCounterWrapped(None, "Too short time difference since last check")
+ print sum_jiffies
+
+ user = diff_values[0] + diff_values[1] # add user + nice
+ system = diff_values[2]
+ wait = diff_values[3]
+ user_perc = 100.0 * float(user) / float(sum_jiffies)
+ system_perc = 100.0 * float(system) / float(sum_jiffies)
+ wait_perc = 100.0 * float(wait) / float(sum_jiffies)
+ perfdata = [
+ ( "user", "%.3f" % user_perc ),
+ ( "system", "%.3f" % system_perc ),
+ ( "wait", "%.3f" % wait_perc ) ]
+
+ yield 0, "user: %.1f%%, system: %.1f%%" % (user_perc, system_perc), perfdata
+
+ # Handle level on iowait
+ state = 0
+ if "iowait" in params and params["iowait"] != None:
+ warn, crit = params["iowait"]
+ if wait_perc >= crit:
+ state = 2
+ elif wait_perc >= warn:
+ state = 1
+ yield state, "wait: %.1f%%" % (wait_perc)
+
+ # Total utilization
+ util_total_perc = user_perc + system_perc + wait_perc
+ state = 0
+ levelstext = ""
+ if "util" in params:
+ warn, crit = params["util"]
+ if util_total_perc >= crit:
+ state = 2
+ elif util_total_perc >= warn:
+ state = 1
+ else:
+ state = 0
+ if state:
+ levelstext = " (warn/crit at %.1f%%/%.1f%%)" % (warn, crit)
+
+ yield state, "total: %.1f%%" % util_total_perc + levelstext
diff --git a/checks/kernel b/checks/kernel
index 6135420..e773fee 100644
--- a/checks/kernel
+++ b/checks/kernel
@@ -129,8 +129,7 @@ def kernel_check_cpu_utilization(item, params, info):
# Look for entry beginning with "cpu"
f = [ l for l in info if l[0] == "cpu" ]
if len(f) != 1:
- yield 3, "More than one line with CPU info found. This check is not cluster-enabled."
- return
+ return 3, "More than one line with CPU info found. This check is not cluster-enabled."
line = f[0]
if len(line) < 8:
@@ -140,63 +139,14 @@ def kernel_check_cpu_utilization(item, params, info):
# 'cpu' user nice system idle wait hw-int sw-int (steal ...)
# convert number to int
values = [ int(x) for x in line[1:8] ]
- this_time = int(time.time())
-
- # Compute jiffi-differences of all relevant counters
- diff_values = []
- n = 0
- global g_counters
- for v in values:
- n += 1
- countername = "cpu.util.%d" % n
- last_time, last_val = g_counters.get(countername, (0, 0))
- diff_values.append(v - last_val)
- g_counters[countername] = (this_time, v)
-
- sum_jiffies = sum(diff_values[0:7]) # do not account for steal!
- if sum_jiffies == 0:
- raise MKCounterWrapped(None, "Too short time difference since last check")
-
- user = diff_values[0] + diff_values[1] # add user + nice
- system = diff_values[2]
- wait = diff_values[4]
- user_perc = 100.0 * float(user) / float(sum_jiffies)
- system_perc = 100.0 * float(system) / float(sum_jiffies)
- wait_perc = 100.0 * float(wait) / float(sum_jiffies)
- perfdata = [
- ( "user", "%.3f" % user_perc ),
- ( "system", "%.3f" % system_perc ),
- ( "wait", "%.3f" % wait_perc ) ]
-
- yield 0, "user: %.1f%%, system: %.1f%%" % (user_perc, system_perc), perfdata
-
- # Handle level on iowait
- state = 0
- if "iowait" in params and params["iowait"] != None:
- warn, crit = params["iowait"]
- if wait_perc >= crit:
- state = 2
- elif wait_perc >= warn:
- state = 1
- yield state, "wait: %.1f%%" % (wait_perc)
-
- # Total utilization
- util_total_perc = user_perc + system_perc + wait_perc
- state = 0
- levelstext = ""
- if "util" in params:
- warn, crit = params["util"]
- if util_total_perc >= crit:
- state = 2
- elif util_total_perc >= warn:
- state = 1
- else:
- state = 0
- if state:
- levelstext = " (warn/crit at %.1f%%/%.1f%%)" % (warn, crit)
-
- yield state, "total: %.1f%%" % util_total_perc + levelstext
-
+ user = values[0]
+ nice = values[1]
+ system = values[2]
+ wait = values[4]
+ hw_int = values[5]
+ sw_int = values[6]
+ idle = 100 - user - system - nice - wait
+ return check_cpu_util_unix([user, nice, system, wait, hw_int, sw_int, idle], params)
check_info["kernel.util"] = {
'check_function': kernel_check_cpu_utilization,
@@ -205,5 +155,6 @@ check_info["kernel.util"] = {
'has_perfdata': True,
'default_levels_variable': 'kernel_util_default_levels',
'group': 'cpu_iowait',
+ 'includes': ['cpu_util.include'],
}
diff --git a/checks/statgrab_cpu b/checks/statgrab_cpu
index 7bb0880..b52e0e2 100644
--- a/checks/statgrab_cpu
+++ b/checks/statgrab_cpu
@@ -28,49 +28,38 @@
def inventory_statgrab_cpu(info):
if len(info) > 1:
- return [(None, None)]
+ return [(None, {})]
def check_statgrab_cpu(item, params, info):
- global g_counters
+ if not params:
+ params = {}
user = 0
+ nice = 0
for var, value in info:
if var == 'iowait':
wait = int(value)
elif var == 'kernel':
system = int(value)
- elif var == 'nice' or var == 'user':
- user += int(value)
+ elif var == 'nice':
+ nice = int(value)
+ elif var == 'user':
+ user = int(value)
elif var == 'total':
total = int(value)
-
- values = [ user, system, wait, total ]
- this_time = int(time.time())
- diff_values = [ ]
- n = 0
- for v in values:
- n += 1
- countername = "cpu.util.%d" % n
- last_time, last_val = g_counters.get(countername, (0, 0))
- diff_values.append(v - last_val)
- g_counters[countername] = (this_time, v)
-
- diff_total = diff_values[3]
- if diff_total == 0:
- return (0, "too short interval")
- user_perc = 100.0 * float(diff_values[0]) / float(diff_total)
- system_perc = 100.0 * float(diff_values[1]) / float(diff_total)
- wait_perc = 100.0 * float(diff_values[2]) / float(diff_total)
- perfdata = [
- ( "user", "%.3f" % user_perc ),
- ( "system", "%.3f" % system_perc ),
- ( "wait", "%.3f" % wait_perc ) ]
- return (0, "user: %2.0f%%, system: %2.0f%%, wait: %2.0f%%" % (user_perc, system_perc, wait_perc), perfdata)
+ idle = 100 - total
+ # user, nice, system, wait, hw-int, sw-int, idle
+ values = [ user, nice, system, wait, 0, 0, idle ]
+
+ # No return cause of the use of yield
+ return check_cpu_util_unix(values, params)
check_info["statgrab_cpu"] = {
- 'check_function': check_statgrab_cpu,
- 'inventory_function': inventory_statgrab_cpu,
- 'service_description': 'CPU utilization',
- 'has_perfdata': True,
+ 'check_function': check_statgrab_cpu,
+ 'inventory_function': inventory_statgrab_cpu,
+ 'service_description': 'CPU utilization',
+ 'has_perfdata': True,
+ 'includes': ['cpu_util.include'],
+ 'group': 'cpu_iowait',
}