Module: check_mk
Branch: master
Commit: 882209fb5c596b406068e6478ebcea1b516838cb
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=882209fb5c596b…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Tue Apr 12 08:28:49 2011 +0200
ipmi: allow to ignore certain sensors
This is done via ipmi_ignored_sensors. See man page
of ipmi for details.
---
ChangeLog | 1 +
checkman/ipmi | 24 +++++++++++++++++-------
checks/ipmi | 45 +++++++++++++++++++++++++++++++++++++++++----
3 files changed, 59 insertions(+), 11 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index b524bc2..66d3f27 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -67,6 +67,7 @@
* if/if64: also show perf-o-meter if speed is unknown
* f5_bigip_pool: status of F5 BIP/ip load balancing pools
* f5_bigip_vserver: status of F5 BIP/ip virtual servers
+ * ipmi: new configuration variable ipmi_ignored_sensors (see man page)
1.1.10:
diff --git a/checkman/ipmi b/checkman/ipmi
index 9ca3b46..1c9c249 100644
--- a/checkman/ipmi
+++ b/checkman/ipmi
@@ -7,16 +7,19 @@ description:
This check makes use of the output of a working {ipmitool} on
a Linux system. If the kernel properly supports IPMI and {ipmitool}
is installed, then the agent will output the current state of
- power supplies, cooling devices and temperatures. This check makes
+ power supplies, cooling devices, temperatures and other devices
+ available via IPMI. This check makes
sure that all those devices are working properly.
- Fortunately, IPMI provides not only the current sensor values
- but also their state, so no levels need to be configured for
- this checks.
+ Fortunately, IPMI provides not only the current sensor values but also
+ their state, so no levels need to be configured for this check.
- This check provides a summarized mode, where all sensors
- appear as one summary check per host and a detailed mode
- with one check per sensor.
+ This check provides a summarized mode, where all sensors appear as one
+ summary check per host and a detailed mode with one check per sensor.
+
+ Please note: on some systems IPMI reports sensors in a non-ok state whereas
+ actually no problem exists. As of version 1.1.11i1 these sensors can be
+ ignored with the configuration variable {ipmi_ignored_sensors}.
item:
{"Summary"} for summary mode, the name of the IPMI item
@@ -44,9 +47,16 @@ examples:
# Make inventory produce detailed IPMI checks
ipmi_summarize = False
+ # Ignore certain types of sensors
+ ipmi_ignored_sensors = [ "Power_Meter", "Virtual_Fan" ]
+
[configuration]
ipmi_ignore_nr (boolean): If set to {True}, sensors
with the state {nr} will be ignored. Default is {False}.
ipmi_summarize (boolean): If set to {False}, the inventory
will create a separate service for each IPMI device. Default
is {True}.
+ipmi_ignored_sensors (list): A list of sensor names that should
+ be ignored when doing inventory (for non-summarized mode) or
+ during check (for summarized mode). The check does a {prefix}
+ match on the names.
diff --git a/checks/ipmi b/checks/ipmi
index 3b12ac8..b5c5d08 100644
--- a/checks/ipmi
+++ b/checks/ipmi
@@ -26,6 +26,7 @@
# Example of output from ipmi:
+# <<<ipmi>>>
# ambienttemp 25.800 degrees_C ok na na na 34.800 40.200 na
# bulk.v12-0-s0 11.940 Volts ok na 10.200 na na 13.800 na
# bulk.v3_3-s0 3.360 Volts ok na 3.000 na na 3.600 na
@@ -47,6 +48,7 @@
# p0.v_vdd 1.332 Volts ok 0.792 0.900 0.996 1.596 1.692 1.800
# Yet another host (HP DL 360G5)
+# <<<ipmi>>>
# UID_Light 0.000 unspecified ok na na 0.000 na na na
# Int._Health_LED 0.000 unspecified ok na na 0.000 na na na
# Ext._Health_LED 0.000 unspecified ok na na 0.000 na na na
@@ -68,6 +70,21 @@
# Temp_7 30.000 degrees_C ok na na -64.000 na na na
# Power_Meter 180.000 Watts cr na na 384.000 na na na
+# And this host has some false-criticals (PowerMeter, VirtualFan)
+# <<<ipmi>>>
+# Temp_1 17.000 degrees_C ok 0.000 0.000 0.000 40.000 42.000 46.000
+# Temp_2 40.000 degrees_C ok 0.000 0.000 0.000 0.000 82.000 83.000
+# Temp_3 44.000 degrees_C ok 0.000 0.000 0.000 0.000 82.000 83.000
+# Temp_4 52.000 degrees_C ok 0.000 0.000 0.000 0.000 87.000 92.000
+# Temp_5 46.000 degrees_C ok 0.000 0.000 0.000 0.000 85.000 90.000
+# Temp_6 55.000 degrees_C ok 0.000 0.000 0.000 0.000 85.000 90.000
+# Temp_7 51.000 degrees_C ok 0.000 0.000 0.000 0.000 85.000 90.000
+# Temp_8 58.000 degrees_C ok 0.000 0.000 0.000 0.000 78.000 83.000
+# Temp_9 74.000 degrees_C ok 0.000 0.000 0.000 0.000 110.000 115.000
+# Temp_10 31.000 degrees_C ok 0.000 0.000 0.000 0.000 60.000 65.000
+# Virtual_Fan 19.600 unspecified nc na na na na na na
+# Power_Meter 236.000 Watts cr na na na na na na
+
# IPMI has two operation modes:
# 1. detailed
@@ -79,12 +96,23 @@
ipmi_summarize = True
ipmi_ignore_nr = False # set to True in order to ignore entries with state 'nr'
+ipmi_ignored_sensors = [] # example: [ "Power_Meter", "Virtual_Fan" ]
+
+def ipmi_ignore_entry(name, state):
+ if ipmi_ignore_nr and state == 'nr':
+ return True
+ for e in ipmi_ignored_sensors:
+ if name.startswith(e):
+ return True
+ return False
def inventory_ipmi(checkname, info):
if ipmi_summarize and len(info) > 0:
- return [ ( "Summary", None, None ) ]
+ return [ ( "Summary", None ) ]
else:
- return [ ( line[0], line[1], None ) for line in info if not ipmi_ignore_nr or line[3] != 'nr' ]
+ return [ ( line[0], None )
+ for line in info
+ if not ipmi_ignore_entry(line[0], line[3]) ]
def check_ipmi(item, params, info):
if item == "Summary":
@@ -94,7 +122,8 @@ def check_ipmi(item, params, info):
def check_ipmi_detailed(item, info):
try:
- for name,val,unit,status,unrec_low,crit_low,warn_low,warn_high,crit_high,unrec_high in info:
+ for name, val, unit, status, unrec_low, crit_low, \
+ warn_low, warn_high, crit_high, unrec_high in info:
if name == item:
perfdata = [ (name, val + unit) ] # TODO: add warn and crit levels
if status == 'ok':
@@ -115,7 +144,11 @@ def check_ipmi_summarized(info):
ambient_count = 0
ambient_sum = 0.0
try:
- for name,val,unit,status,unrec_low,crit_low,warn_low,warn_high,crit_high,unrec_high in info:
+ for name, val, unit, status, unrec_low, crit_low, \
+ warn_low, warn_high, crit_high, unrec_high in info:
+ if ipmi_ignore_entry(name, status):
+ continue
+
text = "%s is %s %s" % (name, val, unit)
count += 1
if status == 'nc':
@@ -155,4 +188,8 @@ def check_ipmi_summarized(info):
return (worst_status, "%s - %s" % (statname, infotext), perfdata)
check_info['ipmi'] = (check_ipmi, "IPMI Sensor %s", 1, inventory_ipmi)
+
+# Make sure, configuration variables needed during check time are present
+# in precompiled code
check_config_variables.append("ipmi_ignore_nr")
+check_config_variables.append("ipmi_ignored_sensors")