Module: check_mk
Branch: master
Commit: d9f15ea6640bb3da29f78b667dbbf09f4e939cbe
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=d9f15ea6640bb3…
Author: Sebastian Herbord <sh(a)mathias-kettner.de>
Date: Fri Jan 8 08:45:58 2016 +0100
#2915 supermicro.smart: new check to monitor harddisk health on snmp-enabled supermicro
devices
---
.werks/2915 | 9 ++++++
ChangeLog | 1 +
checkman/supermicro.smart | 19 ++++++++++++
checks/supermicro | 74 ++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 102 insertions(+), 1 deletion(-)
diff --git a/.werks/2915 b/.werks/2915
new file mode 100644
index 0000000..864c1d0
--- /dev/null
+++ b/.werks/2915
@@ -0,0 +1,9 @@
+Title: supermicro.smart: new check to monitor harddisk health on snmp-enabled supermicro devices
+Level: 1
+Component: checks
+Compatible: compat
+Version: 1.2.7i4
+Date: 1452239072
+Class: feature
+
+
diff --git a/ChangeLog b/ChangeLog
index 4cb4f74..4208568 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -75,6 +75,7 @@
* 2872 supermicro, supermicro.sensors: New check to monitor health on snmp-enabled
devices by supermicro
* 2913 fortigate_node, fortigate_node.cpu, fortigate_node.memory,
fortigate_node.sessions: New checks to monitor HA nodes of fortigate firewalls
* 1319 blade_bx_powerfan: Check can now be configured using Wato
+ * 2915 supermicro.smart: new check to monitor harddisk health on snmp-enabled
supermicro devices
* 2660 FIX: fixed windows agent using the wrong working directory...
* 2664 FIX: ps: Speedup in situation with many matching processes...
* 2661 FIX: windows agent: fixed incomplete process list...
diff --git a/checkman/supermicro.smart b/checkman/supermicro.smart
new file mode 100644
index 0000000..019d1a0
--- /dev/null
+++ b/checkman/supermicro.smart
@@ -0,0 +1,19 @@
+title: Supermicro Health Smart
+agents: snmp
+catalog: hw/other
+license: GPL
+distribution: check_mk
+description:
+ This check tests the S.M.A.R.T. health reported by SNMP-enabled devices
+ from Supermicro.
+ Currently this has only been tested with data from a mainboard on
+ Windows. In this case an agent has to be enabled through the
+ SuperDoctor III software.
+
+ Status {OK} and {CRIT} are relayed directly from the device.
+ A {WARN} may work but is not documented.
+ Status may also become {UNKNOWN} if the necessary driver is
+ not installed.
+
+inventory:
+ One item is reported per SMART-enabled harddisk.
diff --git a/checks/supermicro b/checks/supermicro
index 17de2a3..5b91fdf 100644
--- a/checks/supermicro
+++ b/checks/supermicro
@@ -60,6 +60,16 @@
# .1.3.6.1.4.1.10876.2.3 No problem.
+#.
+# .--Health--------------------------------------------------------------.
+# | _ _ _ _ _ |
+# | | | | | ___ __ _| | |_| |__ |
+# | | |_| |/ _ \/ _` | | __| '_ \ |
+# | | _ | __/ (_| | | |_| | | | |
+# | |_| |_|\___|\__,_|_|\__|_| |_| |
+# | |
+# '----------------------------------------------------------------------'
+
def inventory_supermicro_health(info):
if info[1]:
return [(None, None)]
@@ -72,7 +82,7 @@ def check_supermicro_health(_no_item, _no_params, info):
check_info['supermicro'] = {
'check_function' : check_supermicro_health,
'inventory_function' : inventory_supermicro_health,
- 'service_description' : "Health",
+ 'service_description' : "Overall Hardware Health",
'has_perfdata' : False,
'snmp_scan_function' : lambda oid: oid(".1.3.6.1.2.1.1.2.0") ==
".1.3.6.1.4.1.311.1.1.3.1.2",
'snmp_info' : [
@@ -87,10 +97,23 @@ check_info['supermicro'] = {
(".1.3.6.1.4.1.10876.2", [2, # smHealthAllinoneStatus
3, # smHealthAllinoneMsg
]),
+ (".1.3.6.1.4.1.10876.100.1.4.1", [1, # diskSerialNumber
+ 2, # diskName
+ 4]), # diskSmartStatus
]
}
+#.
+# .--Sensors-------------------------------------------------------------.
+# | ____ |
+# | / ___| ___ _ __ ___ ___ _ __ ___ |
+# | \___ \ / _ \ '_ \/ __|/ _ \| '__/ __| |
+# | ___) | __/ | | \__ \ (_) | | \__ \ |
+# | |____/ \___|_| |_|___/\___/|_| |___/ |
+# | |
+# '----------------------------------------------------------------------'
+
def inventory_supermicro_sensors(info):
for name, sensor_type, reading, high, low, unit, status in info[0]:
yield name, None
@@ -152,6 +175,7 @@ def check_supermicro_sensors(item, _no_params, info):
return (worst_status(status_high, status_low, dev_status),
"%s%s" % (reading, unit), perfdata)
+
check_info['supermicro.sensors'] = {
'check_function' : check_supermicro_sensors,
'inventory_function' : inventory_supermicro_sensors,
@@ -159,3 +183,51 @@ check_info['supermicro.sensors'] = {
'has_perfdata' : True,
}
+
+#.
+# .--SMART---------------------------------------------------------------.
+# | ____ __ __ _ ____ _____ |
+# | / ___|| \/ | / \ | _ \_ _| |
+# | \___ \| |\/| | / _ \ | |_) || | |
+# | ___) | | | |/ ___ \| _ < | | |
+# | |____/|_| |_/_/ \_\_| \_\|_| |
+# | |
+# '----------------------------------------------------------------------'
+
+
+def format_item_supermicro_smart(name):
+ # Turn the disk name reported by the device into the service item by
+ # removing every occurrence of the literal character sequence
+ # backslash x4, dot, backslash x2 (the raw string below is taken verbatim).
+ # NOTE(review): presumably this is the escaped Windows device prefix
+ # "\\.\" as it arrives via SNMP - confirm against real device output.
+ return name.replace(r"\\\\.\\", "")
+
+
+def inventory_supermicro_smart(info):
+    # Yield one (item, None) pair per row of the disk table. The disk table
+    # is the third SNMP table (info[2]); each row holds the serial number,
+    # the disk name and the SMART status. The item is the disk name as
+    # formatted by format_item_supermicro_smart().
+    for _serial, name, status in info[2]:
+        # status 3 indicates unknown, which may indicate missing driver support
+        if status != "3":
+            yield format_item_supermicro_smart(name), None
+
+
+def check_supermicro_smart(item, _no_params, info):
+    # Report the SMART status of the disk whose formatted name equals item.
+    # Returns a (state, text) tuple, or None (implicitly) when no row of the
+    # disk table (info[2]) matches the item.
+    #
+    # Only status 0 (OK) and 2 (Crit) are documented. Status 3 appears to
+    # indicate "unknown", as observed by a user. It is likely - but not
+    # verified - that status 1 would indicate a non-critical problem, if it
+    # is used at all.
+    status_map = {
+        "0": "Healthy",
+        "1": "Warning",
+        "2": "Critical",
+        "3": "Unknown",
+    }
+    for serial, name, status in info[2]:
+        if format_item_supermicro_smart(name) != item:
+            continue
+        if status not in status_map:
+            # A value outside the known range must neither raise a KeyError
+            # nor be passed on as a monitoring state: report it as UNKNOWN.
+            return 3, "(S/N %s) Unknown status code %s" % (serial, status)
+        # The device status codes 0-3 are used directly as the check state.
+        return int(status), "(S/N %s) %s" % (serial, status_map[status])
+
+
+check_info['supermicro.smart'] = {
+    # Registration of the SMART check: one service per disk, named after
+    # the disk item produced by the inventory function.
+    'check_function'      : check_supermicro_smart,
+    'inventory_function'  : inventory_supermicro_smart,
+    'service_description' : "SMART Health %s",
+    # The check function returns only (state, text) and never emits
+    # performance data, so no perfdata must be announced here.
+    'has_perfdata'        : False,
+}
+
+#.
+