Module: check_mk
Branch: master
Commit: 3cd61175ac2633a9aff231afc817d4f0cd94678e
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=3cd61175ac2633…
Author: Moritz Kiemer <mo(a)mathias-kettner.de>
Date: Tue Jun 12 17:36:01 2018 +0200
6205 HP ProLiant Power Supply Unit
hp_proliant_psu: Add information on the currently used capacity,
per power supply unit and in total. Apply warning/critical levels to
the used capacity in percent.
Change-Id: I424299047365def951f0359671058cfbbfbfd39e
---
.werks/6205 | 12 +++++
checkman/hp_proliant_psu | 10 +++-
checks/hp_proliant_psu | 92 +++++++++++++++++++++++++-------
cmk/gui/plugins/metrics/check_mk.py | 12 +++++
cmk/gui/plugins/wato/check_parameters.py | 20 +++++++
5 files changed, 126 insertions(+), 20 deletions(-)
diff --git a/.werks/6205 b/.werks/6205
new file mode 100644
index 0000000..0e47af2
--- /dev/null
+++ b/.werks/6205
@@ -0,0 +1,12 @@
+Title: hp_proliant_psu: HP ProLiant Power Supply Unit
+Level: 1
+Component: checks
+Compatible: compat
+Edition: cre
+Version: 1.6.0i1
+Date: 1528817718
+Class: feature
+
+Add information on the currently used capacity,
+per power supply unit and in total. Apply warning/critical levels to
+the used capacity in percent.
diff --git a/checkman/hp_proliant_psu b/checkman/hp_proliant_psu
index 27bfdf0..fea46a0 100644
--- a/checkman/hp_proliant_psu
+++ b/checkman/hp_proliant_psu
@@ -7,11 +7,17 @@ description:
This check connects to the SNMP agent installed on an operating system
on a HP Proliant server and reads the information from the {CPQHLTH-MIB}
MIB.
+
The systems SNMP agent is extended by the HP Management Agents which need
to be installed on the monitored systems.
- The check monitors the operational state of the PSUs in the server.
+ The check monitors the operational state of the PSUs in the server, as well
+ as their currently used capacity.
+
+ You can change the default warning/critical levels of 80/90 percent by
+ setting the service parameter "Power Supply Unit".
inventory:
One check is automatically created for each PSU entry which is provided
- and marked as present in the SNMP output.
+ and marked as present in the SNMP output. Additionally, one service named
+ "Total" is created which sums up the used capacity of all PSUs.
diff --git a/checks/hp_proliant_psu b/checks/hp_proliant_psu
index 7f5a479..f6040c5 100644
--- a/checks/hp_proliant_psu
+++ b/checks/hp_proliant_psu
@@ -24,41 +24,97 @@
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
-# Author: Lars Michelsen <lm(a)mathias-kettner.de>
-hp_proliant_psu_status_map = { 1: 'other', 2: 'ok', 3:
'degraded', 4: 'failed' }
-hp_proliant_status2nagios_map = { 'other': 3, 'ok': 0,
'degraded': 2, 'failed': 2 }
-hp_proliant_present_map = { 1: 'other', 2: 'absent', 3: 'present'
}
-hp_proliant_redundant_map = { 1: 'other', 2: 'notRedundant', 3:
'redundant' }
+condition_map = {'1': ('other', 3), # the status could not be
+ # determined or not present.
+ '2': ('ok', 0), # operating normally
+ '3': ('degraded', 2), # component is outside of
+ # normal operating range.
+ '4': ('failed', 2), # component detects condition
+ # that could damage system
+ }
-def inventory_hp_proliant_psu(info):
- if len(info) > 0:
- return [ (line[0] + '/' + line[1], None) for line in info if line[2] ==
'3' ]
-def check_hp_proliant_psu(item, params, info):
- for line in info:
- if '/' in item and line[0] + '/' + line [1] == item or line[0] ==
item:
- chassis, bay, present, status, redundant = line
- snmp_status = hp_proliant_psu_status_map[int(status)]
- status = hp_proliant_status2nagios_map[snmp_status]
+factory_settings["hp_proliant_psu_levels"] = {
+ "levels": (80, 90),
+}
+
+
+def parse_hp_proliant_psu(info):
+ parsed = {}
+ Psu = collections.namedtuple("Psu", ['chassis', 'bay',
'condition',
+ 'used', 'max'])
+ for chassis, bay, present, cond, used, capacity_maximum in info:
+ if present != '3' or capacity_maximum == '0':
+ continue
+ item = "%s/%s" % (chassis, bay)
+ try:
+ parsed[item] = Psu(chassis, bay, cond,
+ int(used), int(capacity_maximum))
+ except ValueError:
+ pass
+ return parsed
+
+
+def inventory_hp_proliant_psu(parsed):
+ """Inventorizes all present PSUs, as well as the Sum over all
PSUs"""
+ for item in parsed:
+ yield item, None
+ yield 'Total', None
+
+
+def check_hp_proliant_psu(item, params, parsed):
+
+ psu = parsed.get(item)
+ if psu is not None:
+ yield 0, "Chassis %s/Bay %s" % (psu.chassis, psu.bay)
+ snmp_state, status = condition_map[psu.condition]
+ yield status, 'State: "%s"' % snmp_state
+ elif item != 'Total':
+ yield 3, "item not found in snmp data"
+ return
+ else: # compute Total
+ PsuTotal = collections.namedtuple("Psu", ['used',
'max'])
+ psu = PsuTotal(sum(v.used for v in parsed.values()),
+ sum(v.max for v in parsed.values()))
+
+ # usage info
+ info = 'Usage: %d Watts' % psu.used
+ cap_perc = psu.used * 100 / psu.max
+ perf_data = [('power_usage_percentage', cap_perc),
+ ('power_usage', psu.used),
+ ]
+
+ # check for user defined thresholds here
+ warn, crit = params["levels"]
+ msg = " (warn/crit at %s/%s)" % (warn, crit)
+ if cap_perc > crit:
+ yield 2, info + msg, perf_data
+ elif cap_perc > warn:
+ yield 1, info + msg, perf_data
+ else:
+ yield 0, info, perf_data
+
- return (status, 'PSU in chassis %s, bay %s is in state
"%s"' %
- (chassis, bay, snmp_status))
- return (3, "item not found in snmp data")
check_info["hp_proliant_psu"] = {
'check_function': check_hp_proliant_psu,
'inventory_function': inventory_hp_proliant_psu,
+ 'parse_function': parse_hp_proliant_psu,
+ 'default_levels_variable': "hp_proliant_psu_levels",
'service_description': 'HW PSU %s',
+ 'group': 'hw_psu',
'snmp_info': (
".1.3.6.1.4.1.232.6.2.9.3.1", [
"1", # cpqHeFltTolPowerSupplyChassis
"2", # cpqHeFltTolPowerSupplyBay
"3", # cpqHeFltTolPowerSupplyPresent
"4", # cpqHeFltTolPowerSupplyCondition
- "9", # cpqHeFltTolPowerSupplyRedundant
+ "7", # cpqHeFltTolPowerSupplyCapacityUsed
+ "8", # cpqHeFltTolPowerSupplyCapacityMaximum
]
),
'snmp_scan_function': \
lambda oid: "proliant" in oid(".1.3.6.1.4.1.232.2.2.4.2.0",
"").lower(),
+ 'has_perfdata': True,
}
diff --git a/cmk/gui/plugins/metrics/check_mk.py b/cmk/gui/plugins/metrics/check_mk.py
index bcdaff3..af5a642 100644
--- a/cmk/gui/plugins/metrics/check_mk.py
+++ b/cmk/gui/plugins/metrics/check_mk.py
@@ -749,6 +749,18 @@ metric_info["trend_hoursleft"] = {
"color" : "#94b65a",
}
+metric_info["power_usage_percentage"] = {
+ "title" : _("Power Usage"),
+ "color" : "13/a",
+ "unit" : "%",
+}
+
+metric_info["power_usage"] = {
+ "title" : _("Power Usage"),
+ "color" : "13/b",
+ "unit" : "w",
+}
+
metric_info["swap_total"] = {
"title" : _("Swap installed"),
"color": "#e0e0e0",
diff --git a/cmk/gui/plugins/wato/check_parameters.py
b/cmk/gui/plugins/wato/check_parameters.py
index 57d9f3c..bbf48f8 100644
--- a/cmk/gui/plugins/wato/check_parameters.py
+++ b/cmk/gui/plugins/wato/check_parameters.py
@@ -5122,6 +5122,26 @@ register_check_parameters(
)
+register_check_parameters(
+ subgroup_environment,
+ 'hw_psu',
+ _("Power Supply Unit"),
+ Dictionary(
+ elements = [
+ ("levels", Tuple(
+ title = _("PSU Capacity Levels"),
+ elements = [
+ Percentage(title = _("Warning at"), default_value = 80.0),
+ Percentage(title = _("Critical at"), default_value =
90.0),
+ ],
+ )),
+ ],
+ ),
+ None,
+ match_type = "dict"
+)
+
+
#.
# .--Storage-------------------------------------------------------------.
# | ____ _ |