Module: check_mk
Branch: master
Commit: f2c5a46d4d466f79094dffa89c615453a1950d33
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=f2c5a46d4d466f…
Author: Florian Heigl <fh(a)mathias-kettner.de>
Date: Thu Aug 16 19:16:28 2012 +0200
Add new Check for monitoring HSRP redundancy groups on Cisco routers
---
ChangeLog | 1 +
checkman/cisco_hsrp | 37 ++++++++++++++++
checks/cisco_hsrp | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 155 insertions(+), 0 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 0f12101..e34dad1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -45,6 +45,7 @@
* carel_uniflair_cooling: new check for monitoring datacenter air conditioning by
"CAREL"
* Added Agent for OpenBSD
* Added Checks for MasterGuard UPS devices
+ * cisco_hsrp: New Check for monitoring HSRP groups on Cisco Routers. (SMIv2 version)
WATO:
* Added permission to control the "clone host" feature in WATO
diff --git a/checkman/cisco_hsrp b/checkman/cisco_hsrp
new file mode 100644
index 0000000..77457f5
--- /dev/null
+++ b/checkman/cisco_hsrp
@@ -0,0 +1,37 @@
+title: Cisco HSRP group status
+agents: snmp
+author: Florian Heigl <fh(a)mathias-kettner.de>
+license: GPLv3
+distribution: check_mk
+description:
+ Cisco routers support virtual redundant interfaces by using HSRP
+ (Hot Standby Router Protocol).
+ HSRP is configured by assigning IP interfaces to a group that has a
+ "floating" virtual IP.
+ If, for example, a router has an interface failure on either of the routed
+ interfaces, the protocol lowers the router's priority, and if that priority
+ drops too low, it will fail over the virtual IP.
+
+
+ The check returns {OK} if the HSRP status is a good one ("active",
+ "standby") and the same as during inventory.
+ If the states flip from "active" to "standby" or vice-versa, the check goes
+ to {WARN}, assuming that HSRP is doing its job.
+
+ Should the status ever be a different one that, for example, points to HSRP
+ still initializing, the check will return {CRIT} as HSRP is assumed to be
+ inoperable in these states.
+
+perfdata:
+ none
+
+
+item:
+ The Virtual IP of the failover group.
+
+inventory:
+ The check reads the HSRP MIB and creates one service per virtual IP. It also
+ stores the HSRP state of the IP as seen by the monitored device, normally
+ either standby or active. It also stores the ID of the failover group
+ (for future use).
diff --git a/checks/cisco_hsrp b/checks/cisco_hsrp
new file mode 100644
index 0000000..5593cb0
--- /dev/null
+++ b/checks/cisco_hsrp
@@ -0,0 +1,117 @@
+#!/usr/bin/python
+
+
+
+# Good docs:
+# http://www.cisco.com/en/US/tech/tk648/tk362/technologies_tech_note09186a008…
+#.1.3.6.1.4.1.9.9.106.1.1.1.0 5
+# cHsrpGrpTable
+###########################
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.2.1.192 "HSRP Secret key here"
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.2.7.193 "HSRP Secret key here"
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.3.1.192 100
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.3.7.193 100
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.4.1.192 1
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.4.7.193 1
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.5.1.192 300
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.5.7.193 300
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.6.1.192 2
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.6.7.193 2
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.7.1.192 0
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.7.7.193 0
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.8.1.192 0
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.8.7.193 0
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.9.1.192 3000
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.9.7.193 3000
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.10.1.192 10000
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.10.7.193 10000
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.11.1.192 192.168.10.4
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.11.7.193 172.20.10.20 <- hsrp ip
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.12.1.192 1
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.12.7.193 1
+# HSRP Monitored IP interfaces. If any of those go down, the priority of
+# the router will be lowered.
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.13.1.192 192.168.10.5 <- ip Router 1 int 1
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.13.7.193 172.20.10.21 <- ip Router 2 int 7
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.14.1.192 192.168.10.6 <- ip Router 1 int 1
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.14.7.193 172.20.10.22 <- ip Router 2 int 7
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.15.1.192 6 <- group #1 cHsrpGrpStandbyState, 6 = "active"
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.15.7.193 6 <- group #2 cHsrpGrpStandbyState, 6 = "active"
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.16.1.192 "00 00 0C 07 AC C0 "
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.16.7.193 "00 00 0C 07 AC C1 "
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.17.1.192 1
+#.1.3.6.1.4.1.9.9.106.1.2.1.1.17.7.193 1
+
+
+# Names of the HSRP states as reported in column 15 (cHsrpGrpStandbyState).
+# We alert if the state is neither 5 (standby) nor 6 (active).
+# We could also skip groups in other states during inventory, but since
+# a group has to be configured to even show up in the MIB, it is
+# supposedly ok to alert if something isn't right there.
+# Otherwise modify the inventory.
+hsrp_states = {
+    1: "initial",
+    2: "learn",
+    3: "listen",
+    4: "speak",
+    5: "standby",
+    6: "active",
+}
+
+
+def inventory_cisco_hsrp(info):
+    """Build the inventory: one item per HSRP virtual IP.
+
+    Every row of info must hold six columns: the OID end in the form
+    "<interface index>.<group>", the virtual IP, the active router, the
+    standby router, the HSRP state and the virtual MAC address.
+
+    Returns a list of (virtual IP, (group, state as int)) tuples; the second
+    element is what the check function later receives as params.
+    """
+    inventory = []
+    for line in info:
+        hsrp_grp_entry, vip, actrouter, sbrouter, hsrp_state, vmac = line
+        interface_index, hsrp_grp = hsrp_grp_entry.split(".")
+        # Inventorize the virtual IP as item and remember the HSRP group plus
+        # the state as seen from "this" box.
+        inventory.append((vip, (hsrp_grp, int(hsrp_state))))
+
+    return inventory
+
+
+def check_cisco_hsrp(item, params, info):
+    """Check the HSRP state of the group that owns the virtual IP *item*.
+
+    params is the tuple stored by the inventory: (HSRP group, state seen at
+    inventory time). info holds the same six columns per row that the
+    inventory function unpacks.
+
+    Returns a (state, text) tuple where state is
+      0 - current state is 5 (standby) or 6 (active) and equals the
+          inventorized state,
+      1 - current state is 5 or 6 but differs from the inventorized state,
+          i.e. the group has failed over,
+      2 - any other state; HSRP is considered non-operative then,
+      3 - no row of info carries the virtual IP.
+    """
+    hsrp_grp_wanted, hsrp_state_wanted = params
+
+    for line in info:
+        hsrp_grp_entry, vip, actrouter, sbrouter, hsrp_state, vmac = line
+        if vip != item:
+            continue
+
+        # Only parse the row we are interested in, so that an odd value in
+        # an unrelated row cannot break this service.
+        interface_index, hsrp_grp = hsrp_grp_entry.split(".")
+        hsrp_state = int(hsrp_state)
+
+        if hsrp_state in (5, 6) and hsrp_state == hsrp_state_wanted:
+            # A "good" state that is also the inventorized one.
+            state = 0
+            msgtxt = "Redundancy Group %s is OK" % hsrp_grp
+        elif hsrp_state in (5, 6):
+            # Still a good state, but flipped: we are in a failover.
+            state = 1
+            msgtxt = "Redundancy Group %s has failed over" % hsrp_grp
+        else:
+            # Anything else must be a non-operative state already. Fall back
+            # to a generic label instead of raising KeyError on a state
+            # number that is missing from hsrp_states.
+            state = 2
+            state_name = hsrp_states.get(hsrp_state, "unknown (%d)" % hsrp_state)
+            msgtxt = "Redundancy Group %s is %s" % (hsrp_grp, state_name)
+
+        return (state, nagios_state_names[state] + " - " + msgtxt)
+
+    return (3, "UNKNOWN - HSRP Group %s not found in Agent output" % hsrp_grp_wanted)
+
+
+
+# FIXME: Outdated format. Fix after discussing WATO options.
+# Tuple layout: check function, service description template, 0, inventory
+# function. NOTE(review): the 0 is presumably the "has perfdata" flag - confirm.
+check_info["cisco_hsrp"] = (check_cisco_hsrp, "HSRP Group %s", 0, inventory_cisco_hsrp)
+
+# Columns fetched below .1.3.6.1.4.1.9.9.106.1.2.1.1. The order matches the
+# six-value unpacking done in the inventory and check functions.
+snmp_info["cisco_hsrp"] = \
+    ( ".1.3.6.1.4.1.9.9.106.1.2.1.1", [
+        OID_END,
+        "11", # cHsrpGrpVirtualIpAddr
+        "13", # cHsrpGrpActiveRouter
+        "14", # cHsrpGrpStandbyRouter
+        "15", # cHsrpGrpStandbyState
+        "16", # cHsrpGrpVirtualMacAddr
+    ])
+
+# We can't scan for the HSRP table since the entries are indexed
+# based on information we dont have without fetching all of it
+# Instead we use the HSRP timeout
+snmp_scan_functions["cisco_hsrp"] = lambda oid: \
+    "cisco" in oid(".1.3.6.1.2.1.1.1.0").lower() and \
+    oid(".1.3.6.1.4.1.9.9.106.1.1.1.0")