Module: check_mk
Branch: master
Commit: 815e203a824bfd24faaf6a4b8f278878f4d08eb2
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=815e203a824bfd24faaf6a4b8f278878f4d08eb2
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Wed Aug 5 11:17:07 2015 +0200
#2417 winperf_phydisk: can now work in a cluster environment
---
.werks/2417 | 9 +++++++
ChangeLog | 1 +
checks/winperf_phydisk | 61 +++++++++++++++++++++++++++++++++---------------
3 files changed, 52 insertions(+), 19 deletions(-)
diff --git a/.werks/2417 b/.werks/2417
new file mode 100644
index 0000000..cd80893
--- /dev/null
+++ b/.werks/2417
@@ -0,0 +1,9 @@
+Title: winperf_phydisk: can now work in a cluster environment
+Level: 1
+Component: checks
+Compatible: compat
+Version: 1.2.7i3
+Date: 1438765575
+Class: feature
+
+
diff --git a/ChangeLog b/ChangeLog
index 76f5fbb..5d65306 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -37,6 +37,7 @@
* 2413 esx_vsphere_counters: new check to monitor the disk throughput and latency for ESX datastores
* 2513 new checks sentry_pdu_outlets sentry_pdu_systempower: monitor the system power consumption and outlet states of sentry pdu devices which support the Sentry3-MIB
* 2515 juniper_trpz_aps: check is now cluster-aware...
+ * 2417 winperf_phydisk: can now work in a cluster environment
* 2315 FIX: windows agent: BOM replacement, fixed incorrect byte offset...
* 2316 FIX: windows agent: fix garbled output of cached agent plugins...
* 2358 FIX: check_mk_agent.solaris: more correct computation of zfs used space...
diff --git a/checks/winperf_phydisk b/checks/winperf_phydisk
index 7c267bd..96df0fd 100644
--- a/checks/winperf_phydisk
+++ b/checks/winperf_phydisk
@@ -69,27 +69,50 @@ def winperf_phydisk_convert(info):
# an almost empty section, where the second line is missing completely
if len(info) == 1:
return []
- disks = [ d.split('_')[-1] for d in info[1][3:-1] ]
- for line in info[2:]:
- if line[1] == '-14':
- disk_read_bytes = [ int(x) / 512 for x in line[2:-2] ]
- elif line[1] == '-12':
- disk_write_bytes = [ int(x) / 512 for x in line[2:-2] ]
- elif line[1] == '-20':
- disk_reads = [ int(x) for x in line[2:-2] ]
- elif line[1] == '-18':
- disk_writes = [ int(x) for x in line[2:-2] ]
- elif line[1] == '1168': # Average Disk Read Queue Length
- disk_readq_ctrs = [ int(x) for x in line[2:-2] ]
- elif line[1] == '1170': # Average Disk Read Queue Length
- disk_writeq_ctrs = [ int(x) for x in line[2:-2] ]
- # Missing columns are donted by negative values (Linux sends here latency
- # information)
- empty = [ -1 for x in disks ]
- none = [ None for x in disks ] # Used as dummy node info
+ lines = iter(info)
+ entries = []
+ current_disks = []
+ current_read_bytes = []
+ current_write_bytes = []
+ current_disk_reads = []
+ current_disk_writes = []
+ current_diskreadq_ctrs = []
+ current_diskwriteq_ctrs = []
- return zip(none, disks, disk_read_bytes, disk_write_bytes, disk_reads, disk_writes, empty, disk_readq_ctrs, disk_writeq_ctrs)
+ def finalize_block(nodename):
+ # Missing columns are denoted by negative values (Linux sends latency information here)
+ return zip([nodename for x in current_disks], current_disks, current_disk_read_bytes,
+ current_disk_write_bytes, current_disk_reads, current_disk_writes, [-1 for x in current_disks],
+ current_disk_readq_ctrs, current_disk_writeq_ctrs)
+
+ current_node = ""
+ try:
+ while True:
+ line = lines.next()
+ if line[2] == "instances:":
+ if current_node != "":
+ entries.extend(finalize_block(current_node))
+ current_node = line[0]
+ current_disks = [ d.split('_')[-1] for d in line[3:-1] ]
+ elif line[1] == '-14':
+ current_disk_read_bytes = [ int(x) / 512 for x in line[2:-2] ]
+ elif line[1] == '-12':
+ current_disk_write_bytes = [ int(x) / 512 for x in line[2:-2] ]
+ elif line[1] == '-20':
+ current_disk_reads = [ int(x) for x in line[2:-2] ]
+ elif line[1] == '-18':
+ current_disk_writes = [ int(x) for x in line[2:-2] ]
+ elif line[1] == '1168': # Average Disk Read Queue Length
+ current_disk_readq_ctrs = [ int(x) for x in line[2:-2] ]
+ elif line[1] == '1170': # Average Disk Write Queue Length
+ current_disk_writeq_ctrs = [ int(x) for x in line[2:-2] ]
+ except StopIteration:
+ if current_node != "":
+ entries.extend(finalize_block(current_node))
+ pass
+
+ return entries
def inventory_winperf_phydisk(info):
return inventory_diskstat_generic(winperf_phydisk_convert(info))
Module: check_mk
Branch: master
Commit: e5b9432c2837b9fde5039049bdf93bfc10cb70a5
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=e5b9432c2837b9fde5039049bdf93bfc10cb70a5
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Tue Aug 4 13:52:02 2015 +0200
#2416 FIX agent_netapp: fixed rare problem where environmental sensor info (fan, psus, temp) were not shown
If the netapp filers shared an equal number of disks on a shelf, the agent had problems
determining which filer is responsible for the shelf sensors.
In this case it reported "no one is responsible", which led to missing checks.
This has been fixed. If the disks of a shelf are evenly shared between the filers the
shelf owner is now determined by comparing the system-name with the partner-system-name.
---
.werks/2416 | 14 +++++++++++
ChangeLog | 1 +
agents/special/agent_netapp | 57 ++++++++++++++++++++++++++++++-------------
3 files changed, 55 insertions(+), 17 deletions(-)
diff --git a/.werks/2416 b/.werks/2416
new file mode 100644
index 0000000..4457084
--- /dev/null
+++ b/.werks/2416
@@ -0,0 +1,14 @@
+Title: agent_netapp: fixed rare problem where environmental sensor info (fan, psus, temp) were not shown
+Level: 1
+Component: checks
+Class: fix
+Compatible: compat
+State: unknown
+Version: 1.2.7i3
+Date: 1438688902
+
+If the netapp filers shared an equal number of disks on a shelf, the agent had problems
+determining which filer is responsible for the shelf sensors.
+In this case it reported "no one is responsible", which led to missing checks.
+This has been fixed. If the disks of a shelf are evenly shared between the filers the
+shelf owner is now determined by comparing the system-name with the partner-system-name.
diff --git a/ChangeLog b/ChangeLog
index 7c07852..6661c43 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -98,6 +98,7 @@
* 2414 FIX: logwatch: now really ignores ignored logwatch lines...
* 2415 FIX: check_mail, check_mail_loop: fixed incorrect POP3 ssl setting (thanks to Bernhard Schmidt)...
* 2452 FIX: cisco_temperature: Fixed handling of scaling...
+ * 2416 FIX: agent_netapp: fixed rare problem where environmental sensor info (fan, psus, temp) were not shown...
Multisite:
* 2385 SEC: Fixed possible reflected XSS on all GUI pages where users can produce unhandled exceptions...
diff --git a/agents/special/agent_netapp b/agents/special/agent_netapp
index 32147d1..eca6ce7 100755
--- a/agents/special/agent_netapp
+++ b/agents/special/agent_netapp
@@ -77,10 +77,6 @@ netapp_objects["netapp_api_protocol"] = {
}
}
-netapp_objects["netapp_api_version"] = {
- "configs" : { "system-get-info" : [],
- "system-get-version" : [] }
-}
netapp_objects["netapp_api_status"] = {
"configs" : { "diagnosis-status-get" : [] }
@@ -90,8 +86,20 @@ netapp_objects["netapp_api_cluster"] = {
"configs" : { "cf-status": [] },
}
+def output_api_version(result):
+ shared_data["netapp_api_version"] = {"system-get-info": {}}
+ for node in result["configs"]["system-get-info"].child_get("system-info").children_get():
+ shared_data["netapp_api_version"]["system-get-info"][node.element["name"]] = node.element["content"]
+ generic_output(results, "netapp_api_version")
+netapp_objects["netapp_api_version"] = {
+ "configs" : { "system-get-info" : [],
+ "system-get-version" : [] },
+ "output_function" : output_api_version,
+ "run_last" : True,
+}
+
def output_disks(results):
bay_list = results["configs"]["storage-shelf-bay-list-info"].child_get("shelf-bay-list")
shelf_uids = {}
@@ -139,27 +147,42 @@ netapp_objects["netapp_api_disk"] = {
def output_environment(results):
channel_list = results["configs"]["storage-shelf-environment-list-info"].child_get("shelf-environ-channel-list")
- # We need to iterate over each channel. There are dozens of sensors..
+ # This function determines if the shelf belongs to the current filer
+ # Right now, the shelf is owned by the filer if it manages more disks than the partner on this shelf.
+ # In a 50:50 scenario the filer names are compared alphanumerically
def shelf_is_mine(shelf_uid):
mine = 0
partner = 0
if "netapp_api_disk" not in shared_data:
return True
- # TODO: We had a case where shelf_uid is not contained in shared_data["netapp_api_disk"]["shelf-uids"].
- # I (mk) do not know why.
+ cmp_name = False
if shelf_uid not in shared_data["netapp_api_disk"]["shelf-uids"]:
- return False
-
- for disk in shared_data["netapp_api_disk"]["shelf-uids"][shelf_uid]:
- state = shared_data["netapp_api_disk"]["disks"].get(disk)
- if state == "partner":
- partner += 1
+ # Could be an empty shelf without disks
+ cmp_name = True
+ else:
+ for disk in shared_data["netapp_api_disk"]["shelf-uids"][shelf_uid]:
+ state = shared_data["netapp_api_disk"]["disks"].get(disk)
+ if state == "partner":
+ partner += 1
+ else:
+ mine += 1
+ if mine == partner:
+ cmp_name = True
else:
- mine += 1
- return mine > partner
+ return mine > partner
+
+ my_name = shared_data.get("netapp_api_version", {}).get("system-get-info", {}).get("system-name")
+ partner_name = shared_data.get("netapp_api_version", {}).get("system-get-info", {}).get("partner-system-name")
+ if my_name and partner_name:
+ return my_name > partner_name
+ else:
+ # As fallback, we always return true
+ # This means that the shelf is owned by both filers -> The sensor will be monitored twice
+ return True
environ = {}
+ # We need to iterate over each channel. There are dozens of sensors..
for channel in channel_list.children_get():
channel_name = channel.child_get_string("channel-name")
shelf_list = channel.child_get("shelf-environ-shelf-list")
@@ -214,7 +237,7 @@ def output_environment(results):
netapp_objects["netapp_api_environ"] = {
"configs" : { "storage-shelf-environment-list-info": [] },
"output_function" : output_environment,
- "requires" : "netapp_api_disk" # This object needs data from netapp_api_disk
+ "run_last" : True,
}
def output_vfiler(results):
@@ -367,7 +390,7 @@ try:
# Basic sorting is sufficient right now...
elements = []
for entry in netapp_objects.items():
- if entry[1].get("requires"):
+ if entry[1].get("run_last"):
elements.append(entry)
else:
elements.insert(0, entry)