Module: check_mk
Branch: master
Commit: 6d9be301baf2fb80116d80ffa1b9554168d4e7db
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=6d9be301baf2fb…
Author: Tom Baerwinkel <tb(a)mathias-kettner.de>
Date: Fri Jan 26 13:51:47 2018 +0100
5494 FIX nfsmounts, cifsmounts: increase timeout of nfsmounts and deliver correct status
details
The waiting time of nfsmounts was increased to prevent agent timeouts.
Furthermore, the common check function was rewritten to deliver correct error
messages in the status details. E.g. a timeout in the agent now shows the
message "Server not responding" instead of "Stale fs handle".
Change-Id: Id9d5c4f9f6a0374c4e33105126c7df0dc7898747
---
.werks/5494 | 13 ++++
agents/check_mk_agent.aix | 2 +-
agents/check_mk_agent.linux | 4 +-
agents/check_mk_agent.openwrt | 4 +-
checks/network_fs.include | 77 +++++++++----------
.../checks/test_nfsmounts_and_cifsmounts_check.py | 86 ++++++++++++++++++++++
6 files changed, 139 insertions(+), 47 deletions(-)
diff --git a/.werks/5494 b/.werks/5494
new file mode 100644
index 0000000..689be75
--- /dev/null
+++ b/.werks/5494
@@ -0,0 +1,13 @@
+Title: nfsmounts, cifsmounts: increase timeout of nfsmounts and deliver correct status
details
+Level: 1
+Component: checks
+Compatible: compat
+Edition: cre
+Version: 1.5.0i3
+Date: 1516970190
+Class: fix
+
+The waiting time of nfsmounts was increased to prevent agent timeouts.
+Furthermore, the common check function was rewritten to deliver correct error
+messages in the status details. E.g. a timeout in the agent now shows the
+message "Server not responding" instead of "Stale fs handle".
diff --git a/agents/check_mk_agent.aix b/agents/check_mk_agent.aix
index a823d5a..ef7db0d 100755
--- a/agents/check_mk_agent.aix
+++ b/agents/check_mk_agent.aix
@@ -143,7 +143,7 @@ if type stat >/dev/null 2>&1 ; then
mount | grep ' nfs' | awk '{print $3;}' | \
while read MP
do
- waitmax 2 stat -f -c '"'$MP' ok - - - -"'
"$MP" || \
+ waitmax 5 stat -f -c '"'$MP' ok - - - -"'
"$MP" || \
echo "$MP hanging 0 0 0 0"
done
echo '<<<cifsmounts>>>'
diff --git a/agents/check_mk_agent.linux b/agents/check_mk_agent.linux
index 8354e1a..f857fc1 100755
--- a/agents/check_mk_agent.linux
+++ b/agents/check_mk_agent.linux
@@ -331,10 +331,10 @@ then
while read MP
do
if [ $STAT_VERSION != $STAT_BROKE ]; then
- waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP" ||
\
+ waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP" ||
\
echo "$MP hanging 0 0 0 0"
else
- waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP"
&& \
+ waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP"
&& \
printf '\n'|| echo "$MP hanging 0 0 0 0"
fi
done
diff --git a/agents/check_mk_agent.openwrt b/agents/check_mk_agent.openwrt
index 163e696..d9eb644 100755
--- a/agents/check_mk_agent.openwrt
+++ b/agents/check_mk_agent.openwrt
@@ -281,10 +281,10 @@ then
if [ ! -r $MP ]; then
echo "$MP Permission denied"
if [ $STAT_VERSION != $STAT_BROKE ]; then
- waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP" || \
+ waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP" || \
echo "$MP hanging 0 0 0 0"
else
- waitmax -s 9 2 stat -f -c "$MP ok %b %f %a %s" "$MP"
&& \
+ waitmax -s 9 5 stat -f -c "$MP ok %b %f %a %s" "$MP"
&& \
printf '\n'|| echo "$MP hanging 0 0 0 0"
fi
done
diff --git a/checks/network_fs.include b/checks/network_fs.include
index a52dd15..c4ad622 100644
--- a/checks/network_fs.include
+++ b/checks/network_fs.include
@@ -52,45 +52,38 @@ def inventory_network_fs_mounts(parsed):
def check_network_fs_mounts(item, params, parsed):
- if item in parsed:
- attrs = parsed[item]
- state = attrs["state"]
- data = attrs.get("data")
-
- if state == "Permission denied" or data is None:
- return 2, "Permissions denied"
-
- size_blocks_, unused_, free_blocks_, block_size_ = data
- if size_blocks_ == "-":
- if state == 'ok':
- return 0, "mount seems OK"
- else:
- return 2, "Server not responding"
-
- size_blocks = int(size_blocks_)
- free_blocks = int(free_blocks_) # for non-root user
- blocksize = int(block_size_)
-
- if size_blocks <= 0 or free_blocks < 0 or blocksize > 1024*1024:
- return 2, "Stale fs handle"
-
- size_bytes = size_blocks * blocksize
- free_bytes = free_blocks * blocksize
- used_bytes = size_bytes - free_bytes
- used_perc = 100.0 * used_bytes / size_bytes
- perfdata = None
- if params is not None and params.get("has_perfdata"):
- perfdata = [("fs_size", size_bytes), ("fs_used",
used_bytes)]
-
- if state == 'ok':
- if size_bytes == 0:
- return 0, "server is responding", perfdata
-
- return 0, "%.1f%% used (%s of %s)" % \
- (used_perc, get_bytes_human_readable(used_bytes),
- get_bytes_human_readable(size_bytes)), perfdata
-
- else:
- return 2, "server not responding", perfdata
-
- return 3, "%s not mounted" % item
+ attrs = parsed.get(item)
+ if not attrs:
+ return 3, "%s not mounted" % item
+
+ state = attrs["state"]
+ if state == "Permission denied":
+ return 2, "Permission denied"
+ elif state == "hanging":
+ return 2, "Server not responding"
+ elif state != 'ok':
+ return 2, "Unknown state"
+
+ data = attrs["data"]
+ if data == ['-', '-', '-', '-']:
+ return 0, "Mount seems OK"
+ size_blocks, _, free_blocks, blocksize = map(int, data)
+
+ if size_blocks <= 0 or free_blocks < 0 or blocksize > 1024 * 1024:
+ return 2, "Stale fs handle"
+
+ size_bytes = size_blocks * blocksize
+ free_bytes = free_blocks * blocksize
+ used_bytes = size_bytes - free_bytes
+ used_perc = 100.0 * used_bytes / size_bytes
+
+ perfdata = None
+ if params is not None and params.get("has_perfdata"):
+ perfdata = [("fs_size", size_bytes), ("fs_used",
used_bytes)]
+
+ infotext = "%.1f%% used (%s of %s)" % (
+ used_perc,
+ get_bytes_human_readable(used_bytes),
+ get_bytes_human_readable(size_bytes)
+ )
+ return 0, infotext, perfdata
diff --git a/tests/checks/test_nfsmounts_and_cifsmounts_check.py
b/tests/checks/test_nfsmounts_and_cifsmounts_check.py
new file mode 100644
index 0000000..8a2a9bd
--- /dev/null
+++ b/tests/checks/test_nfsmounts_and_cifsmounts_check.py
@@ -0,0 +1,86 @@
+import pytest
+from checktestlib import BasicCheckResult, PerfValue
+
+# since both nfsmounts and cifsmounts use the parse, inventory
+# and check functions from network_fs.include unchanged we test
+# both checks here.
+
+pytestmark = pytest.mark.checks
+
+
+(a)pytest.mark.parametrize("info,discovery_expected,check_expected"cted", [
+ ( # no info
+ [],
+ [],
+ (['', None, BasicCheckResult(3, ' not mounted', None)],)
+ ),
+ ( # single mountpoint with data
+ [[u'/ABCshare', u'ok', u'491520', u'460182',
u'460182', u'65536']],
+ [('/ABCshare', None)],
+ [('/ABCshare', {}, BasicCheckResult(0, "6.4% used (1.91 GB of 30.00
GB)", None)),
+ ('/ZZZshare', {}, BasicCheckResult(3, "/ZZZshare not mounted",
None))]
+ ),
+ ( # two mountpoints with empty data
+ [[u'/AB', u'ok', u'-', u'-', u'-',
u'-'],
+ [u'/ABC', u'ok', u'-', u'-', u'-',
u'-']],
+ [('/AB', None),
+ ('/ABC', None)],
+ [('/AB', {}, BasicCheckResult(0, "Mount seems OK", None)),
+ ('/ABC', {}, BasicCheckResult(0, "Mount seems OK", None))]
+ ),
+ ( # Mountpoint with spaces and permission denied
+ [[u'/var/dba', u'export', u'Permission',
u'denied'],
+ [u'/var/dbaexport', u'ok', u'201326592',
u'170803720', u'170803720', u'32768']],
+ [('/var/dbaexport', None),
+ ('/var/dba export', None)],
+ [('/var/dba export', {}, BasicCheckResult(2, 'Permission denied',
None)),
+ ('/var/dbaexport', {}, BasicCheckResult(0, '15.2% used (931.48 GB of
6.00 TB)', None))]
+ ),
+ ( # with perfdata
+ [[u'/PERFshare', u'ok', u'491520', u'460182',
u'460182', u'65536']],
+ [('/PERFshare', None)],
+ [('/PERFshare', {'has_perfdata': True},
+ BasicCheckResult(0, "6.4% used (1.91 GB of 30.00 GB)", [
+ PerfValue('fs_size', 491520 * 65536), PerfValue('fs_used',
491520 * 65536 - 460182 * 65536)
+ ]))]
+ ),
+ ( # state == 'hanging'
+ [[u'/test', u'hanging', u'hanging', u'0',
u'0', u'0', u'0']],
+ [('/test hanging', None)],
+ [('/test hanging', {'has_perfdata': True},
+ BasicCheckResult(2, "Server not responding", None))]
+ ),
+ ( # unknown state
+ [[u'/test', u'unknown', u'unknown', u'1',
u'1', u'1', u'1']],
+ [('/test unknown', None)],
+ [('/test unknown', {}, BasicCheckResult(2, "Unknown state",
None))]
+ ),
+ ( # zero block size
+ [[u'/test', u'perfdata', u'ok', u'0',
u'460182', u'460182', u'0']],
+ [('/test perfdata', None)],
+ [('/test perfdata', {'has_perfdata': True},
+ # TODO: display a better error message
+ #BasicCheckResult(0, "server is responding",
[PerfValue('fs_size', 0), PerfValue('fs_used', 0)]))]
+ BasicCheckResult(2, "Stale fs handle", None))]
+ ),
+])
+def test_nfsmounts(check_manager, info, discovery_expected, check_expected):
+ check_nfs = check_manager.get_check("nfsmounts")
+ check_cifs = check_manager.get_check("cifsmounts")
+
+ # assure that the code of both checks is identical
+ assert (check_nfs.info['parse_function'].func_code.co_code ==
+ check_cifs.info['parse_function'].func_code.co_code)
+ assert (check_nfs.info['inventory_function'].func_code.co_code ==
+ check_cifs.info['inventory_function'].func_code.co_code)
+ assert (check_nfs.info['check_function'].func_code.co_code ==
+ check_cifs.info['check_function'].func_code.co_code)
+
+ parsed = check_nfs.run_parse(info)
+
+ discovery = list(check_nfs.run_discovery(parsed))
+ assert discovery == discovery_expected
+
+ for item, params, result_expected in check_expected:
+ result = BasicCheckResult(*check_nfs.run_check(item, params, parsed))
+ assert result == result_expected