Module: check_mk
Branch: master
Commit: 4cfa958b8350569216a75109d577bc2b37c899c6
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=4cfa958b835056…
Author: Moritz Kiemer <mo(a)mathias-kettner.de>
Date: Thu Jan 31 08:40:24 2019 +0100
Revert "7133 FIX mem.win: Change label from "Commit Charge" to "Page file""
This reverts commit 8aadece285e705cab90fb78319d26d48f693b68a.
Change-Id: I288bab8319ba6bd20d32454425e5f4c7bbd23722
---
.werks/7133 | 12 ---------
checkman/mem.win | 29 +++++++++++++++++-----
checks/mem | 2 +-
.../generictests/datasets/mem_win_regression.py | 4 +--
4 files changed, 26 insertions(+), 21 deletions(-)
diff --git a/.werks/7133 b/.werks/7133
deleted file mode 100644
index 7497b8b..0000000
--- a/.werks/7133
+++ /dev/null
@@ -1,12 +0,0 @@
-Title: mem.win: Change label from "Commit Charge" to "Page file"
-Level: 1
-Component: checks
-Compatible: compat
-Edition: cre
-Version: 1.6.0i1
-Date: 1548670807
-Class: fix
-
-The agent actually reports total/available page file, so adjust the check output
-accordingly and replace the confusing "Commit Charge" label.
-This change only affects the displayed text.
diff --git a/checkman/mem.win b/checkman/mem.win
index ae5c0b2..79c86cf 100644
--- a/checkman/mem.win
+++ b/checkman/mem.win
@@ -5,13 +5,30 @@ license: GPL
distribution: check_mk
description:
This check measures the current usage of physical RAM and
- of the page file.
+ of the commit charge.
+ The "commit charge" was previously confusingly named "pagefile".
+ Please note that Microsoft themselves use this naming in
+ some applications.
+
+ Due to how Windows optimizes memory management, physical memory
+ is in practice almost always fully utilized. The Memory usage this
+ check displays says how much memory is actually required by processes,
+ in contrast to caches that could be freed if the need arises.
+
+ In the same vein Windows may hold data in RAM and pagefile at the same
+ time or it may hold data in the pagefile even when there is RAM available,
+ when it determines this may improve performance.
+ As a result, the real pagefile usage is not only hard to
+ determine, it's also utterly useless when we want to determine
+ resource usage on the system.
+
+ The commit charge instead tells us how much of the memory used in
+ total (RAM + swap) is really required, ignoring overlap and caches.
+ Commit Charge minus Memory Usage is approximately the amount
+ of the pagefile used for "relevant" data, but further optimizations
+ may lead to offsets (to the point where Commit Charge - Memory Usage
+ can be negative).
- The page file metric is determined using the fields 'ullTotalPageFile'
- (current committed memory limit for the system or the current
- process, whichever is smaller) and 'ullAvailPageFile' (maximum
- amount of memory the current process can commit) of the MEMORYSTATUSEX
- structure as returned by the windows API function 'GlobalMemoryStatusEx'.
item:
{None}
diff --git a/checks/mem b/checks/mem
index fad8250..62b9032 100644
--- a/checks/mem
+++ b/checks/mem
@@ -551,7 +551,7 @@ def check_mem_windows(_no_item, params, info):
return warn, crit
for title, prefix, paramname in [("Memory usage", "Mem", "memory"),
- ("Page file", "Page", "pagefile")]:
+ ("Commit Charge", "Page", "pagefile")]:
total_kb = meminfo.get("%sTotal" % prefix)
free_kb = meminfo.get("%sFree" % prefix)
diff --git a/tests/unit/checks/generictests/datasets/mem_win_regression.py b/tests/unit/checks/generictests/datasets/mem_win_regression.py
index dae0614..86d1460 100644
--- a/tests/unit/checks/generictests/datasets/mem_win_regression.py
+++ b/tests/unit/checks/generictests/datasets/mem_win_regression.py
@@ -25,7 +25,7 @@ checks = {
('memory', 16369.96875, 26213.990625, 29490.739453125, 0, 32767.48828125),
('mem_total', 32767.48828125, None, None, None, None),
]),
- (0, 'Page file: 20.1% (19.04 GB/94.50 GB)', [
+ (0, 'Commit Charge: 20.1% (19.04 GB/94.50 GB)', [
('pagefile', 19491.90625, 77413.990625, 87090.739453125, 0, 96767.48828125),
('pagefile_total', 96767.48828125, None, None, None, None),
]),
@@ -35,7 +35,7 @@ checks = {
('memory', 16369.96875, 26214.48828125, 29490.48828125, 0, 32767.48828125),
('mem_total', 32767.48828125, None, None, None, None),
]),
- (2, 'Page file: 20.1% (19.04 GB/94.50 GB)', [
+ (2, 'Commit Charge: 20.1% (19.04 GB/94.50 GB)', [
('pagefile', 19491.90625, 77275.48828125, 19353.48828125, 0, 96767.48828125),
('pagefile_total', 96767.48828125, None, None, None, None),
]),
Module: check_mk
Branch: master
Commit: 178d0d495532c6c63a91e8bd74b57c5dcc8e77d5
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=178d0d495532c6…
Author: Moritz Kiemer <mo(a)mathias-kettner.de>
Date: Wed Jan 30 16:48:42 2019 +0100
azure: Better handle (authorization) errors
We can now see them in the azure_agent_info service.
Change-Id: I851f2b38528d926291335dfb12f1b7bf78694f8a
---
agents/special/agent_azure | 25 +++++++++++++++----------
checks/azure_agent_info | 3 +++
2 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/agents/special/agent_azure b/agents/special/agent_azure
index b0bad53..03f3120 100755
--- a/agents/special/agent_azure
+++ b/agents/special/agent_azure
@@ -216,14 +216,6 @@ def parse_arguments(argv):
return args
-def _add_hint(msg):
- if "does not have authorization to perform action" in msg:
- hint = "Make sure you have a proper role asigned to your client!"
- else:
- return msg
- return "%s\nHINT: %s\n" % (msg, hint)
-
-
# The following *Config objects provide a Configuration instance as described in
# CMK-513 (comment-12620).
# For now the passed commandline arguments are used to create it.
@@ -588,6 +580,20 @@ def write_groups(resources):
Section('agent_info', group).write()
+def write_exception_to_agent_info_section(exception):
+ # those exeptions are quite noisy. try to make them more concise:
+ msg = str(exception).split('Trace ID')[0]
+ msg = msg.split(':', 2)[-1].strip(' ,')
+
+ if "does not have authorization to perform action" in msg:
+ msg += "HINT: Make sure you have a proper role asigned to your client!"
+
+ value = json.dumps((2, msg))
+ section = Section('agent_info')
+ section.add(('agent-bailout', value))
+ section.write()
+
+
def main(argv=None):
args = parse_arguments(argv or sys.argv[1:])
@@ -613,8 +619,7 @@ def main(argv=None):
section.write()
except () if args.debug else Exception as exc:
- sys.stderr.write(_add_hint(str(exc)))
- return 1
+ write_exception_to_agent_info_section(exc)
return 0
diff --git a/checks/azure_agent_info b/checks/azure_agent_info
index 1d295db..9007ffe 100644
--- a/checks/azure_agent_info
+++ b/checks/azure_agent_info
@@ -78,6 +78,9 @@ def discovery_azure_agent_info(_parsed):
def check_azure_agent_info(_no_item, params, parsed):
+ for status, text in parsed.get('agent-bailout', []):
+ yield status, text
+
reads = parsed.get('remaining-reads')
# this is only reported for the Datasource Host, so None
# is ignored.
Module: check_mk
Branch: master
Commit: 374fa037fcb60a1861cc65a9539c12e4acd1c656
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=374fa037fcb60a…
Author: Tom Baerwinkel <tb(a)mathias-kettner.de>
Date: Mon Jan 28 09:46:41 2019 +0100
7056 Kubernetes monitoring
A special agent and multiple checks to support the monitoring of
Kubernetes clusters are added. For a detailed description of the
features and a setup guide please refer to the official Check_MK
guide:
https://mathias-kettner.de/cms_monitoring_kubernetes.html
CMK-1518
Change-Id: Ice1bdc672306cd1739a5940ab05bdb85b52f8f1c
---
.werks/7056 | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/.werks/7056 b/.werks/7056
new file mode 100644
index 0000000..2e55219
--- /dev/null
+++ b/.werks/7056
@@ -0,0 +1,16 @@
+Title: Kubernetes monitoring
+Level: 2
+Component: checks
+Class: feature
+Compatible: compat
+Edition: cre
+State: unknown
+Version: 1.6.0i1
+Date: 1548664673
+
+A special agent and multiple checks to support the monitoring of
+Kubernetes clusters are added. For a detailled description of the
+features and a setup guide please refer to the official Check_MK
+guide:
+
+https://mathias-kettner.de/cms_monitoring_kubernetes.html
Module: check_mk
Branch: master
Commit: a62c3784f2ecf3332331f87cd681d4f1b05f36ef
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=a62c3784f2ecf3…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Wed Jan 30 21:27:39 2019 +0100
Containerized Check_MK: Fixed tmpfs handling
Detect the situation where a tmpfs is mounted into the site by
the node. In this situation an omd umount call must not try to unmount
the tmpfs because this will always fail with a permission denied
issue (at least in case of non-privileged containers).
Change-Id: Ie277b673b63bdffb5644b6cbb06aeef0cad9a492
---
omd/packages/omd/omdlib/main.py | 57 +++++++++++++++++++++++++----------------
1 file changed, 35 insertions(+), 22 deletions(-)
diff --git a/omd/packages/omd/omdlib/main.py b/omd/packages/omd/omdlib/main.py
index b96e6bb..42e9447 100644
--- a/omd/packages/omd/omdlib/main.py
+++ b/omd/packages/omd/omdlib/main.py
@@ -1621,49 +1621,62 @@ def is_dockerized():
return os.path.exists("/.dockerenv")
+def tmpfs_is_managed_by_node(site):
+ """When running in a container, and the tmpfs is managed by the node, the
+ mount is visible, but can not be unmounted. umount exits with 32 in this
+ case. Treat this case like there is no tmpfs and only the directory needs
+ to be cleaned."""
+ if not is_dockerized():
+ return False
+
+ if not tmpfs_mounted(site.name):
+ return False
+
+ return subprocess.call(["umount", site.tmp_dir],
+ stdout=open(os.devnull, "w"),
+ stderr=subprocess.STDOUT) == 32
+
+
def unmount_tmpfs(site, output=True, kill=False):
# Clear directory hierarchy when not using a tmpfs
# During omd update TMPFS hook might not be set so assume
# that the hook is enabled by default.
# If kill is True, then we do an fuser -k on the tmp
# directory first.
- if not tmpfs_mounted(site.name):
+ if not tmpfs_mounted(site.name) or tmpfs_is_managed_by_node(site):
tmp = site.tmp_dir
if os.path.exists(tmp):
if output:
- sys.stdout.write("Cleaning up temp filesystem...")
+ sys.stdout.write("Cleaning up tmp directory...")
sys.stdout.flush()
delete_directory_contents(tmp)
if output:
ok()
return True
- else:
- if output:
- sys.stdout.write("Unmounting temporary filesystem...")
-
- for _t in range(0, 10):
- if subprocess.call(["umount", site.tmp_dir]) == 0:
- if output:
- ok()
- return True
+ if output:
+ sys.stdout.write("Unmounting temporary filesystem...")
- if kill:
- if output:
- sys.stdout.write("Killing processes still using '%s'\n" % site.tmp_dir)
- subprocess.call(["fuser", "--silent", "-k", site.tmp_dir])
+ for _t in range(0, 10):
+ if subprocess.call(["umount", site.tmp_dir]) == 0:
+ if output:
+ ok()
+ return True
+ if kill:
if output:
- sys.stdout.write(kill and "K" or ".")
- sys.stdout.flush()
- time.sleep(1)
+ sys.stdout.write("Killing processes still using '%s'\n" % site.tmp_dir)
+ subprocess.call(["fuser", "--silent", "-k", site.tmp_dir])
if output:
- bail_out(tty_error + ": Cannot unmount tmp filesystem.")
- else:
- return False
+ sys.stdout.write(kill and "K" or ".")
+ sys.stdout.flush()
+ time.sleep(1)
- return True
+ if output:
+ bail_out(tty_error + ": Cannot unmount temporary filesystem.")
+ else:
+ return False
def add_to_fstab(site, tmpfs_size=None):