Module: check_mk
Branch: master
Commit: 59f6f7e32c41046a91a348f820b76fd2e4c408f3
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=59f6f7e32c4104…
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Tue Jul 23 10:06:15 2013 +0200
new script cmk_delete_host.py
---
doc/treasures/cmk_delete_host.py | 130 ++++++++++++++++++++++++++++++++++++++
1 file changed, 130 insertions(+)
diff --git a/doc/treasures/cmk_delete_host.py b/doc/treasures/cmk_delete_host.py
new file mode 100755
index 0000000..978a1c5
--- /dev/null
+++ b/doc/treasures/cmk_delete_host.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+# -*- encoding: utf-8; py-indent-offset: 4 -*-
+# +------------------------------------------------------------------+
+# | ____ _ _ __ __ _ __ |
+# | / ___| |__ ___ ___| | __ | \/ | |/ / |
+# | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
+# | | |___| | | | __/ (__| < | | | | . \ |
+# | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
+# | |
+# | Copyright Mathias Kettner 2013 mk(a)mathias-kettner.de |
+# +------------------------------------------------------------------+
+#
+# This file is part of Check_MK.
+# The official homepage is at http://mathias-kettner.de/check_mk.
+#
+# check_mk is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation in version 2. check_mk is distributed
+# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
+# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU General Public License for more de-
+# tails. You should have received a copy of the GNU General Public
+# License along with GNU Make; see the file COPYING. If not, write
+# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+# Boston, MA 02110-1301 USA.
+
+#Author: Andreas Boesl ab(a)mathias-kettner.de
+
+# This script
+# 1) deletes the given hosts from WATO
+# 2) creates new core config
+# 3) restarts omd site
+# 4) removes all data (including rrd files) from the given hosts
+
+import os, sys, shutil, pprint
+
+# Set automation user login credentials
+automation_user = "cmdb_automation"
+automation_secret = "UVEMFMCBITYUURHPILKP"
+
+
+def usage():
+ print "usage: cmk_delete_host {hostname1 hostname2}"
+ print "Script needs to be executed as site user"
+
+if len(sys.argv) == 1:
+ usage()
+ sys.exit(1)
+
+# Check if the given host exists
+# Simple approach: Each host managed by wato has a host tag for its folder
+# which starts with /wato/
+# If this is missing -> host does not exist
+
+# Determine the OMD site from the environment (only set for site users)
+omd_site = os.environ["OMD_SITE"]
+if not omd_site:
+ print "This script is only executable as site user"
+ sys.exit(1)
+
+g_current_host = None
+def log(text):
+ print "%s: %s" % (g_current_host, text)
+
+def get_wato_folder(hostname):
+ process = os.popen("cmk -D " + hostname, "r")
+ output = process.read().split("\n")
+ for line in output:
+ if line.startswith("Tags:"):
+ for tag in line.split()[1:]:
+ if tag.startswith("/wato/"):
+ return tag[6:-1]
+ else:
+ log("Host not managed by WATO")
+ break
+ else:
+ log("Host has no tags")
+ return
+
+
+# Start deletion
+wato_config_changed = False
+for hostname in sys.argv[1:]:
+ if not hostname.strip():
+ continue
+
+ g_current_host = hostname
+ wato_folder = get_wato_folder(hostname)
+ if wato_folder == None:
+ continue
+
+ wato_config_changed = True
+ # Remove the host in WATO
+ command = "curl -s 'http://localhost/%(omd_site)s/check_mk/wato.py?mode=folder"\
+ "&_username=%(automation_user)s"\
+ "&_secret=%(automation_secret)s"\
+ "&_do_actions=yes"\
+ "&_do_confirm=yes"\
+ "&_delete_host=%(hostname)s"\
+ "&_transid=-1"\
+ "&folder=%(wato_folder)s' 1>/dev/null" % { "automation_user": automation_user,
+ "automation_secret": automation_secret,
+ "omd_site": omd_site,
+ "hostname": hostname,
+ "wato_folder": wato_folder }
+
+ os.system(command)
+
+# Generate monitoring configuration and restart core
+if wato_config_changed:
+ os.system("cmk -R ; omd restart")
+
+# Flushing counters, cache files, piggy files, logfiles and autochecks
+print "Flushing data"
+os.system("cmk --flush %s" % " ".join(sys.argv[1:]))
+for hostname in sys.argv[1:]:
+ if not hostname.strip():
+ continue
+
+ g_current_host = hostname
+
+ # Remove rrd files
+ path_rrd = "~/var/pnp4nagios/perfdata/" + hostname
+ path_rrd = os.path.expanduser(path_rrd)
+ if os.path.exists(path_rrd):
+ log("Removing path " + path_rrd)
+ shutil.rmtree(path_rrd)
+ else:
+ log("Host has no perfdata")
+
Module: check_mk
Branch: master
Commit: 006fbac37a02b38c1bef096847dc3ddab9cc6876
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=006fbac37a02b3…
Author: Andreas Boesl <ab(a)mathias-kettner.de>
Date: Tue Jul 23 09:11:55 2013 +0200
fixed typo
---
web/htdocs/wato.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/web/htdocs/wato.py b/web/htdocs/wato.py
index 8a3223c..d2149c7 100644
--- a/web/htdocs/wato.py
+++ b/web/htdocs/wato.py
@@ -9778,7 +9778,7 @@ def mode_ineffective_rules(phase):
html.write('<div class=rulesets>')
all_hosts = load_all_hosts()
- html.write("<div class=info>" + _("The following rules do match match to any of the existing hosts.") + "</div>")
+ html.write("<div class=info>" + _("The following rules do not match to any of the existing hosts.") + "</div>")
have_ineffective = False
for groupname in groupnames:
Module: check_mk
Branch: master
Commit: 3335179923e78ad11a6b6f3d9e6e28e72bfdc6dd
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=3335179923e78a…
Author: Mathias Kettner <mk(a)mathias-kettner.de>
Date: Mon Jul 22 17:51:47 2013 +0200
Simplify implementation of notify --keepalive
---
modules/notify.py | 131 +++++++++++++++++++++++++++++------------------------
1 file changed, 72 insertions(+), 59 deletions(-)
diff --git a/modules/notify.py b/modules/notify.py
index 9102a46..39dd198 100644
--- a/modules/notify.py
+++ b/modules/notify.py
@@ -311,7 +311,7 @@ def do_notify(args):
notify_keepalive()
else:
- notify_notify()
+ notify_notify(notification_context_from_env())
except Exception, e:
if g_interactive:
@@ -345,81 +345,91 @@ def notify_config_timestamp():
def notify_keepalive():
- global g_notify_readahead_buffer
- g_notify_readahead_buffer = ""
config_timestamp = notify_config_timestamp()
- # Send signal that we are ready to receive the next notification
- sys.stdout.write("*")
- sys.stdout.flush()
+ # Send signal that we are ready to receive the next notification, but
+ # not after a config-reload-restart (see below)
+ if os.getenv("CMK_NOTIFY_RESTART") != "1":
+ notify_log("Starting in keepalive mode with PID %d" % os.getpid())
+ sys.stdout.write("*")
+ sys.stdout.flush()
+ else:
+ notify_log("We are back after a restart.")
while True:
try:
# If the configuration has changed, we do a restart. But we do
# this check just before the next notification arrives. We must
- # *not* read data from stdin, just peek! On the other hand we
- # must *not* restart when there is danger that the buffered
- # sys.stdin has already read data from the next notification. That
- # would get lost! So we do restart if:
- # - The last time we look *no* data was available on stdin.
- # - Now there *is* data available
- # - The buffer of stdin is empty
- # - The timestamp of the youngest configuration file has changed.
+ # *not* read data from stdin, just peek! There is still one
+ # problem: when restarting we must *not* send the initial '*'
+ # byte, because that must be sent no sooner than the notification
+ # has been sent. We do this by setting the environment variable
+ # CMK_NOTIFY_RESTART=1
+
if notify_data_available():
- if not g_notify_readahead_buffer:
- current_config_timestamp = notify_config_timestamp()
- if current_config_timestamp > config_timestamp:
- notify_log("Configuration has changed. Restarting myself.")
- os.execvp("cmk", sys.argv)
-
- new_data = os.read(0, 20000)
- if not new_data:
- sys.exit(0) # closed stdin
- g_notify_readahead_buffer += new_data
- if g_notify_readahead_buffer.startswith('\n\n'):
- sys.exit(0)
- while '\n\n' in g_notify_readahead_buffer:
+ current_config_timestamp = notify_config_timestamp()
+ if current_config_timestamp > config_timestamp:
+ notify_log("Configuration has changed. Restarting myself.")
+ os.putenv("CMK_NOTIFY_RESTART", "1")
+ os.execvp("cmk", sys.argv)
+
+ data = ""
+ while not data.endswith("\n\n"):
try:
- notify_notify()
+ new_data = ""
+ new_data = os.read(0, 32768)
+ except IOError, e:
+ new_data = ""
except Exception, e:
if opt_debug:
raise
- notify_log("ERROR %s\n%s" % (e, format_exception()))
- sys.stdout.write("*")
- sys.stdout.flush()
+ notify_log("Cannot read data from CMC: %s" % e)
+
+ if not new_data:
+ notify_log("CMC has closed the connection. Shutting down.")
+ sys.exit(0) # closed stdin, this is
+ data += new_data
+
+ try:
+ context = notification_context_from_string(data.rstrip('\n'))
+ notify_notify(context)
+ except Exception, e:
+ if opt_debug:
+ raise
+ notify_log("ERROR %s\n%s" % (e, format_exception()))
+
+ # Signal that we are ready for the next notification
+ sys.stdout.write("*")
+ sys.stdout.flush()
except Exception, e:
if opt_debug:
raise
notify_log("ERROR %s\n%s" % (e, format_exception()))
-# Note: The values of the context are *always* unicode!
-def notify_get_context():
- global g_notify_readahead_buffer
- if opt_keepalive:
- # Context is line-by-line in g_notify_readahead_buffer
- this_part, rest = g_notify_readahead_buffer.split('\n\n', 1)
- g_notify_readahead_buffer = rest
- context = {}
- try:
- for line in this_part.split('\n'):
- varname, value = line.strip().split("=", 1)
- context[varname] = value
- except Exception, e: # line without '=' ignored or alerted
- if opt_debug:
- raise
- return context
- else:
- # Information about notification is excpected in the
- # environment in variables with the prefix NOTIFY_
- return dict([
- (var[7:], value)
- for (var, value)
- in os.environ.items()
- if var.startswith("NOTIFY_")
- and not re.match('^\$[A-Z]+\$$', value)])
+def notification_context_from_string(data):
+ # Context is line-by-line in data, one VARNAME=value pair per line
+ context = {}
+ try:
+ for line in data.split('\n'):
+ varname, value = line.strip().split("=", 1)
+ context[varname] = value
+ except Exception, e: # line without '=' ignored or alerted
+ if opt_debug:
+ raise
+ return context
+
+def notification_context_from_env():
+ # Information about notification is expected in the
+ # environment in variables with the prefix NOTIFY_
+ return dict([
+ (var[7:], value)
+ for (var, value)
+ in os.environ.items()
+ if var.startswith("NOTIFY_")
+ and not re.match('^\$[A-Z]+\$$', value)])
def convert_context_to_unicode(context):
@@ -435,8 +445,7 @@ def convert_context_to_unicode(context):
value_unicode = u"(Invalid byte sequence)"
context[key] = value_unicode
-def notify_notify():
- context = notify_get_context()
+def notify_notify(context):
notify_log("Got notification context with %s variables" % len(context))
# Add a few further helper variables
@@ -516,7 +525,7 @@ def notify_via_email(context, write_into_spoolfile):
subject = substitute_context(subject_t, context)
context["SUBJECT"] = subject
body = substitute_context(notification_common_body + body_t, context)
- command = substitute_context(notification_mail_command, context)
+ command = substitute_context(notification_mail_command, context) + " >/dev/null 2>&1"
command_utf8 = command.encode("utf-8")
if notification_logging >= 2:
notify_log("Executing command: %s" % command)
@@ -528,6 +537,10 @@ def notify_via_email(context, write_into_spoolfile):
os.putenv("LANG", "C.UTF-8")
if notification_logging >= 2:
file(var_dir + "/notify/body.log", "w").write(body.encode("utf-8"))
+
+ # Important: we must not output anything on stdout or stderr. Data of stdout
+ # goes back into the socket to the CMC in keepalive mode and garbles the
+ # handshake signal.
return os.popen(command_utf8, "w").write(body.encode("utf-8"))
Module: check_mk
Branch: master
Commit: 3d433c095126fa1c2ce95a59564bf17e4dfd6622
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=3d433c095126fa…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Mon Jul 22 17:05:32 2013 +0200
FIX: fix logwatch pattern analyzer after direct access to the page
---
ChangeLog | 3 ++-
web/htdocs/wato.py | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/ChangeLog b/ChangeLog
index 3232296..d38b6d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -274,7 +274,8 @@
* FIX: avoid exception when called first time and first page ist host tags
* FIX: fix validation of time-of-day input field (24:00)
* FIX: automation users can now be deleted again (bug was introduced in 1.2.2p1)
-
+ * FIX: fix logwatch pattern analyzer message "The host xyz is not
+ managed by WATO." after direct access via snapin
1.2.2p2:
Core:
diff --git a/web/htdocs/wato.py b/web/htdocs/wato.py
index f35d554..ebdae6d 100644
--- a/web/htdocs/wato.py
+++ b/web/htdocs/wato.py
@@ -11841,6 +11841,7 @@ def mode_pattern_editor(phase):
html.text_input('match', cssclass = 'match', size=100)
forms.end()
html.button('_try', _('Try out'))
+ html.del_var('folder') # Never hand over the folder here
html.hidden_fields()
html.end_form()
documentation of error levels
Message-ID: <51ed4960.vstmQGYuYtXNeTId%bs(a)mathias-kettner.de>
User-Agent: Heirloom mailx 12.4 7/29/08
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Module: check_mk
Branch: master
Commit: 01b0c9a110bfb6f9425c161d86949da043abe734
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=01b0c9a110bfb6…
Author: Bernd Stroessenreuther <bs(a)mathias-kettner.de>
Date: Mon Jul 22 17:01:44 2013 +0200
check manpage for ibm_rsa_health: documentation of error levels
---
checkman/ibm_rsa_health | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/checkman/ibm_rsa_health b/checkman/ibm_rsa_health
index 200e5ea..2f7db99 100644
--- a/checkman/ibm_rsa_health
+++ b/checkman/ibm_rsa_health
@@ -4,16 +4,13 @@ catalog: hw/server/ibm
license: GPL
distribution: check_mk
description:
- BERND, FIXME: Ask Mathias in which cases WARN and CRIT are issued.
-
Checks systems health via IBM Remote Supervisor Adapter (RSA) by SNMP.
The system health includes health of fans, power supplys, etc.
Returns {OK} if all of the included devices report OK status.
- Returns {WARN} if ...
-
- Returns {CRIT} if ...
+ Returns {WARN} if the RSA reports a non critical problem and {CRIT} on
+ a system level problem.
inventory:
Returns one check per RSA.
Module: check_mk
Branch: master
Commit: 4c057669c6e2de60cb87b400e8875da65a2c4512
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=4c057669c6e2de…
Author: Bernd Stroessenreuther <bs(a)mathias-kettner.de>
Date: Mon Jul 22 16:54:37 2013 +0200
initial version of check manpage for statgrab_cpu and statgrab_load
---
checkman/statgrab_cpu | 7 +++++--
checkman/statgrab_load | 41 +++++++++++++++++++++++++++++++++--------
2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/checkman/statgrab_cpu b/checkman/statgrab_cpu
index 1a74d29..fbc06b6 100644
--- a/checkman/statgrab_cpu
+++ b/checkman/statgrab_cpu
@@ -5,9 +5,12 @@ license: GPL
distribution: check_mk
description:
This check measures the CPU utilization during the last check
- cycle. A level can be set only on {wait} (disk wait).
+ cycle.
- BERND: Hinweis auf statgrab-Plugin (HP) bzw. statgrab-Befehl (beide).
+ Make sure you have the statgrab binary available on the monitored
+ machine. Under HP-UX you also need to activate the hpux_statgrab agent plugin.
+
+ The check always returns {OK} status.
perfdata:
Three variables: The percentage of time spent in {user}, {system} and {wait}
diff --git a/checkman/statgrab_load b/checkman/statgrab_load
index 1896dbb..009e4a9 100644
--- a/checkman/statgrab_load
+++ b/checkman/statgrab_load
@@ -4,16 +4,41 @@ catalog: os/kernel
license: GPL
distribution: check_mk
description:
- This check measures the CPU utilization during the last check
- cycle. A level can be set only on {wait} (disk wait).
+ This check measures and checks the averaged CPU load during the last check
+ cycle by using the statgrab tool of the operating system. The values
+ for 1, 5 and 15 minute average are sent, although the PNP template
+ shipped with check_mk only displays the 1 and 15 min average load.
- BERND: Hinweis auf statgrab-Plugin (HP) bzw. statgrab-Befehl (beide).
+ Make sure you have the statgrab binary available on the monitored
+ machine. Under HP-UX you also need to activate the hpux_statgrab agent plugin.
+ Note: The CPU load is the average number of processes that are
+ currently in the state "running". Do not mix this up with the
+ CPU "utilization" (which measures the current usage of the CPU
+ in percent).
+
+examples:
+ # Set default levels to a load of 4 and 8
+ cpuload_default_levels = (4.0, 8.0)
+
+ checks += [
+ # Use levels 4 (warning) and 8 (critical)
+ # on all hosts with the tag "unix"
+ (["unix"], ALL_HOSTS, "statgrab_load", None, (4, 8)),
+ ]
perfdata:
- Three variables: The percentage of time spent in {user}, {system} and {wait}
- (disk wait). The idle time can be computed by substracting those three values
- from 100%. The user value contains also {nice}.
+ Three values: The load average for 1, 5 and 15 minutes.
inventory:
- One check per host is created if the agent contains a {statgrab_cpu}
- section containing cpu information.
+ One check per host is created if the agent contains a {statgrab_load}
+ section.
+
+[parameters]
+warning (int): 15 minute average load at which a {WARN} is triggered.
+critical (int): 15 minute average load at which a {CRIT} state is triggered.
+
+[configuration]
+cpuload_default_levels (int, int): A pair of floats or integers: The default
+ levels. These levels are used for inventorized checks. This variable
+ is preset to {(5, 10)}
+
Module: check_mk
Branch: master
Commit: 09fdb7ab46a8cef619097dcec2f0efa847b3eb33
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=09fdb7ab46a8ce…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Mon Jul 22 16:42:57 2013 +0200
FIX: automation users can now be deleted again (bug was introduced in 1.2.2p1)
---
ChangeLog | 1 +
web/htdocs/userdb.py | 8 ++++++++
2 files changed, 9 insertions(+)
diff --git a/ChangeLog b/ChangeLog
index b168a8d..3232296 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -273,6 +273,7 @@
* FIX: fix layout of Auxiliary tags table
* FIX: avoid exception when called first time and first page ist host tags
* FIX: fix validation of time-of-day input field (24:00)
+ * FIX: automation users can now be deleted again (bug was introduced in 1.2.2p1)
1.2.2p2:
diff --git a/web/htdocs/userdb.py b/web/htdocs/userdb.py
index 73d7c90..1b60826 100644
--- a/web/htdocs/userdb.py
+++ b/web/htdocs/userdb.py
@@ -394,6 +394,14 @@ def save_users(profiles):
# entry = dir + "/" + e
# if os.path.isdir(entry):
# shutil.rmtree(entry)
+ # But for the automation.secret this is ok, since automation users are not
+ # created by other sources in common cases
+ dir = defaults.var_dir + "/web"
+ for user_dir in os.listdir(defaults.var_dir + "/web"):
+ if user_dir not in ['.', '..'] and user_dir not in profiles:
+ entry = dir + "/" + user_dir
+ if os.path.isdir(entry) and os.path.exists(entry + '/automation.secret'):
+ os.unlink(entry + '/automation.secret')
# Release the lock to make other threads access possible again asap
# This lock is set by load_users() only in the case something is expected
Module: check_mk
Branch: master
Commit: 224be905641ff522f27194649614e3d9113d6afc
URL: http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=224be905641ff5…
Author: Bernd Stroessenreuther <bs(a)mathias-kettner.de>
Date: Mon Jul 22 16:20:22 2013 +0200
initial version of check manpage for ibm_rsa_health and lnx_thermal
---
checkman/ibm_rsa_health | 14 +++++++++++++-
checkman/lnx_thermal | 23 ++++++++++++++++++++++-
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/checkman/ibm_rsa_health b/checkman/ibm_rsa_health
index ae4ca9e..200e5ea 100644
--- a/checkman/ibm_rsa_health
+++ b/checkman/ibm_rsa_health
@@ -4,4 +4,16 @@ catalog: hw/server/ibm
license: GPL
distribution: check_mk
description:
- BERND
+ BERND, FIXME: Ask Mathias in which cases WARN and CRIT are issued.
+
+ Checks systems health via IBM Remote Supervisor Adapter (RSA) by SNMP.
+ The system health includes health of fans, power supplys, etc.
+
+ Returns {OK} if all of the included devices report OK status.
+
+ Returns {WARN} if ...
+
+ Returns {CRIT} if ...
+
+inventory:
+ Returns one check per RSA.
diff --git a/checkman/lnx_thermal b/checkman/lnx_thermal
index 0e14d3b..9aed910 100644
--- a/checkman/lnx_thermal
+++ b/checkman/lnx_thermal
@@ -4,4 +4,25 @@ catalog: os/hardware
license: GPL
distribution: check_mk
description:
- BERND: verwendet /proc/sys/wasweisich...
+ This check uses the temperature information provided by the kernel in
+ /sys/class/thermal/thermal_zone* and reports values in °C
+
+ The measured value is matched against levels delivered by the kernel.
+ Returns {CRIT} if the kernel reports {hot} or {critical} state,
+ {WARN} on any other non ok state and {OK} if the temperature is
+ considered to be within normal operation temperature levels.
+
+item:
+ The ID of the thermal zone
+
+examples:
+ # adding a check for /sys/class/thermal/thermal_zone0
+ checks += [
+ ( "localhost", "lnx_thermal", "0", None )
+ ]
+
+perfdata:
+ One value: The current temperature in °C
+
+inventory:
+ Creates one check for each thermal zone.