Module: check_mk
Branch: master
Commit: 66dc43be622b2738f5ab7d905671943e6cf78cf0
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=66dc43be622b2738f5ab7d905671943e6cf78cf0
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Wed Aug 28 15:14:55 2013 +0200
Trying to improve ldap connection problem handling during sync runs; improved logging
---
web/plugins/userdb/ldap.py | 54 +++++++++++++++++++++++++++++++++-----------
1 file changed, 41 insertions(+), 13 deletions(-)
diff --git a/web/plugins/userdb/ldap.py b/web/plugins/userdb/ldap.py
index 488fccb..2ed66a3 100644
--- a/web/plugins/userdb/ldap.py
+++ b/web/plugins/userdb/ldap.py
@@ -25,7 +25,7 @@
# Boston, MA 02110-1301 USA.
import config, defaults
-import time, copy
+import time, copy, traceback
try:
# docs:
http://www.python-ldap.org/doc/html/index.html
@@ -49,6 +49,8 @@ g_ldap_group_cache = {}
# File for storing the time of the last success event
g_ldap_sync_time_file = defaults.var_dir + '/web/ldap_sync_time.mk'
+# Exists when last ldap sync failed, contains exception text
+g_ldap_sync_fail_file = defaults.var_dir + '/web/ldap_sync_fail.mk'
# LDAP attributes are case insensitive, we only use lower case!
# Please note: This are only default values. The user might override this
@@ -93,7 +95,8 @@ ldap_filter_map = {
def ldap_log(s):
if config.ldap_debug_log is not None:
- file(ldap_replace_macros(config.ldap_debug_log),
"a").write('%s\n' % s)
+ file(ldap_replace_macros(config.ldap_debug_log), "a").write('%s
%s\n' %
+ (time.strftime('%Y-%m-%d %H:%M:%S'),
s))
class MKLDAPException(MKGeneralException):
pass
@@ -128,6 +131,7 @@ def ldap_connect_server(server):
conn = ldap.ldapobject.ReconnectLDAPObject(uri)
conn.protocol_version = config.ldap_connection['version']
conn.network_timeout = config.ldap_connection.get('connect_timeout',
2.0)
+ conn.retry_delay = 0.5
# When using the domain top level as base-dn, the subtree search stumbles with
referral objects.
# whatever. We simply disable them here when using active directory. Hope this
fixes all problems.
@@ -188,6 +192,7 @@ def ldap_connect(enforce_new = False, enforce_server = None):
else:
errors.append(error_msg)
+ # Got no connection to any server
if ldap_connection is None:
raise MKLDAPException(_('The LDAP connector is unable to connect to the
LDAP server.\n%s') %
('<br />\n'.join(errors)))
@@ -196,7 +201,9 @@ def ldap_connect(enforce_new = False, enforce_server = None):
ldap_connection_options = config.ldap_connection
except Exception:
- ldap_connection = None # Invalidate connection on failure
+ # Invalidate connection on failure
+ ldap_connection = None
+ ldap_connection_options = None
raise
# Bind with the default credentials
@@ -306,11 +313,12 @@ def ldap_search(base, filt = '(objectclass=*)', columns =
[], scope = None):
# In some environments, the connection to the LDAP server does not seem to
# be as stable as it is needed. So we try to repeat the query for three times.
- tries_left = 3
+ tries_left = 2
success = False
while not success:
tries_left -= 1
try:
+ ldap_connect()
result = []
try:
search_func = config.ldap_connection.get('page_size') \
@@ -335,12 +343,13 @@ def ldap_search(base, filt = '(objectclass=*)', columns =
[], scope = None):
'a sizelimit configuration on the LDAP
server.<br />Throwing away the '
'incomplete results. You should change the
scope of operation '
'within the ldap or adapt the limit settings
of the LDAP server.'))
- except ldap.SERVER_DOWN:
+ except (ldap.SERVER_DOWN, ldap.TIMEOUT, MKLDAPException), e:
if tries_left:
- ldap_log(' Received SERVER_DOWN. Retrying...')
- ldap_connection.reconnect(ldap_connection._uri)
+ ldap_log(' Received %r. Retrying with clean connection...' % e)
+ ldap_disconnect()
time.sleep(0.5)
else:
+ ldap_log(' Giving up.')
break
duration = time.time() - start_time
@@ -837,8 +846,6 @@ def ldap_sync(add_to_changelog, only_username):
start_time = time.time()
- ldap_connect()
-
ldap_log(' SYNC PLUGINS: %s' % ',
'.join(config.ldap_active_plugins.keys()))
# Unused at the moment, always sync all users
@@ -846,10 +853,11 @@ def ldap_sync(add_to_changelog, only_username):
#if only_username:
# filt = '(%s=%s)' % (ldap_user_id_attr(), only_username)
- import wato
- users = load_users(lock = True)
ldap_users = ldap_get_users()
+ import wato
+ users = load_users(lock = True)
+
# Remove users which are controlled by this connector but can not be found in
# LDAP anymore
for user_id, user in users.items():
@@ -897,6 +905,12 @@ def ldap_sync(add_to_changelog, only_username):
duration = time.time() - start_time
ldap_log('SYNC FINISHED - Duration: %0.3f sec' % duration)
+ # delete the fail flag file after successful sync
+ try:
+ os.unlink(g_ldap_sync_fail_file)
+ except OSError:
+ pass
+
save_users(users)
# Calculates the attributes of the users which are locked for users managed
@@ -930,11 +944,25 @@ def ldap_page():
except:
last_sync_time = 0
- if last_sync_time + config.ldap_cache_livetime > time.time():
+ # in case of sync problems, synchronize all 20 seconds, instead of the configured
+ # regular cache livetime
+ if os.path.exists(g_ldap_sync_fail_file):
+ cache_livetime = 20
+ else:
+ cache_livetime = config.ldap_cache_livetime
+
+ if last_sync_time + cache_livetime > time.time():
return # No action needed, cache is recent enough
# ok, cache is too old. Act!
- ldap_sync(False, None)
+ try:
+ ldap_sync(False, None)
+ except:
+ # Do not let the exception through to the user. Instead write last
+ # error in a state file which is then visualized for the admin and
+ # will be deleted upon next successful sync.
+ file(g_ldap_sync_fail_file, 'w').write('%s\n%s' %
(time.strftime('%Y-%m-%d %H:%M:%S'),
+ traceback.format_exc()))
multisite_user_connectors.append({
'id': 'ldap',