Module: check_mk
Branch: master
Commit: a4e04dc922427052f917f15e7b09b606b79e27b1
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=a4e04dc922427052f917f15e7b09b606b79e27b1
Author: Tom Baerwinkel <tb(a)mathias-kettner.de>
Date: Mon Nov 26 18:38:59 2018 +0100
agent_kubernetes: report CPU, memory and pod resources on the cluster
The k8s_resources check can now be used on the cluster as well.
Furthermore, the logic of merging dicts is now performed by the function
left_join_dicts.
CMK-511
Change-Id: I9189e8bf404a17817f041f6a9780d94595891450
---
agents/special/agent_kubernetes | 104 +++++++++++++++++++++++++---------------
checkman/k8s_resources.cpu | 2 +-
checkman/k8s_resources.memory | 2 +-
checkman/k8s_resources.pods | 2 +-
4 files changed, 69 insertions(+), 41 deletions(-)
diff --git a/agents/special/agent_kubernetes b/agents/special/agent_kubernetes
index 8b1b88b..919710d 100755
--- a/agents/special/agent_kubernetes
+++ b/agents/special/agent_kubernetes
@@ -35,9 +35,11 @@ from __future__ import (
import argparse
from collections import OrderedDict, Sequence
+import functools
import itertools
import json
import logging
+import operator
import os
import sys
import time
@@ -129,6 +131,19 @@ def parse_memory(value):
return float(value)
+def left_join_dicts(initial, new, operation):
+ d = {}
+ for key, value in initial.iteritems():
+ if isinstance(value, dict):
+ d[key] = left_join_dicts(value, new.get(key, {}), operation)
+ else:
+ if key in new:
+ d[key] = operation(value, new[key])
+ else:
+ d[key] = value
+ return d
+
+
class Metadata(object):
def __init__(self, metadata):
# type: (Optional[client.V1ObjectMeta]) -> None
@@ -159,24 +174,36 @@ class Node(Metadata):
return None
return {c.type: c.status for c in conditions}
+ @staticmethod
+ def zero_resources():
+ return {
+ 'capacity': {
+ 'cpu': 0.0,
+ 'memory': 0.0,
+ 'pods': 0,
+ },
+ 'allocatable': {
+ 'cpu': 0.0,
+ 'memory': 0.0,
+ 'pods': 0,
+ },
+ }
+
@property
def resources(self):
# type: () -> Dict[str, Dict[str, float]]
+ view = self.zero_resources()
if not self._status:
- return {}
- view, capacity, allocatable = {}, self._status.capacity,
self._status.allocatable
+ return view
+ capacity, allocatable = self._status.capacity, self._status.allocatable
if capacity:
- view['capacity'] = {
- 'cpu': parse_cpu(capacity.get('cpu', '0.0')),
- 'memory': parse_memory(capacity.get('memory',
'0.0')),
- 'pods': int(capacity.get('pods', '0')),
- }
+ view['capacity']['cpu'] +=
parse_cpu(capacity.get('cpu', '0.0'))
+ view['capacity']['memory'] +=
parse_memory(capacity.get('memory', '0.0'))
+ view['capacity']['pods'] += int(capacity.get('pods',
'0'))
if allocatable:
- view['allocatable'] = {
- 'cpu': parse_cpu(allocatable.get('cpu', '0.0')),
- 'memory': parse_memory(allocatable.get('memory',
'0.0')),
- 'pods': int(allocatable.get('pods', '0')),
- }
+ view['allocatable']['cpu'] +=
parse_cpu(allocatable.get('cpu', '0.0'))
+ view['allocatable']['memory'] +=
parse_memory(allocatable.get('memory', '0.0'))
+ view['allocatable']['pods'] +=
int(allocatable.get('pods', '0'))
return view
@@ -202,9 +229,9 @@ class Pod(Metadata):
self.node = spec.node_name if spec else None
self.containers = spec.containers if spec else []
- @property
- def resources(self):
- view = {
+ @staticmethod
+ def zero_resources():
+ return {
'limits': {
'cpu': 0.0,
'memory': 0.0,
@@ -214,6 +241,10 @@ class Pod(Metadata):
'memory': 0.0,
}
}
+
+ @property
+ def resources(self):
+ view = self.zero_resources()
for container in self.containers:
resources = container.resources
if not resources:
@@ -354,6 +385,11 @@ class NodeList(ListLike[Node]):
# type: () -> Dict[str, Dict[str, Dict[str, Optional[float]]]]
return {node.name: node.resources for node in self if node.name}
+ def cluster_resources(self):
+ initial = Node.zero_resources()
+ merge = functools.partial(left_join_dicts, operation=operator.add)
+ return reduce(merge, self.resources().itervalues(), initial)
+
class ComponentStatusList(ListLike[ComponentStatus]):
def list_statuses(self):
@@ -368,6 +404,9 @@ class PodList(ListLike[Pod]):
by_node = itertools.groupby(pods_sorted, lambda pod: pod.node)
return {node: {'allocations': {'pods': len(list(pods))}} for
node, pods in by_node}
+ def pods_in_cluster(self):
+ return {'allocations': {'pods': len(self)}}
+
def resources_per_node(self):
# type: () -> Dict[str, Dict[str, Dict[str, float]]]
"""
@@ -375,32 +414,18 @@ class PodList(ListLike[Pod]):
one container does not specify a limit, infinity is returned as the container
may consume any amount of resources.
"""
- initial = {
- 'limits': {
- 'cpu': 0.0,
- 'memory': 0.0,
- },
- 'requests': {
- 'cpu': 0.0,
- 'memory': 0.0,
- },
- }
-
- def merge(res_a, res_b):
- return {
- 'limits': {
- 'cpu': res_a['limits']['cpu'] +
res_b['limits']['cpu'],
- 'memory': res_a['limits']['memory'] +
res_b['limits']['memory'],
- },
- 'requests': {
- 'cpu': res_a['requests']['cpu'] +
res_b['requests']['cpu'],
- 'memory': res_a['requests']['memory'] +
res_b['requests']['memory'],
- },
- }
pods_sorted = sorted(self, key=lambda pod: pod.node)
by_node = itertools.groupby(pods_sorted, lambda pod: pod.node)
- return {node: reduce(merge, [p.resources for p in pods], initial) for node, pods
in by_node}
+ merge = functools.partial(left_join_dicts, operation=operator.add)
+ return {
+ node: reduce(merge, [p.resources for p in pods
+ ], Pod.zero_resources()) for node, pods in by_node
+ }
+
+ def cluster_resources(self):
+ merge = functools.partial(left_join_dicts, operation=operator.add)
+ return reduce(merge, [p.resources for p in self], Pod.zero_resources())
class NamespaceList(ListLike[Namespace]):
@@ -595,6 +620,9 @@ class ApiData(object):
e.get('k8s_storage_classes').insert(self.storage_classes.list_storage_classes())
e.get('k8s_roles').insert(self.roles.list_roles())
e.get('k8s_roles').insert(self.cluster_roles.list_roles())
+ e.get('k8s_resources').insert(self.nodes.cluster_resources())
+ e.get('k8s_resources').insert(self.pods.cluster_resources())
+ e.get('k8s_resources').insert(self.pods.pods_in_cluster())
return '\n'.join(e.output())
def node_sections(self):
diff --git a/checkman/k8s_resources.cpu b/checkman/k8s_resources.cpu
index 5442cce..9764e7f 100644
--- a/checkman/k8s_resources.cpu
+++ b/checkman/k8s_resources.cpu
@@ -5,7 +5,7 @@ license: GPL
distribution: check_mk
description:
This check monitors the requested CPU as well as the CPU limits and capacity of
- a Kubernetes node. If at least one container running on a node does not specify
+ a Kubernetes entity (a node or the cluster). If at least one container running on a node does not specify
limits, the node has no applicable limits.
The check always returns {OK}.
diff --git a/checkman/k8s_resources.memory b/checkman/k8s_resources.memory
index 3da7fea..2b1681a 100644
--- a/checkman/k8s_resources.memory
+++ b/checkman/k8s_resources.memory
@@ -5,7 +5,7 @@ license: GPL
distribution: check_mk
description:
This check monitors the requested memory as well as the memory limits and capacity
- of a Kubernetes node. If at least one container running on a node does not specify
+ of a Kubernetes entity (a node or the cluster). If at least one container running on a node does not specify
limits, the node has no applicable limits.
The check always returns {OK}.
diff --git a/checkman/k8s_resources.pods b/checkman/k8s_resources.pods
index 87f52b4..8618ed3 100644
--- a/checkman/k8s_resources.pods
+++ b/checkman/k8s_resources.pods
@@ -4,7 +4,7 @@ catalog: app/kubernetes
license: GPL
distribution: check_mk
description:
- This check monitors the pods of a Kubernetes node.
+ This check monitors the pods of a Kubernetes entity (a node or the cluster).
It always returns {OK}.
inventory: