Module: check_mk
Branch: master
Commit: 8a528b3c0d25addcc138c75a1b735b348331080d
URL:
http://git.mathias-kettner.de/git/?p=check_mk.git;a=commit;h=8a528b3c0d25ad…
Author: Lars Michelsen <lm(a)mathias-kettner.de>
Date: Mon Dec 19 20:13:48 2016 +0100
Reduced load caused by GUI crawl tests; made test more reliable
Change-Id: I8d8367a3749df569150c714397ffcc25c28378fe
---
tests/testlib/__init__.py | 45 +++++++++++++++++++--------------------------
tests/web/test_crawl.py | 28 ++++++++++------------------
2 files changed, 29 insertions(+), 44 deletions(-)
diff --git a/tests/testlib/__init__.py b/tests/testlib/__init__.py
index 201934e..aef8123 100644
--- a/tests/testlib/__init__.py
+++ b/tests/testlib/__init__.py
@@ -434,6 +434,8 @@ class Site(object):
class WebSession(requests.Session):
def __init__(self):
self.transids = []
+ # Resources are only checked once per session; URLs already verified
+ self.verified_resources = set()
super(WebSession, self).__init__()
@@ -541,50 +543,41 @@ class WebSession(requests.Session):
def _check_html_page_resources(self, response):
soup = BeautifulSoup(response.text, "lxml")
- parsed_url = urlparse(response.url)
-
- base_url = parsed_url.path
- if ".py" in base_url:
- base_url = os.path.dirname(base_url)
-
# There might be other resources like iframe, audio, ... but we don't care
about them
- for img_url in self._find_resource_urls("img", "src", soup):
- assert not img_url.startswith("/"), "%s starts with /" %
img_url
- req = self.get(base_url + "/" + img_url, proto=parsed_url.scheme,
verify=False)
-
- mime_type = self._get_mime_type(req)
- assert mime_type in [ "image/png" ]
+ self._check_resources(soup, response, "img", "src", [
"image/png"])
+ self._check_resources(soup, response, "script", "src", [
"application/javascript"])
+ self._check_resources(soup, response, "link", "href", [
"text/css"], filters=[("rel", "stylesheet")])
+ self._check_resources(soup, response, "link", "href", [
"image/vnd.microsoft.icon"], filters=[("rel", "shortcut
icon")])
- for script_url in self._find_resource_urls("script", "src",
soup):
- assert not script_url.startswith("/")
- req = self.get(base_url + "/" + script_url,
proto=parsed_url.scheme, verify=False)
- mime_type = self._get_mime_type(req)
- assert mime_type in [ "application/javascript" ]
+ def _check_resources(self, soup, response, tag, attr, allowed_mime_types,
filters=None):
+ parsed_url = urlparse(response.url)
- for css_url in self._find_resource_urls("link", "href", soup,
filters=[("rel", "stylesheet")]):
- assert not css_url.startswith("/")
- req = self.get(base_url + "/" + css_url, proto=parsed_url.scheme,
verify=False)
+ base_url = parsed_url.path
+ if ".py" in base_url:
+ base_url = os.path.dirname(base_url)
- mime_type = self._get_mime_type(req)
- assert mime_type in [ "text/css" ]
+ for url in self._find_resource_urls(tag, attr, soup, filters):
+ # Only check resources once per session
+ if url in self.verified_resources:
+ continue
+ self.verified_resources.add(url)
- for url in self._find_resource_urls("link", "href", soup,
filters=[("rel", "shortcut icon")]):
assert not url.startswith("/")
req = self.get(base_url + "/" + url, proto=parsed_url.scheme,
verify=False)
mime_type = self._get_mime_type(req)
- assert mime_type in [ "image/vnd.microsoft.icon" ]
+ assert mime_type in allowed_mime_types
- def _find_resource_urls(self, tag, attribute, soup, filters=[]):
+ def _find_resource_urls(self, tag, attribute, soup, filters=None):
urls = []
for element in soup.findAll(tag):
try:
skip = False
- for attr, val in filters:
+ for attr, val in filters or []:
if element[attr] != val:
skip = True
break
diff --git a/tests/web/test_crawl.py b/tests/web/test_crawl.py
index 7af4a68..5876d6a 100644
--- a/tests/web/test_crawl.py
+++ b/tests/web/test_crawl.py
@@ -61,9 +61,9 @@ class Worker(threading.Thread):
except Exception, e:
self.error(url, "Failed to visit: %s\n%s" %
(e, traceback.format_exc()))
- self.idle = True
self.crawler.todo.task_done()
except Queue.Empty:
+ self.idle = True
time.sleep(0.5)
@@ -82,7 +82,7 @@ class Worker(threading.Thread):
started = time.time()
try:
- #print url.url_without_host()
+ #print "FETCH", url.url_without_host()
response = self.crawler.client.get(url.url_without_host())
except AssertionError, e:
if "This view can only be used in mobile mode" in "%s" %
e:
@@ -142,10 +142,9 @@ class Worker(threading.Thread):
def check_response(self, url, response):
soup = BeautifulSoup(response.text, "lxml")
+ # The referenced resources (images, stylesheets, javascript files) are checked
by
+ # the generic web client handler. This only needs to realize the crawling.
self.check_content(url, response, soup)
- self.check_images(url, soup)
- self.check_styles(url, soup)
- self.check_scripts(url, soup)
self.check_links(url, soup)
self.check_frames(url, soup)
self.check_iframes(url, soup)
@@ -181,10 +180,6 @@ class Worker(threading.Thread):
self.check_referenced(url, soup, "a", "href")
- def check_images(self, url, soup):
- self.check_referenced(url, soup, "img", "src")
-
-
def check_referenced(self, referer_url, soup, tag, attr):
elements = soup.find_all(tag)
@@ -193,17 +188,10 @@ class Worker(threading.Thread):
url = self.normalize_url(self.crawler.site.internal_url, orig_url)
if url is not None and self.is_valid_url(url) \
- and url not in self.crawler.visited:
+ and url not in self.crawler.handled:
#file("/tmp/todo", "a").write("%s (%s)\n" %
(url, referer_url.url))
self.crawler.todo.put(Url(url, orig_url=orig_url,
referer_url=referer_url.url))
-
-
- def check_styles(self, url, soup):
- pass # TODO
-
-
- def check_scripts(self, url, soup):
- pass # TODO
+ self.crawler.handled.add(url)
def is_valid_url(self, url):
@@ -276,6 +264,9 @@ class TestCrawler(object):
self.todo = SetQueue()
self.started = time.time()
self.visited = []
+ # Contains all already seen and somehow handled URLs. Something like the
+ # summary of self.todo and self.visited, but todo contains Url() objects.
+ self.handled = set()
self.errors = []
self.site = site
self.client = web
@@ -284,6 +275,7 @@ class TestCrawler(object):
self.load_stats()
self.todo.put(Url(site.internal_url))
+ self.handled.add(site.internal_url)
self.crawl()