Merge "Emit some stats from executor" into feature/zuulv3
diff --git a/doc/source/admin/monitoring.rst b/doc/source/admin/monitoring.rst
index 7c2ac80..dc6be0d 100644
--- a/doc/source/admin/monitoring.rst
+++ b/doc/source/admin/monitoring.rst
@@ -32,7 +32,7 @@
Zuul will report counters for each type of event it receives from
each of its configured drivers.
-.. stat:: zuul.<tenant>.pipeline
+.. stat:: zuul.tenant.<tenant>.pipeline
Holds metrics specific to jobs. This hierarchy includes:
@@ -125,6 +125,27 @@
How long each item spent in the pipeline before its first job
started.
+.. stat:: zuul.executor.<executor>
+
+ Holds metrics emitted by individual executors. The ``<executor>``
+ component of the key will be replaced with the hostname of the
+ executor.
+
+ .. stat:: builds
+ :type: counter
+
+ Incremented each time the executor starts a build.
+
+ .. stat:: running_builds
+ :type: gauge
+
+ The number of builds currently running on this executor.
+
+ .. stat:: load_average
+ :type: gauge
+
+ The one-minute load average of this executor, multiplied by 100 and truncated to an integer (a load of 1.25 is reported as 125).
+
As an example, given a job named `myjob` in `mytenant` triggered by a
change to `myproject` on the `master` branch in the `gate` pipeline
diff --git a/tests/base.py b/tests/base.py
index 028a194..035ff0c 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -1429,6 +1429,9 @@
be explicitly released.
"""
+
+ _job_class = RecordingAnsibleJob
+
def __init__(self, *args, **kw):
self._run_ansible = kw.pop('_run_ansible', False)
self._test_root = kw.pop('_test_root', False)
@@ -1483,8 +1486,7 @@
args = json.loads(job.arguments)
args['zuul']['_test'] = dict(test_root=self._test_root)
job.arguments = json.dumps(args)
- self.job_workers[job.unique] = RecordingAnsibleJob(self, job)
- self.job_workers[job.unique].run()
+ super(RecordingExecutorServer, self).executeJob(job)
def stopJob(self, job):
self.log.debug("handle stop")
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index c1f0a0e..ab0d0fd 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -117,6 +117,8 @@
self.assertReportedStat(
'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
'org_project.master.total_changes', value='1|c')
+ exec_key = 'zuul.executor.%s' % self.executor_server.hostname
+ self.assertReportedStat(exec_key + '.builds', value='1|c')
for build in self.history:
self.assertTrue(build.parameters['zuul']['voting'])
diff --git a/zuul/executor/server.py b/zuul/executor/server.py
index 670a420..d9edc24 100644
--- a/zuul/executor/server.py
+++ b/zuul/executor/server.py
@@ -29,6 +29,7 @@
import traceback
from zuul.lib.yamlutil import yaml
from zuul.lib.config import get_default
+from zuul.lib.statsd import get_statsd
try:
import ara.plugins.callbacks as ara_callbacks
@@ -1483,6 +1484,7 @@
class ExecutorServer(object):
log = logging.getLogger("zuul.ExecutorServer")
+ _job_class = AnsibleJob
def __init__(self, config, connections={}, jobdir_root=None,
keep_jobdir=False, log_streaming_port=DEFAULT_FINGER_PORT):
@@ -1506,6 +1508,7 @@
nokeep=self.nokeep,
)
+ self.statsd = get_statsd(config)
self.merge_root = get_default(self.config, 'executor', 'git_dir',
'/var/lib/zuul/executor-git')
self.default_username = get_default(self.config, 'executor',
@@ -1652,6 +1655,10 @@
"to worker:")
self.merger_worker.shutdown()
self.executor_worker.shutdown()
+ if self.statsd:
+ base_key = 'zuul.executor.%s' % self.hostname
+ self.statsd.gauge(base_key + '.load_average', 0)
+ self.statsd.gauge(base_key + '.running_builds', 0)
self.log.debug("Stopped")
def pause(self):
@@ -1776,7 +1783,10 @@
self.manageLoad()
def executeJob(self, job):
- self.job_workers[job.unique] = AnsibleJob(self, job)
+ if self.statsd:
+ base_key = 'zuul.executor.%s' % self.hostname
+ self.statsd.incr(base_key + '.builds')
+ self.job_workers[job.unique] = self._job_class(self, job)
self.job_workers[job.unique].run()
def manageLoad(self):
@@ -1795,6 +1805,12 @@
"Re-registering as load is within limits {} <= {}".format(
load_avg, self.max_load_avg))
self.register_work()
+ if self.statsd:
+ base_key = 'zuul.executor.%s' % self.hostname
+ self.statsd.gauge(base_key + '.load_average',
+ int(load_avg * 100))
+ self.statsd.gauge(base_key + '.running_builds',
+ len(self.job_workers))
def finishJob(self, unique):
del(self.job_workers[unique])