Merge "Emit some stats from executor" into feature/zuulv3
diff --git a/doc/source/admin/monitoring.rst b/doc/source/admin/monitoring.rst
index 7c2ac80..dc6be0d 100644
--- a/doc/source/admin/monitoring.rst
+++ b/doc/source/admin/monitoring.rst
@@ -32,7 +32,7 @@
    Zuul will report counters for each type of event it receives from
    each of its configured drivers.
 
-.. stat:: zuul.<tenant>.pipeline
+.. stat:: zuul.tenant.<tenant>.pipeline
 
    Holds metrics specific to jobs. This hierarchy includes:
 
@@ -125,6 +125,27 @@
          How long each item spent in the pipeline before its first job
          started.
 
+.. stat:: zuul.executor.<executor>
+
+   Holds metrics emitted by individual executors.  The ``<executor>``
+   component of the key will be replaced with the hostname of the
+   executor.
+
+   .. stat:: builds
+      :type: counter
+
+      Incremented each time the executor starts a build.
+
+   .. stat:: running_builds
+      :type: gauge
+
+      The number of builds currently running on this executor.
+
+   .. stat:: load_average
+      :type: gauge
+
+      The executor's one-minute load average times 100, as an integer.
+
 
 As an example, given a job named `myjob` in `mytenant` triggered by a
 change to `myproject` on the `master` branch in the `gate` pipeline
diff --git a/tests/base.py b/tests/base.py
index 028a194..035ff0c 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -1429,6 +1429,9 @@
         be explicitly released.
 
     """
+
+    _job_class = RecordingAnsibleJob
+
     def __init__(self, *args, **kw):
         self._run_ansible = kw.pop('_run_ansible', False)
         self._test_root = kw.pop('_test_root', False)
@@ -1483,8 +1486,7 @@
         args = json.loads(job.arguments)
         args['zuul']['_test'] = dict(test_root=self._test_root)
         job.arguments = json.dumps(args)
-        self.job_workers[job.unique] = RecordingAnsibleJob(self, job)
-        self.job_workers[job.unique].run()
+        super(RecordingExecutorServer, self).executeJob(job)
 
     def stopJob(self, job):
         self.log.debug("handle stop")
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index c1f0a0e..ab0d0fd 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -117,6 +117,8 @@
         self.assertReportedStat(
             'zuul.tenant.tenant-one.pipeline.gate.project.review_example_com.'
             'org_project.master.total_changes', value='1|c')
+        exec_key = 'zuul.executor.%s' % self.executor_server.hostname
+        self.assertReportedStat(exec_key + '.builds', value='1|c')
 
         for build in self.history:
             self.assertTrue(build.parameters['zuul']['voting'])
diff --git a/zuul/executor/server.py b/zuul/executor/server.py
index 670a420..d9edc24 100644
--- a/zuul/executor/server.py
+++ b/zuul/executor/server.py
@@ -29,6 +29,7 @@
 import traceback
 from zuul.lib.yamlutil import yaml
 from zuul.lib.config import get_default
+from zuul.lib.statsd import get_statsd
 
 try:
     import ara.plugins.callbacks as ara_callbacks
@@ -1483,6 +1484,7 @@
 
 class ExecutorServer(object):
     log = logging.getLogger("zuul.ExecutorServer")
+    _job_class = AnsibleJob
 
     def __init__(self, config, connections={}, jobdir_root=None,
                  keep_jobdir=False, log_streaming_port=DEFAULT_FINGER_PORT):
@@ -1506,6 +1508,7 @@
             nokeep=self.nokeep,
         )
 
+        self.statsd = get_statsd(config)
         self.merge_root = get_default(self.config, 'executor', 'git_dir',
                                       '/var/lib/zuul/executor-git')
         self.default_username = get_default(self.config, 'executor',
@@ -1652,6 +1655,10 @@
                                    "to worker:")
         self.merger_worker.shutdown()
         self.executor_worker.shutdown()
+        if self.statsd:
+            base_key = 'zuul.executor.%s' % self.hostname
+            self.statsd.gauge(base_key + '.load_average', 0)
+            self.statsd.gauge(base_key + '.running_builds', 0)
         self.log.debug("Stopped")
 
     def pause(self):
@@ -1776,7 +1783,10 @@
             self.manageLoad()
 
     def executeJob(self, job):
-        self.job_workers[job.unique] = AnsibleJob(self, job)
+        if self.statsd:
+            base_key = 'zuul.executor.%s' % self.hostname
+            self.statsd.incr(base_key + '.builds')
+        self.job_workers[job.unique] = self._job_class(self, job)
         self.job_workers[job.unique].run()
 
     def manageLoad(self):
@@ -1795,6 +1805,12 @@
                 "Re-registering as load is within limits {} <= {}".format(
                     load_avg, self.max_load_avg))
             self.register_work()
+        if self.statsd:
+            base_key = 'zuul.executor.%s' % self.hostname
+            self.statsd.gauge(base_key + '.load_average',
+                              int(load_avg * 100))
+            self.statsd.gauge(base_key + '.running_builds',
+                              len(self.job_workers))
 
     def finishJob(self, unique):
         del(self.job_workers[unique])