Merge "Emit some nodepool stats" into feature/zuulv3
diff --git a/doc/source/admin/monitoring.rst b/doc/source/admin/monitoring.rst
index dc6be0d..55f1908 100644
--- a/doc/source/admin/monitoring.rst
+++ b/doc/source/admin/monitoring.rst
@@ -146,6 +146,84 @@
 
       The one-minute load average of this executor, multiplied by 100.
 
+.. stat:: zuul.nodepool
+
+   Holds metrics related to Zuul requests from Nodepool.
+
+   .. stat:: requested
+      :type: counter
+
+      Incremented each time a node request is submitted to Nodepool.
+
+      .. stat:: label.<label>
+         :type: counter
+
+         Incremented each time a request for a specific label is
+         submitted to Nodepool.
+
+      .. stat:: size.<size>
+         :type: counter
+
+         Incremented each time a request of a specific size is submitted
+         to Nodepool.  For example, a request for 3 nodes would use the
+         key ``zuul.nodepool.requested.size.3``.
+
+   .. stat:: canceled
+      :type: counter, timer
+
+      The counter is incremented each time a node request is canceled
+      by Zuul.  The timer records the elapsed time from request to
+      cancelation.
+
+      .. stat:: label.<label>
+         :type: counter, timer
+
+         The same, for a specific label.
+
+      .. stat:: size.<size>
+         :type: counter, timer
+
+         The same, for a specific request size.
+
+   .. stat:: fulfilled
+      :type: counter, timer
+
+      The counter is incremented each time a node request is fulfilled
+      by Nodepool.  The timer records the elapsed time from request to
+      fulfillment.
+
+      .. stat:: label.<label>
+         :type: counter, timer
+
+         The same, for a specific label.
+
+      .. stat:: size.<size>
+         :type: counter, timer
+
+         The same, for a specific request size.
+
+   .. stat:: failed
+      :type: counter, timer
+
+      The counter is incremented each time Nodepool fails to fulfill a
+      node request.  The timer records the elapsed time from request
+      to failure.
+
+      .. stat:: label.<label>
+         :type: counter, timer
+
+         The same, for a specific label.
+
+      .. stat:: size.<size>
+         :type: counter, timer
+
+         The same, for a specific request size.
+
+   .. stat:: current_requests
+      :type: gauge
+
+      The number of outstanding nodepool requests from Zuul.
+
 
 As an example, given a job named `myjob` in `mytenant` triggered by a
 change to `myproject` on the `master` branch in the `gate` pipeline
diff --git a/tests/unit/test_nodepool.py b/tests/unit/test_nodepool.py
index d51898b..d3f9ddb 100644
--- a/tests/unit/test_nodepool.py
+++ b/tests/unit/test_nodepool.py
@@ -29,6 +29,7 @@
     def setUp(self):
         super(TestNodepool, self).setUp()
 
+        self.statsd = None
         self.zk_chroot_fixture = self.useFixture(
             ChrootedKazooFixture(self.id()))
         self.zk_config = '%s:%s%s' % (
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index ab0d0fd..bab5162 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -119,6 +119,14 @@
             'org_project.master.total_changes', value='1|c')
         exec_key = 'zuul.executor.%s' % self.executor_server.hostname
         self.assertReportedStat(exec_key + '.builds', value='1|c')
+        self.assertReportedStat('zuul.nodepool.requested', value='1|c')
+        self.assertReportedStat('zuul.nodepool.requested.label.label1',
+                                value='1|c')
+        self.assertReportedStat('zuul.nodepool.fulfilled.label.label1',
+                                value='1|c')
+        self.assertReportedStat('zuul.nodepool.requested.size.1', value='1|c')
+        self.assertReportedStat('zuul.nodepool.fulfilled.size.1', value='1|c')
+        self.assertReportedStat('zuul.nodepool.current_requests', value='1|g')
 
         for build in self.history:
             self.assertTrue(build.parameters['zuul']['voting'])
diff --git a/zuul/model.py b/zuul/model.py
index 6eebbfb..b7c3031 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -523,6 +523,7 @@
         self.job = job
         self.nodeset = nodeset
         self._state = STATE_REQUESTED
+        self.requested_time = time.time()
         self.state_time = time.time()
         self.stat = None
         self.uid = uuid4().hex
diff --git a/zuul/nodepool.py b/zuul/nodepool.py
index f4c850d..7dafca0 100644
--- a/zuul/nodepool.py
+++ b/zuul/nodepool.py
@@ -22,6 +22,35 @@
         self.requests = {}
         self.sched = scheduler
 
+    def emitStats(self, request):
+        if not self.sched.statsd:
+            return
+        statsd = self.sched.statsd
+        # counter zuul.nodepool.requested
+        # counter zuul.nodepool.requested.label.<label>
+        # counter zuul.nodepool.requested.size.<size>
+        # gauge zuul.nodepool.current_requests
+        state = request.state
+        if request.canceled:
+            state = 'canceled'
+            dt = None
+        elif request.state in (model.STATE_FULFILLED, model.STATE_FAILED):
+            dt = int((request.state_time - request.requested_time) * 1000)
+        else:
+            dt = None
+        key = 'zuul.nodepool.%s' % state
+        statsd.incr(key)
+        if dt:
+            statsd.timing(key, dt)
+        for node in request.nodeset.getNodes():
+            statsd.incr(key + '.label.%s' % node.label)
+            if dt:
+                statsd.timing(key + '.label.%s' % node.label, dt)
+        statsd.incr(key + '.size.%s' % len(request.nodeset.nodes))
+        if dt:
+            statsd.timing(key + '.size.%s' % len(request.nodeset.nodes), dt)
+        statsd.gauge('zuul.nodepool.current_requests', len(self.requests))
+
     def requestNodes(self, build_set, job):
         # Create a copy of the nodeset to represent the actual nodes
         # returned by nodepool.
@@ -33,6 +62,7 @@
             self.sched.zk.submitNodeRequest(req, self._updateNodeRequest)
             # Logged after submission so that we have the request id
             self.log.info("Submited node request %s" % (req,))
+            self.emitStats(req)
         else:
             self.log.info("Fulfilling empty node request %s" % (req,))
             req.state = model.STATE_FULFILLED
@@ -142,6 +172,7 @@
 
         if request.canceled:
             del self.requests[request.uid]
+            self.emitStats(request)
             return False
 
         # TODOv3(jeblair): handle allocation failure
@@ -156,6 +187,8 @@
             self.sched.onNodesProvisioned(request)
             del self.requests[request.uid]
 
+            self.emitStats(request)
+
             # Stop watching this request node.
             return False