Merge "Report the per-job build wait time to graphite"
diff --git a/tests/base.py b/tests/base.py
index 5ddb160..abbdb0a 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -624,6 +624,8 @@
result = 'RUN_ERROR'
else:
data['result'] = result
+ data['node_labels'] = ['bare-necessities']
+ data['node_name'] = 'foo'
work_fail = False
changes = None
diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py
index 6e14ff5..59ac419 100644
--- a/zuul/cmd/client.py
+++ b/zuul/cmd/client.py
@@ -263,6 +263,12 @@
'number': {
'title': 'Number'
},
+ 'node_labels': {
+ 'title': 'Node Labels'
+ },
+ 'node_name': {
+ 'title': 'Node Name'
+ },
'worker.name': {
'title': 'Worker'
},
@@ -276,7 +282,7 @@
'worker.fqdn': {
'title': 'Worker Domain'
},
- 'worker.progam': {
+ 'worker.program': {
'title': 'Worker Program'
},
'worker.version': {
diff --git a/zuul/launcher/gearman.py b/zuul/launcher/gearman.py
index 4c39775..e43e867 100644
--- a/zuul/launcher/gearman.py
+++ b/zuul/launcher/gearman.py
@@ -423,9 +423,11 @@
build = self.builds.get(job.unique)
if build:
+ data = getJobData(job)
+ build.node_labels = data.get('node_labels') or []
+ build.node_name = data.get('node_name')
if not build.canceled:
if result is None:
- data = getJobData(job)
result = data.get('result')
if result is None:
build.retry = True
diff --git a/zuul/model.py b/zuul/model.py
index f8e0d25..20d4fee 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -581,6 +581,8 @@
self.retry = False
self.parameters = {}
self.worker = Worker()
+ self.node_labels = []
+ self.node_name = None
def __repr__(self):
return ('<Build %s of %s on %s>' %
@@ -802,7 +804,9 @@
'canceled': build.canceled if build else None,
'retry': build.retry if build else None,
'number': build.number if build else None,
- 'worker': worker
+ 'node_labels': build.node_labels if build else [],
+ 'node_name': build.node_name if build else None,
+ 'worker': worker,
})
if self.pipeline.haveAllJobsStarted(self):
diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index a9bd6b2..e9e6cc7 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -530,6 +530,16 @@
def onBuildStarted(self, build):
self.log.debug("Adding start event for build: %s" % build)
build.start_time = time.time()
+ try:
+ if statsd and build.pipeline:
+ jobname = build.job.name.replace('.', '_')
+ key = 'zuul.pipeline.%s.job.%s.wait_time' % (
+ build.pipeline.name, jobname)
+ dt = int((build.start_time - build.launch_time) * 1000)
+ statsd.timing(key, dt)
+ statsd.incr(key)
+ except:
+ self.log.exception("Exception reporting runtime stats")
event = BuildStartedEvent(build)
self.result_event_queue.put(event)
self.wake_event.set()
@@ -547,14 +557,25 @@
try:
if statsd and build.pipeline:
jobname = build.job.name.replace('.', '_')
+ key = 'zuul.pipeline.%s.all_jobs' % build.pipeline.name
+ statsd.incr(key)
+ for label in build.node_labels:
+ # Jenkins includes the node name in its list of labels, so
+ # we filter it out here, since that is not statistically
+ # interesting.
+ if label == build.node_name:
+ continue
+ dt = int((build.start_time - build.launch_time) * 1000)
+ key = 'zuul.node_type.%s.job.%s.wait_time' % (
+ label, jobname)
+ statsd.timing(key, dt)
+ statsd.incr(key)
key = 'zuul.pipeline.%s.job.%s.%s' % (build.pipeline.name,
jobname, build.result)
if build.result in ['SUCCESS', 'FAILURE'] and build.start_time:
dt = int((build.end_time - build.start_time) * 1000)
statsd.timing(key, dt)
statsd.incr(key)
- key = 'zuul.pipeline.%s.all_jobs' % build.pipeline.name
- statsd.incr(key)
except:
self.log.exception("Exception reporting runtime stats")
event = BuildCompletedEvent(build)