Fix the check that all builds are waiting in tests
The check for whether all outstanding builds are waiting (which
means the system is stable and may be examined) did not account
for builds that had just started running but had not yet reported
their start back to Zuul. As a result, the test system could
decide the system had settled while a job start packet was still
in flight. This has been seen to cause the test_abandoned_gate
test to fail because of the resulting inconsistent state.
Since this race can occur in reality, also update the launcher
itself to handle this condition more gracefully.
Change-Id: I734094514db294564f6526e42b801a7e1d22a021
diff --git a/tests/base.py b/tests/base.py
index 08b3cab..3d0c39f 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -1167,8 +1167,6 @@
return True
def areAllBuildsWaiting(self):
- ret = True
-
builds = self.launcher.builds.values()
for build in builds:
client_job = None
@@ -1180,35 +1178,34 @@
if not client_job:
self.log.debug("%s is not known to the gearman client" %
build)
- ret = False
- continue
+ return False
if not client_job.handle:
self.log.debug("%s has no handle" % client_job)
- ret = False
- continue
+ return False
server_job = self.gearman_server.jobs.get(client_job.handle)
if not server_job:
self.log.debug("%s is not known to the gearman server" %
client_job)
- ret = False
- continue
+ return False
if not hasattr(server_job, 'waiting'):
self.log.debug("%s is being enqueued" % server_job)
- ret = False
- continue
+ return False
if server_job.waiting:
continue
worker_job = self.worker.gearman_jobs.get(server_job.unique)
if worker_job:
+ if build.number is None:
+ self.log.debug("%s has not reported start" % worker_job)
+ return False
if worker_job.build.isWaiting():
continue
else:
self.log.debug("%s is running" % worker_job)
- ret = False
+ return False
else:
self.log.debug("%s is unassigned" % server_job)
- ret = False
- return ret
+ return False
+ return True
def waitUntilSettled(self):
self.log.debug("Waiting until settled...")