On reconfiguration, re-enqueue items at the same position
Upon reconfiguration, we currently re-enqueue every item back into
its pipeline in case a difference in the configuration changes what
should happen to it.  In the simple case this is fine, but if a
dependent pipeline has a branching dependency structure due to
already-failing jobs, we would erroneously re-order the changes back
into a linear arrangement.  The next pass through the pipeline
manager would move those items back to where they should be, but in
doing so would reset their builds.
To correct this, after re-enqueueing a change, if the change that was
previously ahead of it in the pipeline was also successfully
re-enqueued, immediately move the change back behind it (or, if the
change ahead was None, meaning the change was its own head, move it
behind no change).  This should put changes back where they were in
relation to other failing changes.  If the change ahead was not
successfully re-enqueued, the current behavior of simply placing the
change behind the nearest change in the queue is preserved.  If
anything more complex happens, any errors will be corrected on the
next pass through the pipeline manager.
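
In pseudocode, the intent is roughly the following (a minimal
sketch; the queue object and its enqueue/move helpers stand in for
the real ChangeQueue and are invented for illustration, not the
actual implementation):

    # Hypothetical illustration of the placement rule described
    # above; only old_item_ahead/item_ahead_valid come from the
    # real patch.
    def re_enqueue(queue, item, old_item_ahead, item_ahead_valid):
        # Default placement: behind the nearest change in the queue.
        queue.enqueue(item)
        if item_ahead_valid:
            # The previous item ahead was also re-enqueued (or the
            # item was its own head and old_item_ahead is None), so
            # restore the old ordering now instead of letting the
            # next pipeline-manager pass reset the item's builds.
            queue.move(item, old_item_ahead)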
Change-Id: Ie3771d9bbbc1ca77425cf62751d8e5f70ba1f14c
diff --git a/tests/fixtures/layouts/reconfigure-failed-head.yaml b/tests/fixtures/layouts/reconfigure-failed-head.yaml
new file mode 100644
index 0000000..3347c9b
--- /dev/null
+++ b/tests/fixtures/layouts/reconfigure-failed-head.yaml
@@ -0,0 +1,56 @@
+- pipeline:
+    name: check
+    manager: independent
+    trigger:
+      gerrit:
+        - event: patchset-created
+    success:
+      gerrit:
+        Verified: 1
+    failure:
+      gerrit:
+        Verified: -1
+
+- pipeline:
+    name: gate
+    manager: dependent
+    success-message: Build succeeded (gate).
+    trigger:
+      gerrit:
+        - event: comment-added
+          approval:
+            - Approved: 1
+    success:
+      gerrit:
+        Verified: 2
+        submit: true
+    failure:
+      gerrit:
+        Verified: -2
+    start:
+      gerrit:
+        Verified: 0
+    precedence: high
+
+- job:
+    name: base
+    parent: null
+
+- job:
+    name: job1
+    run: playbooks/job1.yaml
+
+- job:
+    name: job2
+    run: playbooks/job2.yaml
+
+- project:
+    name: org/project
+    check:
+      jobs:
+        - job1
+        - job2
+    gate:
+      jobs:
+        - job1
+        - job2
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index b0df2b2..53a20ff 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -3034,6 +3034,70 @@
         self.assertEqual(len(tenant.layout.pipelines['check'].queues), 0)
         self.assertIn('Build succeeded', A.messages[0])
 
+    @simple_layout("layouts/reconfigure-failed-head.yaml")
+    def test_live_reconfiguration_failed_change_at_head(self):
+        # Test that if we reconfigure with a failed change at head,
+        # the change behind it isn't reparented onto it.
+
+        self.executor_server.hold_jobs_in_build = True
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+        A.addApproval('Code-Review', 2)
+        B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
+        B.addApproval('Code-Review', 2)
+
+        self.executor_server.failJob('job1', A)
+
+        self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
+        self.fake_gerrit.addEvent(B.addApproval('Approved', 1))
+
+        self.waitUntilSettled()
+
+        self.assertBuilds([
+            dict(name='job1', changes='1,1'),
+            dict(name='job2', changes='1,1'),
+            dict(name='job1', changes='1,1 2,1'),
+            dict(name='job2', changes='1,1 2,1'),
+        ])
+
+        self.release(self.builds[0])
+        self.waitUntilSettled()
+
+        self.assertBuilds([
+            dict(name='job2', changes='1,1'),
+            dict(name='job1', changes='2,1'),
+            dict(name='job2', changes='2,1'),
+        ])
+
+        # Unordered history comparison because the aborts can finish
+        # in any order.
+        self.assertHistory([
+            dict(name='job1', result='FAILURE', changes='1,1'),
+            dict(name='job1', result='ABORTED', changes='1,1 2,1'),
+            dict(name='job2', result='ABORTED', changes='1,1 2,1'),
+        ], ordered=False)
+
+        self.sched.reconfigure(self.config)
+        self.waitUntilSettled()
+
+        self.executor_server.hold_jobs_in_build = False
+        self.executor_server.release()
+        self.waitUntilSettled()
+
+        self.assertBuilds([])
+
+        self.assertHistory([
+            dict(name='job1', result='FAILURE', changes='1,1'),
+            dict(name='job1', result='ABORTED', changes='1,1 2,1'),
+            dict(name='job2', result='ABORTED', changes='1,1 2,1'),
+            dict(name='job2', result='SUCCESS', changes='1,1'),
+            dict(name='job1', result='SUCCESS', changes='2,1'),
+            dict(name='job2', result='SUCCESS', changes='2,1'),
+        ], ordered=False)
+        self.assertEqual(A.data['status'], 'NEW')
+        self.assertEqual(B.data['status'], 'MERGED')
+        self.assertEqual(A.reported, 2)
+        self.assertEqual(B.reported, 2)
+
     def test_delayed_repo_init(self):
         self.init_repo("org/new-project")
         files = {'README': ''}
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index 9c30d74..6c72c2d 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -223,13 +223,19 @@
             if item.change.equals(change):
                 self.removeItem(item)
 
-    def reEnqueueItem(self, item, last_head):
+    def reEnqueueItem(self, item, last_head, old_item_ahead, item_ahead_valid):
         with self.getChangeQueue(item.change, last_head.queue) as change_queue:
             if change_queue:
                 self.log.debug("Re-enqueing change %s in queue %s" %
                                (item.change, change_queue))
                 change_queue.enqueueItem(item)
+                # If the old item ahead was re-enqueued, this value
+                # will be true, so we should attempt to move the item
+                # back to where it was in case an item ahead is
+                # already failing.
+                if item_ahead_valid:
+                    change_queue.moveItem(item, old_item_ahead)
 
                 # Get an updated copy of the layout and update the job
                 # graph if necessary. This resumes the buildset merge
                 # state machine. If we have an up-to-date layout, it
diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index 73fa923..a725fcd 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -614,13 +614,22 @@
                     item.queue = None
                     item.change.project = self._reenqueueGetProject(
                         tenant, item)
+                    # If the old item ahead made it in, remember it so
+                    # we can re-enqueue this one behind it.
+                    if item.item_ahead in items_to_remove:
+                        old_item_ahead = None
+                        item_ahead_valid = False
+                    else:
+                        old_item_ahead = item.item_ahead
+                        item_ahead_valid = True
                     item.item_ahead = None
                     item.items_behind = []
                     reenqueued = False
                     if item.change.project:
                         try:
                             reenqueued = new_pipeline.manager.reEnqueueItem(
-                                item, last_head)
+                                item, last_head, old_item_ahead,
+                                item_ahead_valid=item_ahead_valid)
                         except Exception:
                             self.log.exception(
                                 "Exception while re-enqueing item %s",