Add attempts logic for jobs
Today, if a job is aborted, Zuul relaunches it until it either succeeds
or fails. If the job keeps aborting, it is relaunched forever. To
prevent this, add the ability to limit the number of attempts. By
default, Zuul tries to relaunch an aborted job a total of 3 times
before RETRY_LIMIT is returned as the result.
Change-Id: Ie26fdc29c07430ebfb3df8be8ac1786d63d7e0fe
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
diff --git a/tests/base.py b/tests/base.py
index c5b5b78..a14b4a9 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -540,6 +540,7 @@
self.wait_condition = threading.Condition()
self.waiting = False
self.aborted = False
+ self.requeue = False
self.created = time.time()
self.description = ''
self.run_error = False
@@ -602,6 +603,8 @@
result = 'FAILURE'
if self.aborted:
result = 'ABORTED'
+ if self.requeue:
+ result = None
if self.run_error:
work_fail = True
diff --git a/tests/fixtures/layout-abort-attempts.yaml b/tests/fixtures/layout-abort-attempts.yaml
new file mode 100644
index 0000000..86d9d78
--- /dev/null
+++ b/tests/fixtures/layout-abort-attempts.yaml
@@ -0,0 +1,30 @@
+pipelines:
+ - name: check
+ manager: IndependentPipelineManager
+ trigger:
+ gerrit:
+ - event: patchset-created
+ success:
+ gerrit:
+ verified: 1
+ failure:
+ gerrit:
+ verified: -1
+
+ - name: post
+ manager: IndependentPipelineManager
+ trigger:
+ gerrit:
+ - event: ref-updated
+ ref: ^(?!refs/).*$
+
+jobs:
+ - name: project-test1
+ attempts: 4
+
+projects:
+ - name: org/project
+ check:
+ - project-merge:
+ - project-test1
+ - project-test2
diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py
index 335f987..b6fa4a3 100755
--- a/tests/test_scheduler.py
+++ b/tests/test_scheduler.py
@@ -4481,3 +4481,36 @@
self.assertIn(
'- docs-draft-test2 https://server/job/docs-draft-test2/1/',
body[3])
+
+ def test_rerun_on_abort(self):
+ "Test that a job requeued past its attempts limit reports RETRY_LIMIT"
+
+ self.config.set('zuul', 'layout_config',
+ 'tests/fixtures/layout-abort-attempts.yaml')
+ self.sched.reconfigure(self.config)
+ self.worker.hold_jobs_in_build = True
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+ self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+ self.waitUntilSettled()
+
+ self.worker.release('.*-merge')
+ self.waitUntilSettled()
+
+ self.assertEqual(len(self.builds), 2)
+ self.builds[0].requeue = True  # first requeue of the held build
+ self.worker.release('.*-test*')  # NOTE(review): meant '.*-test.*'?
+ self.waitUntilSettled()
+
+ for x in range(3):  # presumably the 3 remaining of 4 attempts
+ self.assertEqual(len(self.builds), 1)
+ self.builds[0].requeue = True
+ self.worker.release('.*-test1')
+ self.waitUntilSettled()
+
+ self.worker.hold_jobs_in_build = False
+ self.worker.release()
+ self.waitUntilSettled()
+ self.assertEqual(len(self.history), 6)
+ self.assertEqual(self.countJobResults(self.history, 'SUCCESS'), 2)
+ self.assertEqual(A.reported, 1)
+ self.assertIn('RETRY_LIMIT', A.messages[0])