Add timeout variable to jobs
Plumb support for a job timeout through the launcher client and server.
By default no timeout is set, which means a job can run forever.
Change-Id: Ice4fedffc6086676f54da0f06630a0ff7ad7d916
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
diff --git a/tests/base.py b/tests/base.py
index e6542e7..65c7052 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -749,11 +749,11 @@
class RecordingAnsibleJob(zuul.launcher.server.AnsibleJob):
- def runPlaybooks(self):
+ def runPlaybooks(self, args):
build = self.launcher_server.job_builds[self.job.unique]
build.jobdir = self.jobdir
- result = super(RecordingAnsibleJob, self).runPlaybooks()
+ result = super(RecordingAnsibleJob, self).runPlaybooks(args)
self.launcher_server.lock.acquire()
self.launcher_server.build_history.append(
diff --git a/tests/fixtures/config/ansible/git/common-config/playbooks/timeout.yaml b/tests/fixtures/config/ansible/git/common-config/playbooks/timeout.yaml
new file mode 100644
index 0000000..4af20eb
--- /dev/null
+++ b/tests/fixtures/config/ansible/git/common-config/playbooks/timeout.yaml
@@ -0,0 +1,4 @@
+- hosts: all
+ tasks:
+ - name: Pause for 60 seconds, so zuul aborts our job.
+ shell: sleep 60
diff --git a/tests/fixtures/config/ansible/git/common-config/zuul.yaml b/tests/fixtures/config/ansible/git/common-config/zuul.yaml
index baa7aba..30148f0 100644
--- a/tests/fixtures/config/ansible/git/common-config/zuul.yaml
+++ b/tests/fixtures/config/ansible/git/common-config/zuul.yaml
@@ -44,3 +44,8 @@
flagpath: "{{zuul._test.test_root}}/{{zuul.uuid}}.flag"
roles:
- zuul: bare-role
+
+- job:
+ parent: python27
+ name: timeout
+ timeout: 1
diff --git a/tests/fixtures/config/ansible/git/org_project/.zuul.yaml b/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
index 6abfc47..c76ba70 100644
--- a/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
+++ b/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
@@ -9,3 +9,4 @@
jobs:
- python27
- faillocal
+ - timeout
diff --git a/tests/unit/test_v3.py b/tests/unit/test_v3.py
index 97002b2..f69ffe6 100644
--- a/tests/unit/test_v3.py
+++ b/tests/unit/test_v3.py
@@ -195,6 +195,8 @@
A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
self.waitUntilSettled()
+ build = self.getJobFromHistory('timeout')
+ self.assertEqual(build.result, 'ABORTED')
build = self.getJobFromHistory('faillocal')
self.assertEqual(build.result, 'FAILURE')
build = self.getJobFromHistory('python27')
diff --git a/zuul/launcher/client.py b/zuul/launcher/client.py
index ffb9f7e..6abd6f4 100644
--- a/zuul/launcher/client.py
+++ b/zuul/launcher/client.py
@@ -337,6 +337,7 @@
merger_items = map(make_merger_item, all_items)
params['job'] = job.name
+ params['timeout'] = job.timeout
params['items'] = merger_items
params['projects'] = []
diff --git a/zuul/launcher/server.py b/zuul/launcher/server.py
index c10b30e..1b8d2c6 100644
--- a/zuul/launcher/server.py
+++ b/zuul/launcher/server.py
@@ -35,9 +35,6 @@
import zuul.ansible.library
from zuul.lib import commandsocket
-ANSIBLE_WATCHDOG_GRACE = 5 * 60
-
-
COMMANDS = ['stop', 'pause', 'unpause', 'graceful', 'verbose',
'unverbose']
@@ -591,7 +588,7 @@
self.job.sendWorkData(json.dumps(data))
self.job.sendWorkStatus(0, 100)
- result = self.runPlaybooks()
+ result = self.runPlaybooks(args)
if result is None:
self.job.sendWorkFail()
@@ -599,17 +596,20 @@
result = dict(result=result)
self.job.sendWorkComplete(json.dumps(result))
- def runPlaybooks(self):
+ def runPlaybooks(self, args):
result = None
for playbook in self.jobdir.pre_playbooks:
- pre_status, pre_code = self.runAnsiblePlaybook(playbook)
+ # TODOv3(pabelanger): Implement pre-run timeout setting.
+ pre_status, pre_code = self.runAnsiblePlaybook(
+ playbook, args['timeout'])
if pre_status != self.RESULT_NORMAL or pre_code != 0:
# These should really never fail, so return None and have
# zuul try again
return result
- job_status, job_code = self.runAnsiblePlaybook(self.jobdir.playbook)
+ job_status, job_code = self.runAnsiblePlaybook(
+ self.jobdir.playbook, args['timeout'])
if job_status == self.RESULT_TIMED_OUT:
return 'TIMED_OUT'
if job_status == self.RESULT_ABORTED:
@@ -626,8 +626,9 @@
result = 'FAILURE'
for playbook in self.jobdir.post_playbooks:
+ # TODOv3(pabelanger): Implement post-run timeout setting.
post_status, post_code = self.runAnsiblePlaybook(
- playbook, success)
+ playbook, args['timeout'], success)
if post_status != self.RESULT_NORMAL or post_code != 0:
result = 'POST_FAILURE'
return result
@@ -911,23 +912,24 @@
)
ret = None
- watchdog = Watchdog(timeout + ANSIBLE_WATCHDOG_GRACE,
- self._ansibleTimeout,
- ("Ansible timeout exceeded",))
- watchdog.start()
+ if timeout:
+ watchdog = Watchdog(timeout, self._ansibleTimeout,
+ ("Ansible timeout exceeded",))
+ watchdog.start()
try:
for line in iter(self.proc.stdout.readline, b''):
line = line[:1024].rstrip()
self.log.debug("Ansible output: %s" % (line,))
ret = self.proc.wait()
finally:
- watchdog.stop()
+ if timeout:
+ watchdog.stop()
self.log.debug("Ansible exit code: %s" % (ret,))
with self.proc_lock:
self.proc = None
- if watchdog.timed_out:
+ if timeout and watchdog.timed_out:
return (self.RESULT_TIMED_OUT, None)
if ret == 3:
# AnsibleHostUnreachable: We had a network issue connecting to
@@ -939,7 +941,7 @@
return (self.RESULT_NORMAL, ret)
- def runAnsiblePlaybook(self, playbook, success=None):
+ def runAnsiblePlaybook(self, playbook, timeout, success=None):
env_copy = os.environ.copy()
env_copy['LOGNAME'] = 'zuul'
@@ -955,8 +957,5 @@
cmd.extend(['-e@%s' % self.jobdir.vars, verbose])
- # TODOv3: get this from the job
- timeout = 60
-
return self.runAnsible(
cmd=cmd, timeout=timeout, trusted=playbook.trusted)