Add timeout variable to jobs

Plumb through support for timeout for jobs. By default, we don't
support any timeout, which means jobs live forever.

Change-Id: Ice4fedffc6086676f54da0f06630a0ff7ad7d916
Signed-off-by: Paul Belanger <pabelanger@redhat.com>
diff --git a/tests/base.py b/tests/base.py
index e6542e7..65c7052 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -749,11 +749,11 @@
 
 
 class RecordingAnsibleJob(zuul.launcher.server.AnsibleJob):
-    def runPlaybooks(self):
+    def runPlaybooks(self, args):
         build = self.launcher_server.job_builds[self.job.unique]
         build.jobdir = self.jobdir
 
-        result = super(RecordingAnsibleJob, self).runPlaybooks()
+        result = super(RecordingAnsibleJob, self).runPlaybooks(args)
 
         self.launcher_server.lock.acquire()
         self.launcher_server.build_history.append(
diff --git a/tests/fixtures/config/ansible/git/common-config/playbooks/timeout.yaml b/tests/fixtures/config/ansible/git/common-config/playbooks/timeout.yaml
new file mode 100644
index 0000000..4af20eb
--- /dev/null
+++ b/tests/fixtures/config/ansible/git/common-config/playbooks/timeout.yaml
@@ -0,0 +1,4 @@
+- hosts: all
+  tasks:
+    - name: Pause for 60 seconds, so zuul aborts our job.
+      shell: sleep 60
diff --git a/tests/fixtures/config/ansible/git/common-config/zuul.yaml b/tests/fixtures/config/ansible/git/common-config/zuul.yaml
index baa7aba..30148f0 100644
--- a/tests/fixtures/config/ansible/git/common-config/zuul.yaml
+++ b/tests/fixtures/config/ansible/git/common-config/zuul.yaml
@@ -44,3 +44,8 @@
       flagpath: "{{zuul._test.test_root}}/{{zuul.uuid}}.flag"
     roles:
       - zuul: bare-role
+
+- job:
+    parent: python27
+    name: timeout
+    timeout: 1
diff --git a/tests/fixtures/config/ansible/git/org_project/.zuul.yaml b/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
index 6abfc47..c76ba70 100644
--- a/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
+++ b/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
@@ -9,3 +9,4 @@
       jobs:
         - python27
         - faillocal
+        - timeout
diff --git a/tests/unit/test_v3.py b/tests/unit/test_v3.py
index 97002b2..f69ffe6 100644
--- a/tests/unit/test_v3.py
+++ b/tests/unit/test_v3.py
@@ -195,6 +195,8 @@
         A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
         self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
         self.waitUntilSettled()
+        build = self.getJobFromHistory('timeout')
+        self.assertEqual(build.result, 'ABORTED')
         build = self.getJobFromHistory('faillocal')
         self.assertEqual(build.result, 'FAILURE')
         build = self.getJobFromHistory('python27')
diff --git a/zuul/launcher/client.py b/zuul/launcher/client.py
index ffb9f7e..6abd6f4 100644
--- a/zuul/launcher/client.py
+++ b/zuul/launcher/client.py
@@ -337,6 +337,7 @@
         merger_items = map(make_merger_item, all_items)
 
         params['job'] = job.name
+        params['timeout'] = job.timeout
         params['items'] = merger_items
         params['projects'] = []
 
diff --git a/zuul/launcher/server.py b/zuul/launcher/server.py
index c10b30e..1b8d2c6 100644
--- a/zuul/launcher/server.py
+++ b/zuul/launcher/server.py
@@ -35,9 +35,6 @@
 import zuul.ansible.library
 from zuul.lib import commandsocket
 
-ANSIBLE_WATCHDOG_GRACE = 5 * 60
-
-
 COMMANDS = ['stop', 'pause', 'unpause', 'graceful', 'verbose',
             'unverbose']
 
@@ -591,7 +588,7 @@
         self.job.sendWorkData(json.dumps(data))
         self.job.sendWorkStatus(0, 100)
 
-        result = self.runPlaybooks()
+        result = self.runPlaybooks(args)
 
         if result is None:
             self.job.sendWorkFail()
@@ -599,17 +596,20 @@
         result = dict(result=result)
         self.job.sendWorkComplete(json.dumps(result))
 
-    def runPlaybooks(self):
+    def runPlaybooks(self, args):
         result = None
 
         for playbook in self.jobdir.pre_playbooks:
-            pre_status, pre_code = self.runAnsiblePlaybook(playbook)
+            # TODOv3(pabelanger): Implement pre-run timeout setting.
+            pre_status, pre_code = self.runAnsiblePlaybook(
+                playbook, args['timeout'])
             if pre_status != self.RESULT_NORMAL or pre_code != 0:
                 # These should really never fail, so return None and have
                 # zuul try again
                 return result
 
-        job_status, job_code = self.runAnsiblePlaybook(self.jobdir.playbook)
+        job_status, job_code = self.runAnsiblePlaybook(
+            self.jobdir.playbook, args['timeout'])
         if job_status == self.RESULT_TIMED_OUT:
             return 'TIMED_OUT'
         if job_status == self.RESULT_ABORTED:
@@ -626,8 +626,9 @@
             result = 'FAILURE'
 
         for playbook in self.jobdir.post_playbooks:
+            # TODOv3(pabelanger): Implement post-run timeout setting.
             post_status, post_code = self.runAnsiblePlaybook(
-                playbook, success)
+                playbook, args['timeout'], success)
             if post_status != self.RESULT_NORMAL or post_code != 0:
                 result = 'POST_FAILURE'
         return result
@@ -911,23 +912,24 @@
             )
 
         ret = None
-        watchdog = Watchdog(timeout + ANSIBLE_WATCHDOG_GRACE,
-                            self._ansibleTimeout,
-                            ("Ansible timeout exceeded",))
-        watchdog.start()
+        if timeout:
+            watchdog = Watchdog(timeout, self._ansibleTimeout,
+                                ("Ansible timeout exceeded",))
+            watchdog.start()
         try:
             for line in iter(self.proc.stdout.readline, b''):
                 line = line[:1024].rstrip()
                 self.log.debug("Ansible output: %s" % (line,))
             ret = self.proc.wait()
         finally:
-            watchdog.stop()
+            if timeout:
+                watchdog.stop()
         self.log.debug("Ansible exit code: %s" % (ret,))
 
         with self.proc_lock:
             self.proc = None
 
-        if watchdog.timed_out:
+        if timeout and watchdog.timed_out:
             return (self.RESULT_TIMED_OUT, None)
         if ret == 3:
             # AnsibleHostUnreachable: We had a network issue connecting to
@@ -939,7 +941,7 @@
 
         return (self.RESULT_NORMAL, ret)
 
-    def runAnsiblePlaybook(self, playbook, success=None):
+    def runAnsiblePlaybook(self, playbook, timeout, success=None):
         env_copy = os.environ.copy()
         env_copy['LOGNAME'] = 'zuul'
 
@@ -955,8 +957,5 @@
 
         cmd.extend(['-e@%s' % self.jobdir.vars, verbose])
 
-        # TODOv3: get this from the job
-        timeout = 60
-
         return self.runAnsible(
             cmd=cmd, timeout=timeout, trusted=playbook.trusted)