Merge "Add post-timeout setting"
diff --git a/doc/source/user/config.rst b/doc/source/user/config.rst
index 8492423..7d2dae1 100644
--- a/doc/source/user/config.rst
+++ b/doc/source/user/config.rst
@@ -710,6 +710,21 @@
       timeout is supplied, the job may run indefinitely.  Supplying a
       timeout is highly recommended.
 
+      This timeout only applies to the pre-run and run playbooks in a
+      job.
+
+   .. attr:: post-timeout
+
+      The time in seconds that each post playbook should be allowed to run
+      before it is automatically aborted and failure is reported.  If no
+      post-timeout is supplied, the job may run indefinitely.  Supplying a
+      post-timeout is highly recommended.
+
+      The post-timeout is handled separately from the above timeout because
+      the post playbooks are typically where you will copy job logs.
+      In the event of the pre-run or run playbooks timing out, we want to
+      do our best to copy the job logs in the post-run playbooks.
+
    .. attr:: attempts
       :default: 3
 
diff --git a/doc/source/user/jobs.rst b/doc/source/user/jobs.rst
index 820e316..4e1c33d 100644
--- a/doc/source/user/jobs.rst
+++ b/doc/source/user/jobs.rst
@@ -289,6 +289,10 @@
 
       The job timeout, in seconds.
 
+   .. var:: post_timeout
+
+      The post-run playbook timeout, in seconds.
+
    .. var:: jobtags
 
       A list of tags associated with the job.  Not to be confused with
diff --git a/tests/fixtures/config/ansible/git/common-config/zuul.yaml b/tests/fixtures/config/ansible/git/common-config/zuul.yaml
index d0a8f7b..0112141 100644
--- a/tests/fixtures/config/ansible/git/common-config/zuul.yaml
+++ b/tests/fixtures/config/ansible/git/common-config/zuul.yaml
@@ -99,6 +99,12 @@
 
 - job:
     parent: python27
+    name: post-timeout
+    post-run: playbooks/timeout.yaml
+    post-timeout: 1
+
+- job:
+    parent: python27
     name: check-vars
     run: playbooks/check-vars.yaml
     nodeset:
diff --git a/tests/fixtures/config/ansible/git/org_project/.zuul.yaml b/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
index 447f6cd..a1e144f 100644
--- a/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
+++ b/tests/fixtures/config/ansible/git/org_project/.zuul.yaml
@@ -17,5 +17,6 @@
         - check-vars
         - check-secret-names
         - timeout
+        - post-timeout
         - hello-world
         - failpost
diff --git a/tests/unit/test_v3.py b/tests/unit/test_v3.py
index 1338d20..e36c8f6 100755
--- a/tests/unit/test_v3.py
+++ b/tests/unit/test_v3.py
@@ -2048,6 +2048,12 @@
         build_timeout = self.getJobFromHistory('timeout')
         with self.jobLog(build_timeout):
             self.assertEqual(build_timeout.result, 'TIMED_OUT')
+            post_flag_path = os.path.join(self.test_root, build_timeout.uuid +
+                                          '.post.flag')
+            self.assertTrue(os.path.exists(post_flag_path))
+        build_post_timeout = self.getJobFromHistory('post-timeout')
+        with self.jobLog(build_post_timeout):
+            self.assertEqual(build_post_timeout.result, 'POST_FAILURE')
         build_faillocal = self.getJobFromHistory('faillocal')
         with self.jobLog(build_faillocal):
             self.assertEqual(build_faillocal.result, 'FAILURE')
diff --git a/zuul/configloader.py b/zuul/configloader.py
index f6a553f..ac3afdd 100644
--- a/zuul/configloader.py
+++ b/zuul/configloader.py
@@ -498,6 +498,7 @@
                       # validation happens in NodeSetParser
                       'nodeset': vs.Any(dict, str),
                       'timeout': int,
+                      'post-timeout': int,
                       'attempts': int,
                       'pre-run': to_list(str),
                       'post-run': to_list(str),
@@ -525,6 +526,7 @@
         'abstract',
         'protected',
         'timeout',
+        'post-timeout',
         'workspace',
         'voting',
         'hold-following-changes',
@@ -634,6 +636,10 @@
            int(conf['timeout']) > tenant.max_job_timeout:
             raise MaxTimeoutError(job, tenant)
 
+        if conf.get('post-timeout') and tenant.max_job_timeout != -1 and \
+           int(conf['post-timeout']) > tenant.max_job_timeout:
+            raise MaxTimeoutError(job, tenant)
+
         if 'post-review' in conf:
             if conf['post-review']:
                 job.post_review = True
diff --git a/zuul/executor/client.py b/zuul/executor/client.py
index d561232..c09d4e1 100644
--- a/zuul/executor/client.py
+++ b/zuul/executor/client.py
@@ -186,6 +186,7 @@
         params = dict()
         params['job'] = job.name
         params['timeout'] = job.timeout
+        params['post_timeout'] = job.post_timeout
         params['items'] = merger_items
         params['projects'] = []
         if hasattr(item.change, 'branch'):
diff --git a/zuul/executor/server.py b/zuul/executor/server.py
index 8de6fe0..fbc34c4 100644
--- a/zuul/executor/server.py
+++ b/zuul/executor/server.py
@@ -878,8 +878,10 @@
         success = False
         self.started = True
         time_started = time.time()
-        # timeout value is total job timeout or put another way
-        # the cummulative time that pre, run, and post can consume.
+        # timeout value is "total" job timeout which accounts for
+        # pre-run and run playbooks. post-run is different because
+        # it is used to copy out job logs and we want to do our best
+        # to copy logs even when the job has timed out.
         job_timeout = args['timeout']
         for index, playbook in enumerate(self.jobdir.pre_playbooks):
             # TODOv3(pabelanger): Implement pre-run timeout setting.
@@ -914,11 +916,15 @@
                 # run it again.
                 return None
 
+        post_timeout = args['post_timeout']
         for index, playbook in enumerate(self.jobdir.post_playbooks):
-            # TODOv3(pabelanger): Implement post-run timeout setting.
-            ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
+            # Post timeout operates a little differently to the main job
+            # timeout. We give each post playbook the full post timeout to
+            # do its job because post is where you'll often record job logs
+            # which are vital to understanding why timeouts have happened in
+            # the first place.
             post_status, post_code = self.runAnsiblePlaybook(
-                playbook, ansible_timeout, success, phase='post', index=index)
+                playbook, post_timeout, success, phase='post', index=index)
             if post_status == self.RESULT_ABORTED:
                 return 'ABORTED'
             if post_status != self.RESULT_NORMAL or post_code != 0:
diff --git a/zuul/model.py b/zuul/model.py
index 45fc1a8..ff84048 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -839,6 +839,7 @@
         self.execution_attributes = dict(
             parent=None,
             timeout=None,
+            post_timeout=None,
             variables={},
             nodeset=NodeSet(),
             workspace=None,