Merge "Fix test_cache_hard_links when on tmpfs" into feature/zuulv3
diff --git a/doc/source/admin/client.rst b/doc/source/admin/client.rst
index 6b62360..961b205 100644
--- a/doc/source/admin/client.rst
+++ b/doc/source/admin/client.rst
@@ -22,6 +22,14 @@
 
 The following subcommands are supported:
 
+Autohold
+^^^^^^^^
+.. program-output:: zuul autohold --help
+
+Example::
+
+  zuul autohold --tenant openstack --project example_project --job example_job --reason "reason text" --count 1
+
 Enqueue
 ^^^^^^^
 .. program-output:: zuul enqueue --help
diff --git a/tests/fixtures/config/multi-driver/git/common-config/zuul.yaml b/tests/fixtures/config/multi-driver/git/common-config/zuul.yaml
index 2dab845..7b5a77c 100644
--- a/tests/fixtures/config/multi-driver/git/common-config/zuul.yaml
+++ b/tests/fixtures/config/multi-driver/git/common-config/zuul.yaml
@@ -1,5 +1,5 @@
 - pipeline:
-    name: check_github
+    name: check
     manager: independent
     trigger:
       github:
@@ -8,25 +8,23 @@
             - opened
             - changed
             - reopened
-    success:
-      github:
-        status: 'success'
-    failure:
-      github:
-        status: 'failure'
-
-- pipeline:
-    name: check_gerrit
-    manager: independent
-    trigger:
       gerrit:
         - event: patchset-created
     success:
+      github:
+        status: 'success'
       gerrit:
-        verify: 1
+        Verified: 1
     failure:
+      github:
+        status: 'failure'
       gerrit:
-        verify: 1
+        Verified: 1
+    start:
+      github:
+        comment: true
+      gerrit:
+        Verified: 0
 
 - job:
     name: project-gerrit
@@ -35,12 +33,12 @@
 
 - project:
     name: org/project
-    check_gerrit:
+    check:
       jobs:
         - project-gerrit
 
 - project:
     name: org/project1
-    check_github:
+    check:
       jobs:
         - project1-github
diff --git a/tests/fixtures/layouts/autohold.yaml b/tests/fixtures/layouts/autohold.yaml
new file mode 100644
index 0000000..015e562
--- /dev/null
+++ b/tests/fixtures/layouts/autohold.yaml
@@ -0,0 +1,24 @@
+- pipeline:
+    name: check
+    manager: independent
+    trigger:
+      gerrit:
+        - event: patchset-created
+    success:
+      gerrit:
+        Verified: 1
+    failure:
+      gerrit:
+        Verified: -1
+
+- job:
+    name: project-test2
+    nodes:
+      - name: controller
+        label: label1
+
+- project:
+    name: org/project
+    check:
+      jobs:
+        - project-test2
diff --git a/tests/fixtures/layouts/reporting-multiple-github.yaml b/tests/fixtures/layouts/reporting-multiple-github.yaml
new file mode 100644
index 0000000..f14000e
--- /dev/null
+++ b/tests/fixtures/layouts/reporting-multiple-github.yaml
@@ -0,0 +1,40 @@
+- pipeline:
+    name: check
+    description: Standard check
+    manager: independent
+    trigger:
+      github:
+        - event: pull_request
+          action: opened
+      github_ent:
+        - event: pull_request
+          action: opened
+    start:
+      github:
+        status: 'pending'
+        comment: false
+      github_ent:
+        status: 'pending'
+        comment: false
+    success:
+      github:
+        status: 'success'
+      github_ent:
+        status: 'success'
+
+- job:
+    name: project1-test1
+- job:
+    name: project2-test1
+
+- project:
+    name: org/project1
+    check:
+      jobs:
+        - project1-test1
+
+- project:
+    name: org/project2
+    check:
+      jobs:
+        - project2-test1
diff --git a/tests/unit/test_github_driver.py b/tests/unit/test_github_driver.py
index e8dff51..1ae36aa 100644
--- a/tests/unit/test_github_driver.py
+++ b/tests/unit/test_github_driver.py
@@ -406,6 +406,48 @@
         self.assertEqual(len(D.comments), 1)
         self.assertEqual(D.comments[0], 'Merge failed')
 
+    @simple_layout('layouts/reporting-multiple-github.yaml', driver='github')
+    def test_reporting_multiple_github(self):
+        project = 'org/project1'
+        # the pipeline reports pull request status both on start and on success
+        self.executor_server.hold_jobs_in_build = True
+        A = self.fake_github.openFakePullRequest(project, 'master', 'A')
+        self.fake_github.emitEvent(A.getPullRequestOpenedEvent())
+        # open one on B as well, which should not affect A's reporting
+        B = self.fake_github.openFakePullRequest('org/project2', 'master',
+                                                 'B')
+        self.fake_github.emitEvent(B.getPullRequestOpenedEvent())
+        self.waitUntilSettled()
+        # We should have a status container for the head sha
+        statuses = self.fake_github.statuses[project][A.head_sha]
+        self.assertIn(A.head_sha, self.fake_github.statuses[project].keys())
+        # We should only have one status for the head sha
+        self.assertEqual(1, len(statuses))
+        check_status = statuses[0]
+        check_url = ('http://zuul.example.com/status/#%s,%s' %
+                     (A.number, A.head_sha))
+        self.assertEqual('tenant-one/check', check_status['context'])
+        self.assertEqual('Standard check', check_status['description'])
+        self.assertEqual('pending', check_status['state'])
+        self.assertEqual(check_url, check_status['url'])
+        self.assertEqual(0, len(A.comments))
+
+        self.executor_server.hold_jobs_in_build = False
+        self.executor_server.release()
+        self.waitUntilSettled()
+        # We should only have two statuses for the head sha
+        statuses = self.fake_github.statuses[project][A.head_sha]
+        self.assertEqual(2, len(statuses))
+        check_status = statuses[0]
+        check_url = ('http://zuul.example.com/status/#%s,%s' %
+                     (A.number, A.head_sha))
+        self.assertEqual('tenant-one/check', check_status['context'])
+        self.assertEqual('success', check_status['state'])
+        self.assertEqual(check_url, check_status['url'])
+        self.assertEqual(1, len(A.comments))
+        self.assertThat(A.comments[0],
+                        MatchesRegex('.*Build succeeded.*', re.DOTALL))
+
     @simple_layout('layouts/dependent-github.yaml', driver='github')
     def test_parallel_changes(self):
         "Test that changes are tested in parallel and merged in series"
diff --git a/tests/unit/test_multi_driver.py b/tests/unit/test_multi_driver.py
index 864bd31..e40591b 100644
--- a/tests/unit/test_multi_driver.py
+++ b/tests/unit/test_multi_driver.py
@@ -43,3 +43,12 @@
         self.executor_server.hold_jobs_in_build = False
         self.executor_server.release()
         self.waitUntilSettled()
+
+        # Check on reporting results
+        # github should have a success status (only).
+        statuses = self.fake_github.statuses['org/project1'][B.head_sha]
+        self.assertEqual(1, len(statuses))
+        self.assertEqual('success', statuses[0]['state'])
+
+        # gerrit should have only reported twice, on start and success
+        self.assertEqual(A.reported, 2)
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index 5dd3f4e..16d82af 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -1434,6 +1434,60 @@
         self.assertEqual(self.getJobFromHistory('project-test2').result,
                          'FAILURE')
 
+    @simple_layout('layouts/autohold.yaml')
+    def test_autohold(self):
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+
+        client = zuul.rpcclient.RPCClient('127.0.0.1',
+                                          self.gearman_server.port)
+        self.addCleanup(client.shutdown)
+        r = client.autohold('tenant-one', 'org/project', 'project-test2',
+                            "reason text", 1)
+        self.assertTrue(r)
+
+        self.executor_server.failJob('project-test2', A)
+        self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+
+        self.waitUntilSettled()
+
+        self.assertEqual(A.data['status'], 'NEW')
+        self.assertEqual(A.reported, 1)
+        self.assertEqual(self.getJobFromHistory('project-test2').result,
+                         'FAILURE')
+
+        # Check nodepool for a held node
+        held_node = None
+        for node in self.fake_nodepool.getNodes():
+            if node['state'] == zuul.model.STATE_HOLD:
+                held_node = node
+                break
+        self.assertIsNotNone(held_node)
+
+        # Validate node has recorded the failed job
+        self.assertEqual(
+            held_node['hold_job'],
+            " ".join(['tenant-one',
+                      'review.example.com/org/project',
+                      'project-test2'])
+        )
+        self.assertEqual(held_node['hold_reason'], "reason text")
+
+        # Another failed change should not hold any more nodes
+        B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
+        self.executor_server.failJob('project-test2', B)
+        self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
+        self.waitUntilSettled()
+        self.assertEqual(B.data['status'], 'NEW')
+        self.assertEqual(B.reported, 1)
+        self.assertEqual(self.getJobFromHistory('project-test2').result,
+                         'FAILURE')
+
+        held_nodes = 0
+        for node in self.fake_nodepool.getNodes():
+            if node['state'] == zuul.model.STATE_HOLD:
+                held_nodes += 1
+        self.assertEqual(held_nodes, 1)
+
     @simple_layout('layouts/three-projects.yaml')
     def test_dependent_behind_dequeue(self):
         # This particular test does a large amount of merges and needs a little
diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py
index b55aed8..177283e 100755
--- a/zuul/cmd/client.py
+++ b/zuul/cmd/client.py
@@ -46,6 +46,21 @@
                                            description='valid commands',
                                            help='additional help')
 
+        cmd_autohold = subparsers.add_parser(
+            'autohold', help='hold nodes for failed job')
+        cmd_autohold.add_argument('--tenant', help='tenant name',
+                                  required=True)
+        cmd_autohold.add_argument('--project', help='project name',
+                                  required=True)
+        cmd_autohold.add_argument('--job', help='job name',
+                                  required=True)
+        cmd_autohold.add_argument('--reason', help='reason for the hold',
+                                  required=True)
+        cmd_autohold.add_argument('--count',
+                                  help='number of job runs (default: 1)',
+                                  required=False, type=int, default=1)
+        cmd_autohold.set_defaults(func=self.autohold)
+
         cmd_enqueue = subparsers.add_parser('enqueue', help='enqueue a change')
         cmd_enqueue.add_argument('--tenant', help='tenant name',
                                  required=True)
@@ -137,6 +152,16 @@
         else:
             sys.exit(1)
 
+    def autohold(self):
+        client = zuul.rpcclient.RPCClient(
+            self.server, self.port, self.ssl_key, self.ssl_cert, self.ssl_ca)
+        r = client.autohold(tenant=self.args.tenant,
+                            project=self.args.project,
+                            job=self.args.job,
+                            reason=self.args.reason,
+                            count=self.args.count)
+        return r
+
     def enqueue(self):
         client = zuul.rpcclient.RPCClient(
             self.server, self.port, self.ssl_key, self.ssl_cert, self.ssl_ca)
diff --git a/zuul/driver/github/githubreporter.py b/zuul/driver/github/githubreporter.py
index ea41ccd..b0791d9 100644
--- a/zuul/driver/github/githubreporter.py
+++ b/zuul/driver/github/githubreporter.py
@@ -19,6 +19,7 @@
 from zuul.reporter import BaseReporter
 from zuul.exceptions import MergeFailure
 from zuul.driver.util import scalar_or_list
+from zuul.driver.github.githubsource import GithubSource
 
 
 class GithubReporter(BaseReporter):
@@ -41,6 +42,17 @@
 
     def report(self, item):
         """Report on an event."""
+
+        # If the source is not GithubSource we cannot report anything here.
+        if not isinstance(item.change.project.source, GithubSource):
+            return
+
+        # To support multiple GitHub connections we also need to filter by
+        # the canonical hostname.
+        if item.change.project.source.connection.canonical_hostname != \
+                self.connection.canonical_hostname:
+            return
+
         # order is important for github branch protection.
         # A status should be set before a merge attempt
         if self._commit_status is not None:
diff --git a/zuul/model.py b/zuul/model.py
index ed50164..90cc81d 100644
--- a/zuul/model.py
+++ b/zuul/model.py
@@ -356,6 +356,8 @@
         self.label = label
         self.id = None
         self.lock = None
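+        # Autohold state set by Nodepool.holdNodeSet for matching requests.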
+        self.hold_job = None
+        self.hold_reason = None
         # Attributes from Nodepool
         self._state = 'unknown'
         self.state_time = time.time()
@@ -396,6 +398,8 @@
     def toDict(self):
         d = {}
         d['state'] = self.state
+        d['hold_job'] = self.hold_job
+        d['hold_reason'] = self.hold_reason
         for k in self._keys:
             d[k] = getattr(self, k)
         return d
diff --git a/zuul/nodepool.py b/zuul/nodepool.py
index 8f6489c..6b3632b 100644
--- a/zuul/nodepool.py
+++ b/zuul/nodepool.py
@@ -44,6 +44,35 @@
             except Exception:
                 self.log.exception("Error deleting node request:")
 
+    def holdNodeSet(self, nodeset, autohold_key):
+        '''
+        If requested, perform a hold on the given set of nodes.
+
+        :param NodeSet nodeset: The object containing the set of nodes to hold.
+        :param tuple autohold_key: A tuple of (tenant, project, job) names
+            associated with the given NodeSet.
+        '''
+        if autohold_key not in self.sched.autohold_requests:
+            return
+
+        (hold_iterations, reason) = self.sched.autohold_requests[autohold_key]
+        nodes = nodeset.getNodes()
+
+        for node in nodes:
+            node.state = model.STATE_HOLD
+            node.hold_job = " ".join(autohold_key)
+            node.hold_reason = reason
+            self.sched.zk.storeNode(node)
+
+        # Remove the autohold request once the number of held nodes
+        # reaches the number of nodes used in a single job run times
+        # the requested number of iterations. We use >= rather than ==
+        # because the requested count can be altered while holds are
+        # already in place.
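+        # For example, a two-node job held with count=2 keeps the request
+        # active until four nodes are in the hold state.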
+        nodes_in_hold = self.sched.zk.heldNodeCount(autohold_key)
+        if nodes_in_hold >= len(nodes) * hold_iterations:
+            self.log.debug("Removing autohold for %s", autohold_key)
+            del self.sched.autohold_requests[autohold_key]
+
     def useNodeSet(self, nodeset):
         self.log.info("Setting nodeset %s in use" % (nodeset,))
         for node in nodeset.getNodes():
diff --git a/zuul/rpcclient.py b/zuul/rpcclient.py
index fd3517f..1a0a084 100644
--- a/zuul/rpcclient.py
+++ b/zuul/rpcclient.py
@@ -48,6 +48,14 @@
         self.log.debug("Job complete, success: %s" % (not job.failure))
         return job
 
+    def autohold(self, tenant, project, job, reason, count):
+        data = {'tenant': tenant,
+                'project': project,
+                'job': job,
+                'reason': reason,
+                'count': count}
+        return not self.submitJob('zuul:autohold', data).failure
+
     def enqueue(self, tenant, pipeline, project, trigger, change):
         data = {'tenant': tenant,
                 'pipeline': pipeline,
diff --git a/zuul/rpclistener.py b/zuul/rpclistener.py
index 6543c91..52a7e51 100644
--- a/zuul/rpclistener.py
+++ b/zuul/rpclistener.py
@@ -49,6 +49,7 @@
         self.thread.start()
 
     def register(self):
+        self.worker.registerFunction("zuul:autohold")
         self.worker.registerFunction("zuul:enqueue")
         self.worker.registerFunction("zuul:enqueue_ref")
         self.worker.registerFunction("zuul:promote")
@@ -89,6 +90,39 @@
             except Exception:
                 self.log.exception("Exception while getting job")
 
+    def handle_autohold(self, job):
+        args = json.loads(job.arguments)
+        params = {}
+
+        tenant = self.sched.abide.tenants.get(args['tenant'])
+        if tenant:
+            params['tenant_name'] = args['tenant']
+        else:
+            error = "Invalid tenant: %s" % args['tenant']
+            job.sendWorkException(error.encode('utf8'))
+            return
+
+        (trusted, project) = tenant.getProject(args['project'])
+        if project:
+            params['project_name'] = project.canonical_name
+        else:
+            error = "Invalid project: %s" % args['project']
+            job.sendWorkException(error.encode('utf8'))
+            return
+
+        params['job_name'] = args['job']
+        params['reason'] = args['reason']
+
+        if args['count'] < 0:
+            error = "Invalid count: %d" % args['count']
+            job.sendWorkException(error.encode('utf8'))
+            return
+
+        params['count'] = args['count']
+
+        self.sched.autohold(**params)
+        job.sendWorkComplete()
+
     def _common_enqueue(self, job):
         args = json.loads(job.arguments)
         event = model.TriggerEvent()
diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index 2217b0b..0a33b00 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -231,6 +231,7 @@
         self.zuul_version = zuul_version.version_info.release_string()
         self.last_reconfigured = None
         self.tenant_last_reconfigured = {}
+        self.autohold_requests = {}
 
     def stop(self):
         self._stopped = True
@@ -349,6 +350,15 @@
         self.last_reconfigured = int(time.time())
         # TODOv3(jeblair): reconfigure time should be per-tenant
 
+    def autohold(self, tenant_name, project_name, job_name, reason, count):
+        key = (tenant_name, project_name, job_name)
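+        # A count of 0 clears any existing autohold request for this key.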
+        if count == 0 and key in self.autohold_requests:
+            self.log.debug("Removing autohold for %s", key)
+            del self.autohold_requests[key]
+        else:
+            self.log.debug("Autohold requested for %s", key)
+            self.autohold_requests[key] = (count, reason)
+
     def promote(self, tenant_name, pipeline_name, change_ids):
         event = PromoteEvent(tenant_name, pipeline_name, change_ids)
         self.management_event_queue.put(event)
@@ -828,6 +838,16 @@
         # the nodes to nodepool.
         try:
             nodeset = build.build_set.getJobNodeSet(build.job.name)
+            autohold_key = (build.pipeline.layout.tenant.name,
+                            build.build_set.item.change.project.canonical_name,
+                            build.job.name)
+
+            try:
+                self.nodepool.holdNodeSet(nodeset, autohold_key)
+            except Exception:
+                self.log.exception("Unable to process autohold for %s",
+                                   autohold_key)
+
             self.nodepool.returnNodeSet(nodeset)
         except Exception:
             self.log.exception("Unable to return nodeset %s" % (nodeset,))
diff --git a/zuul/zk.py b/zuul/zk.py
index 31b85ea..5ea4e56 100644
--- a/zuul/zk.py
+++ b/zuul/zk.py
@@ -15,19 +15,12 @@
 import json
 import logging
 import time
+
 from kazoo.client import KazooClient, KazooState
 from kazoo import exceptions as kze
 from kazoo.recipe.lock import Lock
 
-# States:
-# We are building this node but it is not ready for use.
-BUILDING = 'building'
-# The node is ready for use.
-READY = 'ready'
-# The node should be deleted.
-DELETING = 'deleting'
-
-STATES = set([BUILDING, READY, DELETING])
+import zuul.model
 
 
 class LockException(Exception):
@@ -246,3 +239,25 @@
             raise LockException("Node %s does not hold a lock" % (node,))
         node.lock.release()
         node.lock = None
+
+    def heldNodeCount(self, autohold_key):
+        '''
+        Count the number of nodes being held for the given tenant/project/job.
+
+        :param tuple autohold_key: A tuple of (tenant, project, job) names.
+        '''
+        identifier = " ".join(autohold_key)
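+        # holdNodeSet records this identifier in each held node's hold_job.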
+        try:
+            nodes = self.client.get_children(self.NODE_ROOT)
+        except kze.NoNodeError:
+            return 0
+
+        count = 0
+        for nodeid in nodes:
+            node_path = '%s/%s' % (self.NODE_ROOT, nodeid)
+            node_data, node_stat = self.client.get(node_path)
+            node_data = self._strToDict(node_data)
+            if (node_data['state'] == zuul.model.STATE_HOLD and
+                    node_data.get('hold_job') == identifier):
+                count += 1
+        return count
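
For reference, a minimal sketch of exercising the new autohold RPC from Python, mirroring
the call made in test_autohold; the gearman address and the tenant, project, and job names
below are illustrative assumptions rather than values fixed by this change::

    import zuul.rpcclient

    # Connect to the scheduler's gearman server (host and port are assumed).
    client = zuul.rpcclient.RPCClient('127.0.0.1', 4730)

    # Hold the nodes of the next failed run of the job, for one run.
    ok = client.autohold(tenant='tenant-one', project='org/project',
                         job='project-test2', reason='debugging a failure',
                         count=1)
    print('autohold accepted' if ok else 'autohold rejected')
    client.shutdown()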