Cancel nodepool requests or return nodes on job cancel

When canceling a running or requested job, cancel its outstanding
nodepool requests. If the build had not started running, also return
its unused nodes to nodepool.

Change-Id: I77f8869b9d751ccd6c9f398ed03ef5ac482cc204
diff --git a/tests/test_nodepool.py b/tests/test_nodepool.py
index 677ae73..78d85aa 100644
--- a/tests/test_nodepool.py
+++ b/tests/test_nodepool.py
@@ -105,3 +105,18 @@
         self.waitForRequests()
         self.assertEqual(len(self.provisioned_requests), 1)
         self.assertEqual(request.state, 'fulfilled')
+
+    def test_node_request_canceled(self):
+        # Test that node requests can be canceled
+
+        nodeset = model.NodeSet()
+        nodeset.addNode(model.Node('controller', 'ubuntu-xenial'))
+        nodeset.addNode(model.Node('compute', 'ubuntu-xenial'))
+        job = model.Job('testjob')
+        job.nodeset = nodeset
+        self.fake_nodepool.paused = True
+        request = self.nodepool.requestNodes(None, job)
+        self.nodepool.cancelRequest(request)
+
+        self.waitForRequests()
+        self.assertEqual(len(self.provisioned_requests), 0)
diff --git a/zuul/launcher/client.py b/zuul/launcher/client.py
index 9fbf1bb..e17c83c 100644
--- a/zuul/launcher/client.py
+++ b/zuul/launcher/client.py
@@ -427,6 +427,7 @@
         return build
 
     def cancel(self, build):
+        # Returns whether a running build was canceled
         self.log.info("Cancel build %s for job %s" % (build, build.job))
 
         build.canceled = True
@@ -434,21 +435,21 @@
             job = build.__gearman_job  # noqa
         except AttributeError:
             self.log.debug("Build %s has no associated gearman job" % build)
-            return
+            return False
 
         # TODOv3(jeblair): make a nicer way of recording build start.
         if build.url is not None:
             self.log.debug("Build %s has already started" % build)
             self.cancelRunningBuild(build)
             self.log.debug("Canceled running build %s" % build)
-            return
+            return True
         else:
             self.log.debug("Build %s has not started yet" % build)
 
         self.log.debug("Looking for build %s in queue" % build)
         if self.cancelJobInQueue(build):
             self.log.debug("Removed build %s from queue" % build)
-            return
+            return False
 
         time.sleep(1)
 
@@ -457,7 +458,7 @@
             self.log.debug("Build %s has just started" % build)
             self.log.debug("Canceled running build %s" % build)
             self.cancelRunningBuild(build)
-            return
+            return True
         self.log.debug("Unable to cancel build %s" % build)
 
     def onBuildCompleted(self, job, result=None):
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index f5a35cd..7f64986 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -396,11 +396,20 @@
             self.sched.nodepool.cancelRequest(req)
         old_build_set.node_requests = {}
         for build in old_build_set.getBuilds():
+            was_running = False
             try:
-                self.sched.launcher.cancel(build)
+                was_running = self.sched.launcher.cancel(build)
             except:
                 self.log.exception("Exception while canceling build %s "
                                    "for change %s" % (build, item.change))
+            if not was_running:
+                try:
+                    nodeset = build.build_set.getJobNodeSet(build.job.name)
+                    self.nodepool.returnNodeset(nodeset)
+                except Exception:
+                    self.log.exception("Unable to return nodeset %s for "
+                                       "canceled build request %s" %
+                                       (nodeset, build))
             build.result = 'CANCELED'
             canceled = True
         for item_behind in item.items_behind:
diff --git a/zuul/nodepool.py b/zuul/nodepool.py
index 11b02b6..4d0442f 100644
--- a/zuul/nodepool.py
+++ b/zuul/nodepool.py
@@ -35,8 +35,13 @@
         return req
 
     def cancelRequest(self, request):
-        if request in self.requests:
-            self.requests.remove(request)
+        self.log.debug("Canceling node request: %s" % (request,))
+        if request.uid in self.requests:
+            try:
+                self.sched.zk.deleteNodeRequest(request)
+            except Exception:
+                self.log.exception("Error deleting node request:")
+            del self.requests[request.uid]
 
     def useNodeset(self, nodeset):
         for node in nodeset.getNodes():
@@ -51,7 +56,7 @@
                 raise Exception("Node %s is not locked" % (node,))
             if node.state == 'in-use':
                 node.state = 'used'
-            self.sched.zk.storeNode(node)
+                self.sched.zk.storeNode(node)
         self._unlockNodes(nodeset.getNodes())
 
     def unlockNodeset(self, nodeset):