Cancel/return nodepool requests on job cancel
When canceling a running or requested job, cancel outstanding
nodepool requests or return unused nodes.
Change-Id: I77f8869b9d751ccd6c9f398ed03ef5ac482cc204
diff --git a/tests/test_nodepool.py b/tests/test_nodepool.py
index 677ae73..78d85aa 100644
--- a/tests/test_nodepool.py
+++ b/tests/test_nodepool.py
@@ -105,3 +105,18 @@
self.waitForRequests()
self.assertEqual(len(self.provisioned_requests), 1)
self.assertEqual(request.state, 'fulfilled')
+
+ def test_node_request_canceled(self):
+ # Test that node requests can be canceled
+
+ nodeset = model.NodeSet()
+ nodeset.addNode(model.Node('controller', 'ubuntu-xenial'))
+ nodeset.addNode(model.Node('compute', 'ubuntu-xenial'))
+ job = model.Job('testjob')
+ job.nodeset = nodeset
+ self.fake_nodepool.paused = True
+ request = self.nodepool.requestNodes(None, job)
+ self.nodepool.cancelRequest(request)
+
+ self.waitForRequests()
+ self.assertEqual(len(self.provisioned_requests), 0)
diff --git a/zuul/launcher/client.py b/zuul/launcher/client.py
index 9fbf1bb..e17c83c 100644
--- a/zuul/launcher/client.py
+++ b/zuul/launcher/client.py
@@ -427,6 +427,7 @@
return build
def cancel(self, build):
+ # Returns whether a running build was canceled
self.log.info("Cancel build %s for job %s" % (build, build.job))
build.canceled = True
@@ -434,21 +435,21 @@
job = build.__gearman_job # noqa
except AttributeError:
self.log.debug("Build %s has no associated gearman job" % build)
- return
+ return False
# TODOv3(jeblair): make a nicer way of recording build start.
if build.url is not None:
self.log.debug("Build %s has already started" % build)
self.cancelRunningBuild(build)
self.log.debug("Canceled running build %s" % build)
- return
+ return True
else:
self.log.debug("Build %s has not started yet" % build)
self.log.debug("Looking for build %s in queue" % build)
if self.cancelJobInQueue(build):
self.log.debug("Removed build %s from queue" % build)
- return
+ return False
time.sleep(1)
@@ -457,7 +458,7 @@
self.log.debug("Build %s has just started" % build)
self.log.debug("Canceled running build %s" % build)
self.cancelRunningBuild(build)
- return
+ return True
self.log.debug("Unable to cancel build %s" % build)
def onBuildCompleted(self, job, result=None):
diff --git a/zuul/manager/__init__.py b/zuul/manager/__init__.py
index f5a35cd..7f64986 100644
--- a/zuul/manager/__init__.py
+++ b/zuul/manager/__init__.py
@@ -396,11 +396,20 @@
self.sched.nodepool.cancelRequest(req)
old_build_set.node_requests = {}
for build in old_build_set.getBuilds():
+ was_running = False
try:
- self.sched.launcher.cancel(build)
+ was_running = self.sched.launcher.cancel(build)
except:
self.log.exception("Exception while canceling build %s "
"for change %s" % (build, item.change))
+ if not was_running:
+ try:
+ nodeset = build.build_set.getJobNodeSet(build.job.name)
+ self.nodepool.returnNodeset(nodeset)
+ except Exception:
+ self.log.exception("Unable to return nodeset %s for "
+ "canceled build request %s" %
+ (nodeset, build))
build.result = 'CANCELED'
canceled = True
for item_behind in item.items_behind:
diff --git a/zuul/nodepool.py b/zuul/nodepool.py
index 11b02b6..4d0442f 100644
--- a/zuul/nodepool.py
+++ b/zuul/nodepool.py
@@ -35,8 +35,13 @@
return req
def cancelRequest(self, request):
- if request in self.requests:
- self.requests.remove(request)
+ self.log.debug("Canceling node request: %s" % (request,))
+ if request.uid in self.requests:
+ try:
+ self.sched.zk.deleteNodeRequest(request)
+ except Exception:
+ self.log.exception("Error deleting node request:")
+ del self.requests[request.uid]
def useNodeset(self, nodeset):
for node in nodeset.getNodes():
@@ -51,7 +56,7 @@
raise Exception("Node %s is not locked" % (node,))
if node.state == 'in-use':
node.state = 'used'
- self.sched.zk.storeNode(node)
+ self.sched.zk.storeNode(node)
self._unlockNodes(nodeset.getNodes())
def unlockNodeset(self, nodeset):