Merge "Add tenant project group definition example and definition in the doc"
diff --git a/doc/source/admin/components.rst b/doc/source/admin/components.rst
index 88b898f..ba14752 100644
--- a/doc/source/admin/components.rst
+++ b/doc/source/admin/components.rst
@@ -442,6 +442,11 @@
Port to use for finger log streamer.
+ .. attr:: state_dir
+ :default: /var/lib/zuul
+
+ Path to directory in which Zuul should save its state.
+
.. attr:: git_dir
:default: /var/lib/zuul/git
diff --git a/doc/source/admin/monitoring.rst b/doc/source/admin/monitoring.rst
index d43fd03..1c17c28 100644
--- a/doc/source/admin/monitoring.rst
+++ b/doc/source/admin/monitoring.rst
@@ -131,6 +131,14 @@
component of the key will be replaced with the hostname of the
executor.
+ .. stat:: merger.<result>
+ :type: counter
+
+ Incremented to represent the status of a Zuul executor's merger
+ operations. ``<result>`` can be either ``SUCCESS`` or ``FAILURE``.
+ A failed merge operation, which is counted as a ``FAILURE``, is
+ what Zuul ultimately reports as a ``MERGER_FAILURE``.
+
.. stat:: builds
:type: counter
@@ -148,6 +156,27 @@
The number of builds currently running on this executor. This
includes starting builds.
+ .. stat:: phase
+
+ Subtree detailing per-phase execution statistics:
+
+ .. stat:: <phase>
+
+ ``<phase>`` represents a phase in the execution of a job.
+ This can be an *internal* phase (such as ``setup`` or ``cleanup``) as
+ well as *job* phases such as ``pre``, ``run`` or ``post``.
+
+ .. stat:: <result>
+ :type: counter
+
+ A counter for each type of result.
+ These results do not, by themselves, determine the status of a build
+ but are indicators of the exit status provided by Ansible for the
+ execution of a particular phase.
+
+ Examples of possible counters for each phase are: ``RESULT_NORMAL``,
+ ``RESULT_TIMED_OUT``, ``RESULT_UNREACHABLE``, ``RESULT_ABORTED``.
+
.. stat:: load_average
:type: gauge
diff --git a/requirements.txt b/requirements.txt
index f24f195..7057c5a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,6 +25,6 @@
cachecontrol
pyjwt
iso8601
-aiohttp
+aiohttp<3.0.0
uvloop;python_version>='3.5'
psutil
diff --git a/tests/base.py b/tests/base.py
index f68f59a..70889bb 100755
--- a/tests/base.py
+++ b/tests/base.py
@@ -1640,6 +1640,10 @@
nodeid = path.split("/")[-1]
return nodeid
+ def removeNode(self, node):
+ path = self.NODE_ROOT + '/' + node["_oid"]
+ self.client.delete(path, recursive=True)
+
def addFailRequest(self, request):
self.fail_requests.add(request['_oid'])
diff --git a/tests/fixtures/layouts/timer-github.yaml b/tests/fixtures/layouts/timer-github.yaml
new file mode 100644
index 0000000..4f3efe4
--- /dev/null
+++ b/tests/fixtures/layouts/timer-github.yaml
@@ -0,0 +1,25 @@
+- pipeline:
+ name: periodic
+ manager: independent
+ trigger:
+ timer:
+ - time: '* * * * * */1'
+
+- job:
+ name: base
+ parent: null
+ run: playbooks/base.yaml
+
+- job:
+ name: project-bitrot
+ nodeset:
+ nodes:
+ - name: static
+ label: ubuntu-xenial
+ run: playbooks/project-bitrot.yaml
+
+- project:
+ name: org/project
+ periodic:
+ jobs:
+ - project-bitrot
diff --git a/tests/unit/test_github_driver.py b/tests/unit/test_github_driver.py
index cd36ba3..8978415 100644
--- a/tests/unit/test_github_driver.py
+++ b/tests/unit/test_github_driver.py
@@ -210,6 +210,34 @@
self.waitUntilSettled()
self.assertEqual(1, len(self.history))
+ @simple_layout('layouts/basic-github.yaml', driver='github')
+ def test_timer_event(self):
+ self.executor_server.hold_jobs_in_build = True
+ self.commitConfigUpdate('org/common-config',
+ 'layouts/timer-github.yaml')
+ self.sched.reconfigure(self.config)
+ time.sleep(2)
+ self.waitUntilSettled()
+ self.assertEqual(len(self.builds), 1)
+ self.executor_server.hold_jobs_in_build = False
+ # Stop queuing timer triggered jobs so that the assertions
+ # below don't race against more jobs being queued.
+ self.commitConfigUpdate('org/common-config',
+ 'layouts/basic-github.yaml')
+ self.sched.reconfigure(self.config)
+ self.waitUntilSettled()
+ # If APScheduler is in mid-event when we remove the job, we
+ # can end up with one more event firing, so give it an extra
+ # second to settle.
+ time.sleep(1)
+ self.waitUntilSettled()
+ self.executor_server.release()
+ self.waitUntilSettled()
+ self.assertHistory([
+ dict(name='project-bitrot', result='SUCCESS',
+ ref='refs/heads/master'),
+ ], ordered=False)
+
@simple_layout('layouts/dequeue-github.yaml', driver='github')
def test_dequeue_pull_synchronized(self):
self.executor_server.hold_jobs_in_build = True
diff --git a/tests/unit/test_scheduler.py b/tests/unit/test_scheduler.py
index 9b54084..c833fa2 100755
--- a/tests/unit/test_scheduler.py
+++ b/tests/unit/test_scheduler.py
@@ -1506,7 +1506,7 @@
self.gearman_server.port)
self.addCleanup(client.shutdown)
r = client.autohold('tenant-one', 'org/project', 'project-test2',
- "reason text", 1)
+ "", "", "reason text", 1)
self.assertTrue(r)
# First check that successful jobs do not autohold
@@ -1553,7 +1553,7 @@
held_node['hold_job'],
" ".join(['tenant-one',
'review.example.com/org/project',
- 'project-test2'])
+ 'project-test2', '.*'])
)
self.assertEqual(held_node['comment'], "reason text")
@@ -1573,13 +1573,151 @@
held_nodes += 1
self.assertEqual(held_nodes, 1)
+ def _test_autohold_scoped(self, change_obj, change, ref):
+ client = zuul.rpcclient.RPCClient('127.0.0.1',
+ self.gearman_server.port)
+ self.addCleanup(client.shutdown)
+
+ # create two changes on the same project, and autohold request
+ # for one of them.
+ other = self.fake_gerrit.addFakeChange(
+ 'org/project', 'master', 'other'
+ )
+
+ r = client.autohold('tenant-one', 'org/project', 'project-test2',
+ str(change), ref, "reason text", 1)
+ self.assertTrue(r)
+
+ # First, check that an unrelated job does not trigger autohold, even
+ # when it failed
+ self.executor_server.failJob('project-test2', other)
+ self.fake_gerrit.addEvent(other.getPatchsetCreatedEvent(1))
+
+ self.waitUntilSettled()
+
+ self.assertEqual(other.data['status'], 'NEW')
+ self.assertEqual(other.reported, 1)
+ # project-test2
+ self.assertEqual(self.history[0].result, 'FAILURE')
+
+ # Check nodepool for a held node
+ held_node = None
+ for node in self.fake_nodepool.getNodes():
+ if node['state'] == zuul.model.STATE_HOLD:
+ held_node = node
+ break
+ self.assertIsNone(held_node)
+
+ # And then verify that failed job for the defined change
+ # triggers the autohold
+
+ self.executor_server.failJob('project-test2', change_obj)
+ self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
+
+ self.waitUntilSettled()
+
+ self.assertEqual(change_obj.data['status'], 'NEW')
+ self.assertEqual(change_obj.reported, 1)
+ # project-test2
+ self.assertEqual(self.history[1].result, 'FAILURE')
+
+ # Check nodepool for a held node
+ held_node = None
+ for node in self.fake_nodepool.getNodes():
+ if node['state'] == zuul.model.STATE_HOLD:
+ held_node = node
+ break
+ self.assertIsNotNone(held_node)
+
+ # Validate node has recorded the failed job
+ if change != "":
+ ref = "refs/changes/%s/%s/.*" % (
+ str(change_obj.number)[-1:], str(change_obj.number)
+ )
+
+ self.assertEqual(
+ held_node['hold_job'],
+ " ".join(['tenant-one',
+ 'review.example.com/org/project',
+ 'project-test2', ref])
+ )
+ self.assertEqual(held_node['comment'], "reason text")
+
+ @simple_layout('layouts/autohold.yaml')
+ def test_autohold_change(self):
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+
+ self._test_autohold_scoped(A, change=A.number, ref="")
+
+ @simple_layout('layouts/autohold.yaml')
+ def test_autohold_ref(self):
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+ ref = A.data['currentPatchSet']['ref']
+ self._test_autohold_scoped(A, change="", ref=ref)
+
+ @simple_layout('layouts/autohold.yaml')
+ def test_autohold_scoping(self):
+ client = zuul.rpcclient.RPCClient('127.0.0.1',
+ self.gearman_server.port)
+ self.addCleanup(client.shutdown)
+
+ A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+
+ # create three autohold requests, scoped to job, change and
+ # a specific ref
+ change = str(A.number)
+ ref = A.data['currentPatchSet']['ref']
+ r1 = client.autohold('tenant-one', 'org/project', 'project-test2',
+ "", "", "reason text", 1)
+ self.assertTrue(r1)
+ r2 = client.autohold('tenant-one', 'org/project', 'project-test2',
+ change, "", "reason text", 1)
+ self.assertTrue(r2)
+ r3 = client.autohold('tenant-one', 'org/project', 'project-test2',
+ "", ref, "reason text", 1)
+ self.assertTrue(r3)
+
+ # Fail 3 jobs for the same change, and verify that the autohold
+ # requests are fulfilled in the expected order: from the most
+ # specific towards the most generic one.
+
+ def _fail_job_and_verify_autohold_request(change_obj, ref_filter):
+ self.executor_server.failJob('project-test2', change_obj)
+ self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
+
+ self.waitUntilSettled()
+
+ # Check nodepool for a held node
+ held_node = None
+ for node in self.fake_nodepool.getNodes():
+ if node['state'] == zuul.model.STATE_HOLD:
+ held_node = node
+ break
+ self.assertIsNotNone(held_node)
+
+ self.assertEqual(
+ held_node['hold_job'],
+ " ".join(['tenant-one',
+ 'review.example.com/org/project',
+ 'project-test2', ref_filter])
+ )
+ self.assertFalse(held_node['_lock'], "Node %s is locked" %
+ (node['_oid'],))
+ self.fake_nodepool.removeNode(held_node)
+
+ _fail_job_and_verify_autohold_request(A, ref)
+
+ ref = "refs/changes/%s/%s/.*" % (str(change)[-1:], str(change))
+ _fail_job_and_verify_autohold_request(A, ref)
+ _fail_job_and_verify_autohold_request(A, ".*")
+
@simple_layout('layouts/autohold.yaml')
def test_autohold_ignores_aborted_jobs(self):
client = zuul.rpcclient.RPCClient('127.0.0.1',
self.gearman_server.port)
self.addCleanup(client.shutdown)
r = client.autohold('tenant-one', 'org/project', 'project-test2',
- "reason text", 1)
+ "", "", "reason text", 1)
self.assertTrue(r)
self.executor_server.hold_jobs_in_build = True
@@ -1623,7 +1761,7 @@
self.addCleanup(client.shutdown)
r = client.autohold('tenant-one', 'org/project', 'project-test2',
- "reason text", 1)
+ "", "", "reason text", 1)
self.assertTrue(r)
autohold_requests = client.autohold_list()
@@ -1632,11 +1770,12 @@
# The single dict key should be a CSV string value
key = list(autohold_requests.keys())[0]
- tenant, project, job = key.split(',')
+ tenant, project, job, ref_filter = key.split(',')
self.assertEqual('tenant-one', tenant)
self.assertIn('org/project', project)
self.assertEqual('project-test2', job)
+ self.assertEqual(".*", ref_filter)
# Note: the value is converted from set to list by json.
self.assertEqual([1, "reason text"], autohold_requests[key])
diff --git a/tools/encrypt_secret.py b/tools/encrypt_secret.py
index 4cb1666..45ad68c 100755
--- a/tools/encrypt_secret.py
+++ b/tools/encrypt_secret.py
@@ -26,9 +26,11 @@
try:
from urllib.request import Request
from urllib.request import urlopen
+ from urllib.parse import urlparse
except ImportError:
from urllib2 import Request
from urllib2 import urlopen
+ from urlparse import urlparse
DESCRIPTION = """Encrypt a secret for Zuul.
@@ -43,7 +45,6 @@
parser.add_argument('url',
help="The base URL of the zuul server and tenant. "
"E.g., https://zuul.example.com/tenant-name")
- # TODO(jeblair): Throw a fit if SSL is not used.
parser.add_argument('project',
help="The name of the project.")
parser.add_argument('--strip', action='store_true', default=False,
@@ -60,6 +61,15 @@
"to standard output.")
args = parser.parse_args()
+ # We should not use unencrypted connections for retrieving the public key.
+ # Otherwise our secret can be compromised. The schemes file and https are
+ # considered safe.
+ url = urlparse(args.url)
+ if url.scheme not in ('file', 'https'):
+ sys.stderr.write("WARNING: Retrieving encryption key via an "
+ "unencrypted connection. Your secret may get "
+ "compromised.\n")
+
req = Request("%s/%s.pub" % (args.url.rstrip('/'), args.project))
pubkey = urlopen(req)
diff --git a/zuul/ansible/callback/zuul_stream.py b/zuul/ansible/callback/zuul_stream.py
index df28a57..15b491c 100644
--- a/zuul/ansible/callback/zuul_stream.py
+++ b/zuul/ansible/callback/zuul_stream.py
@@ -367,12 +367,13 @@
self._log_message(
result, status='MODULE FAILURE',
msg=result_dict['module_stderr'])
- elif (len([key for key in result_dict.keys()
- if not key.startswith('_ansible')]) == 1):
+ elif result._task.action == 'debug':
# this is a debug statement, handle it special
for key in [k for k in result_dict.keys()
if k.startswith('_ansible')]:
del result_dict[key]
+ if 'changed' in result_dict.keys():
+ del result_dict['changed']
keyname = next(iter(result_dict.keys()))
# If it has msg, that means it was like:
#
diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py
index ebf59b9..a7b3ef3 100755
--- a/zuul/cmd/client.py
+++ b/zuul/cmd/client.py
@@ -51,6 +51,11 @@
required=True)
cmd_autohold.add_argument('--job', help='job name',
required=True)
+ cmd_autohold.add_argument('--change',
+ help='specific change to hold nodes for',
+ required=False, default='')
+ cmd_autohold.add_argument('--ref', help='git ref to hold nodes for',
+ required=False, default='')
cmd_autohold.add_argument('--reason', help='reason for the hold',
required=True)
cmd_autohold.add_argument('--count',
@@ -173,9 +178,15 @@
def autohold(self):
client = zuul.rpcclient.RPCClient(
self.server, self.port, self.ssl_key, self.ssl_cert, self.ssl_ca)
+ if self.args.change and self.args.ref:
+ print("Change and ref can't be both used for the same request")
+ return False
+
r = client.autohold(tenant=self.args.tenant,
project=self.args.project,
job=self.args.job,
+ change=self.args.change,
+ ref=self.args.ref,
reason=self.args.reason,
count=self.args.count)
return r
@@ -190,14 +201,19 @@
return True
table = prettytable.PrettyTable(
- field_names=['Tenant', 'Project', 'Job', 'Count', 'Reason'])
+ field_names=[
+ 'Tenant', 'Project', 'Job', 'Ref Filter', 'Count', 'Reason'
+ ])
for key, value in autohold_requests.items():
# The key comes to us as a CSV string because json doesn't like
# non-str keys.
- tenant_name, project_name, job_name = key.split(',')
+ tenant_name, project_name, job_name, ref_filter = key.split(',')
count, reason = value
- table.add_row([tenant_name, project_name, job_name, count, reason])
+
+ table.add_row([
+ tenant_name, project_name, job_name, ref_filter, count, reason
+ ])
print(table)
return True
diff --git a/zuul/driver/gerrit/gerritsource.py b/zuul/driver/gerrit/gerritsource.py
index fdc1ad7..8f3408e 100644
--- a/zuul/driver/gerrit/gerritsource.py
+++ b/zuul/driver/gerrit/gerritsource.py
@@ -141,6 +141,10 @@
)
return [f]
+ def getRefForChange(self, change):
+ partial = change[-2:]
+ return "refs/changes/%s/%s/.*" % (partial, change)
+
approval = vs.Schema({'username': str,
'email': str,
diff --git a/zuul/driver/git/gitsource.py b/zuul/driver/git/gitsource.py
index a7d42be..9f0963d 100644
--- a/zuul/driver/git/gitsource.py
+++ b/zuul/driver/git/gitsource.py
@@ -68,3 +68,6 @@
def getRejectFilters(self, config):
return []
+
+ def getRefForChange(self, change):
+ raise NotImplementedError()
diff --git a/zuul/driver/github/githubconnection.py b/zuul/driver/github/githubconnection.py
index 27d31b4..6dfcdd3 100644
--- a/zuul/driver/github/githubconnection.py
+++ b/zuul/driver/github/githubconnection.py
@@ -722,7 +722,8 @@
change.newrev = event.newrev
change.url = self.getGitwebUrl(project, sha=event.newrev)
change.source_event = event
- change.files = self.getPushedFileNames(event)
+ if hasattr(event, 'commits'):
+ change.files = self.getPushedFileNames(event)
return change
def _getChange(self, project, number, patchset=None, refresh=False):
diff --git a/zuul/driver/github/githubsource.py b/zuul/driver/github/githubsource.py
index 33f8f7c..6f9b14d 100644
--- a/zuul/driver/github/githubsource.py
+++ b/zuul/driver/github/githubsource.py
@@ -144,6 +144,9 @@
)
return [f]
+ def getRefForChange(self, change):
+ return "refs/pull/%s/head" % change
+
review = v.Schema({'username': str,
'email': str,
diff --git a/zuul/executor/server.py b/zuul/executor/server.py
index a2a9b42..7384aa5 100644
--- a/zuul/executor/server.py
+++ b/zuul/executor/server.py
@@ -780,8 +780,17 @@
ret = merger.mergeChanges(items, repo_state=repo_state)
if not ret: # merge conflict
result = dict(result='MERGER_FAILURE')
+ if self.executor_server.statsd:
+ base_key = ("zuul.executor.%s.merger" %
+ self.executor_server.hostname)
+ self.executor_server.statsd.incr(base_key + ".FAILURE")
self.job.sendWorkComplete(json.dumps(result))
return False
+
+ if self.executor_server.statsd:
+ base_key = ("zuul.executor.%s.merger" %
+ self.executor_server.hostname)
+ self.executor_server.statsd.incr(base_key + ".SUCCESS")
recent = ret[3]
for key, commit in recent.items():
(connection, project, branch) = key
@@ -835,6 +844,13 @@
repo.checkout(selected_ref)
return selected_ref
+ def getAnsibleTimeout(self, start, timeout):
+ if timeout is not None:
+ now = time.time()
+ elapsed = now - start
+ timeout = timeout - elapsed
+ return timeout
+
def runPlaybooks(self, args):
result = None
@@ -852,10 +868,15 @@
pre_failed = False
success = False
self.started = True
+ time_started = time.time()
+ # timeout value is total job timeout or put another way
+ # the cumulative time that pre, run, and post can consume.
+ job_timeout = args['timeout']
for index, playbook in enumerate(self.jobdir.pre_playbooks):
# TODOv3(pabelanger): Implement pre-run timeout setting.
+ ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
pre_status, pre_code = self.runAnsiblePlaybook(
- playbook, args['timeout'], phase='pre', index=index)
+ playbook, ansible_timeout, phase='pre', index=index)
if pre_status != self.RESULT_NORMAL or pre_code != 0:
# These should really never fail, so return None and have
# zuul try again
@@ -863,8 +884,9 @@
break
if not pre_failed:
+ ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
job_status, job_code = self.runAnsiblePlaybook(
- self.jobdir.playbook, args['timeout'], phase='run')
+ self.jobdir.playbook, ansible_timeout, phase='run')
if job_status == self.RESULT_ABORTED:
return 'ABORTED'
elif job_status == self.RESULT_TIMED_OUT:
@@ -885,8 +907,9 @@
for index, playbook in enumerate(self.jobdir.post_playbooks):
# TODOv3(pabelanger): Implement post-run timeout setting.
+ ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
post_status, post_code = self.runAnsiblePlaybook(
- playbook, args['timeout'], success, phase='post', index=index)
+ playbook, ansible_timeout, success, phase='post', index=index)
if post_status == self.RESULT_ABORTED:
return 'ABORTED'
if post_status != self.RESULT_NORMAL or post_code != 0:
@@ -1465,6 +1488,11 @@
wrapped=False)
self.log.debug("Ansible complete, result %s code %s" % (
self.RESULT_MAP[result], code))
+ if self.executor_server.statsd:
+ base_key = ("zuul.executor.%s.phase.setup" %
+ self.executor_server.hostname)
+ self.executor_server.statsd.incr(base_key + ".%s" %
+ self.RESULT_MAP[result])
return result, code
def runAnsibleCleanup(self, playbook):
@@ -1485,6 +1513,11 @@
wrapped=False)
self.log.debug("Ansible complete, result %s code %s" % (
self.RESULT_MAP[result], code))
+ if self.executor_server.statsd:
+ base_key = ("zuul.executor.%s.phase.cleanup" %
+ self.executor_server.hostname)
+ self.executor_server.statsd.incr(base_key + ".%s" %
+ self.RESULT_MAP[result])
return result, code
def emitPlaybookBanner(self, playbook, step, phase, result=None):
@@ -1554,6 +1587,11 @@
cmd=cmd, timeout=timeout, playbook=playbook)
self.log.debug("Ansible complete, result %s code %s" % (
self.RESULT_MAP[result], code))
+ if self.executor_server.statsd:
+ base_key = ("zuul.executor.%s.phase.%s" %
+ (self.executor_server.hostname, phase or 'unknown'))
+ self.executor_server.statsd.incr(base_key + ".%s" %
+ self.RESULT_MAP[result])
self.emitPlaybookBanner(playbook, 'END', phase, result=result)
return result, code
@@ -1636,7 +1674,7 @@
'load_multiplier', '2.5'))
self.max_load_avg = multiprocessing.cpu_count() * load_multiplier
self.max_starting_builds = self.max_load_avg * 2
- self.min_starting_builds = 4
+ self.min_starting_builds = max(int(multiprocessing.cpu_count() / 2), 1)
self.min_avail_mem = float(get_default(self.config, 'executor',
'min_avail_mem', '5.0'))
self.accepting_work = False
diff --git a/zuul/merger/merger.py b/zuul/merger/merger.py
index 07f3e69..5e102b4 100644
--- a/zuul/merger/merger.py
+++ b/zuul/merger/merger.py
@@ -261,14 +261,6 @@
repo.git.checkout(ref)
return repo.head.commit
- def checkoutLocalBranch(self, branch):
- # TODO(jeblair): retire in favor of checkout
- repo = self.createRepoObject()
- # Perform a hard reset before checking out so that we clean up
- # anything that might be left over from a merge.
- reset_repo_to_head(repo)
- repo.heads[branch].checkout()
-
def cherryPick(self, ref):
repo = self.createRepoObject()
self.log.debug("Cherry-picking %s" % ref)
diff --git a/zuul/rpcclient.py b/zuul/rpcclient.py
index 8f2e5dc..a947ed0 100644
--- a/zuul/rpcclient.py
+++ b/zuul/rpcclient.py
@@ -48,10 +48,12 @@
self.log.debug("Job complete, success: %s" % (not job.failure))
return job
- def autohold(self, tenant, project, job, reason, count):
+ def autohold(self, tenant, project, job, change, ref, reason, count):
data = {'tenant': tenant,
'project': project,
'job': job,
+ 'change': change,
+ 'ref': ref,
'reason': reason,
'count': count}
return not self.submitJob('zuul:autohold', data).failure
diff --git a/zuul/rpclistener.py b/zuul/rpclistener.py
index e5016df..f3f55f6 100644
--- a/zuul/rpclistener.py
+++ b/zuul/rpclistener.py
@@ -150,7 +150,20 @@
job.sendWorkException(error.encode('utf8'))
return
+ if args['change'] and args['ref']:
+ job.sendWorkException("Change and ref can't be both used "
+ "for the same request")
+
+ if args['change']:
+ # Convert change into ref based on zuul connection
+ ref_filter = project.source.getRefForChange(args['change'])
+ elif args['ref']:
+ ref_filter = "%s" % args['ref']
+ else:
+ ref_filter = ".*"
+
params['job_name'] = args['job']
+ params['ref_filter'] = ref_filter
params['reason'] = args['reason']
if args['count'] < 0:
diff --git a/zuul/scheduler.py b/zuul/scheduler.py
index 14ca029..2bce43f 100644
--- a/zuul/scheduler.py
+++ b/zuul/scheduler.py
@@ -19,6 +19,7 @@
import logging
import os
import pickle
+import re
import queue
import socket
import sys
@@ -435,8 +436,9 @@
self.last_reconfigured = int(time.time())
# TODOv3(jeblair): reconfigure time should be per-tenant
- def autohold(self, tenant_name, project_name, job_name, reason, count):
- key = (tenant_name, project_name, job_name)
+ def autohold(self, tenant_name, project_name, job_name, ref_filter,
+ reason, count):
+ key = (tenant_name, project_name, job_name, ref_filter)
if count == 0 and key in self.autohold_requests:
self.log.debug("Removing autohold for %s", key)
del self.autohold_requests[key]
@@ -972,6 +974,84 @@
self.log.exception("Exception estimating build time:")
pipeline.manager.onBuildStarted(event.build)
+ def _getAutoholdRequestKey(self, build):
+ change = build.build_set.item.change
+
+ autohold_key_base = (build.pipeline.layout.tenant.name,
+ change.project.canonical_name,
+ build.job.name)
+
+ class Scope(object):
+ """Enum defining a precedence/priority of autohold requests.
+
+ Autohold requests for specific refs should be fulfilled first,
+ before those for changes, and generic jobs.
+
+ The matching algorithm goes over all existing autohold requests and
+ returns the one with the highest scope (if several requests share
+ that scope, the first one encountered wins).
+ """
+ NONE = 0
+ JOB = 1
+ CHANGE = 2
+ REF = 3
+
+ def autohold_key_base_issubset(base, request_key):
+ """check whether the given key is a subset of the build key"""
+ index = 0
+ base_len = len(base)
+ while index < base_len:
+ if base[index] != request_key[index]:
+ return False
+ index += 1
+ return True
+
+ # Do a partial match of the autohold key against all autohold
+ # requests, ignoring the last element of the key (ref filter),
+ # and finally do a regex match between ref filter from
+ # the autohold request and the build's change ref to check
+ # if it matches. Lastly, make sure that we match the most
+ # specific autohold request by comparing "scopes"
+ # of requests - the most specific is selected.
+ autohold_key = None
+ scope = Scope.NONE
+ for request in self.autohold_requests:
+ ref_filter = request[-1]
+ if not autohold_key_base_issubset(autohold_key_base, request) \
+ or not re.match(ref_filter, change.ref):
+ continue
+
+ if ref_filter == ".*":
+ candidate_scope = Scope.JOB
+ elif ref_filter.endswith(".*"):
+ candidate_scope = Scope.CHANGE
+ else:
+ candidate_scope = Scope.REF
+
+ if candidate_scope > scope:
+ scope = candidate_scope
+ autohold_key = request
+
+ return autohold_key
+
+ def _processAutohold(self, build):
+
+ # We explicitly only want to hold nodes for jobs if they have
+ # failed and have an autohold request.
+ if build.result != "FAILURE":
+ return
+
+ autohold_key = self._getAutoholdRequestKey(build)
+ try:
+ self.nodepool.holdNodeSet(build.nodeset, autohold_key)
+ except Exception:
+ self.log.exception("Unable to process autohold for %s:",
+ autohold_key)
+ if autohold_key in self.autohold_requests:
+ self.log.debug("Removing autohold %s due to exception",
+ autohold_key)
+ del self.autohold_requests[autohold_key]
+
def _doBuildCompletedEvent(self, event):
build = event.build
@@ -979,27 +1059,10 @@
# to pass this on to the pipeline manager, make sure we return
# the nodes to nodepool.
try:
- nodeset = build.nodeset
- autohold_key = (build.pipeline.layout.tenant.name,
- build.build_set.item.change.project.canonical_name,
- build.job.name)
- if (build.result == "FAILURE" and
- autohold_key in self.autohold_requests):
- # We explicitly only want to hold nodes for jobs if they have
- # failed and have an autohold request.
- try:
- self.nodepool.holdNodeSet(nodeset, autohold_key)
- except Exception:
- self.log.exception("Unable to process autohold for %s:",
- autohold_key)
- if autohold_key in self.autohold_requests:
- self.log.debug("Removing autohold %s due to exception",
- autohold_key)
- del self.autohold_requests[autohold_key]
-
- self.nodepool.returnNodeSet(nodeset)
+ self._processAutohold(build)
+ self.nodepool.returnNodeSet(build.nodeset)
except Exception:
- self.log.exception("Unable to return nodeset %s" % (nodeset,))
+ self.log.exception("Unable to return nodeset %s" % build.nodeset)
if build.build_set is not build.build_set.item.current_build_set:
self.log.debug("Build %s is not in the current build set" %