Merge "Set remote url on every getRepo in merger"
diff --git a/.zuul.yaml b/.zuul.yaml
index d73be8f..caef296 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -25,8 +25,9 @@
       - openstack/ara
-      - zuul/ansible/callback/.*
+      - zuul/ansible/.*
       - playbooks/zuul-stream/.*
+      - requirements.txt
 - project:
diff --git a/doc/source/admin/components.rst b/doc/source/admin/components.rst
index 88b898f..ba14752 100644
--- a/doc/source/admin/components.rst
+++ b/doc/source/admin/components.rst
@@ -442,6 +442,11 @@
       Port to use for finger log streamer.
+   .. attr:: state_dir
+      :default: /var/lib/zuul
+      Path to directory in which Zuul should save its state.
    .. attr:: git_dir
       :default: /var/lib/zuul/git
diff --git a/doc/source/admin/drivers/github.rst b/doc/source/admin/drivers/github.rst
index 83ac77f..a89cfc6 100644
--- a/doc/source/admin/drivers/github.rst
+++ b/doc/source/admin/drivers/github.rst
@@ -40,60 +40,43 @@
+.. NOTE Duplicate content here and in zuul-from-scratch.rst.  Keep them
+   in sync.
 To create a `GitHub application
 * Go to your organization settings page to create the application, e.g.:
 * Set GitHub App name to "my-org-zuul"
 * Set Setup URL to your setup documentation, when user install the application
   they are redirected to this url
 * Set Webhook URL to
 * Create a Webhook secret
 * Set permissions:
   * Commit statuses: Read & Write
   * Issues: Read & Write
   * Pull requests: Read & Write
   * Repository contents: Read & Write (write to let zuul merge change)
+  * Repository administration: Read
 * Set events subscription:
   * Label
   * Status
   * Issue comment
   * Issues
   * Pull request
   * Pull request review
   * Pull request review comment
   * Commit comment
   * Create
   * Push
   * Release
 * Set Where can this GitHub App be installed to "Any account"
 * Create the App
 * Generate a Private key in the app settings page
 Then in the zuul.conf, set webhook_token, app_id and app_key.
diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst
index a2a2ee7..af83a3b 100644
--- a/doc/source/admin/index.rst
+++ b/doc/source/admin/index.rst
@@ -12,6 +12,7 @@
    :maxdepth: 2
+   zuul-from-scratch
diff --git a/doc/source/admin/monitoring.rst b/doc/source/admin/monitoring.rst
index d43fd03..fbcedad 100644
--- a/doc/source/admin/monitoring.rst
+++ b/doc/source/admin/monitoring.rst
@@ -131,6 +131,14 @@
    component of the key will be replaced with the hostname of the
+   .. stat:: merger.<result>
+      :type: counter
+      Incremented to represent the status of a Zuul executor's merger
+      operations. ``<result>`` can be either ``SUCCESS`` or ``FAILURE``.
+      A failed merge operation which would be accounted for as a ``FAILURE``
+      is what ends up being returned by Zuul as a ``MERGER_FAILURE``.
    .. stat:: builds
       :type: counter
@@ -148,16 +156,37 @@
       The number of builds currently running on this executor.  This
       includes starting builds.
+  .. stat:: phase
+     Subtree detailing per-phase execution statistics:
+     .. stat:: <phase>
+        ``<phase>`` represents a phase in the execution of a job.
+        This can be an *internal* phase (such as ``setup`` or ``cleanup``) as
+        well as *job* phases such as ``pre``, ``run`` or ``post``.
+        .. stat:: <result>
+           :type: counter
+           A counter for each type of result.
+           These results do not, by themselves, determine the status of a build
+           but are indicators of the exit status provided by Ansible for the
+           execution of a particular phase.
+           Examples of possible counters for each phase are: ``RESULT_NORMAL``,
    .. stat:: load_average
       :type: gauge
       The one-minute load average of this executor, multiplied by 100.
-   .. stat:: pct_available_ram
+   .. stat:: pct_used_ram
       :type: gauge
-      The available RAM (including buffers and cache) on this
-      executor, as a percentage multiplied by 100.
+      The used RAM (excluding buffers and cache) on this executor, as
+      a percentage multiplied by 100.
 .. stat:: zuul.nodepool
diff --git a/doc/source/admin/tenants.rst b/doc/source/admin/tenants.rst
index 48e7ba8..5bcd2a2 100644
--- a/doc/source/admin/tenants.rst
+++ b/doc/source/admin/tenants.rst
@@ -25,7 +25,7 @@
 A tenant is a collection of projects which share a Zuul
-configuration.  An example tenant definition is:
+configuration. Some examples of tenant definitions are:
 .. code-block:: yaml
@@ -46,6 +46,27 @@
              - project2:
                  exclude-unprotected-branches: true
+.. code-block:: yaml
+   - tenant:
+       name: my-tenant
+       source:
+         gerrit:
+           config-projects:
+             - common-config
+           untrusted-projects:
+             - exclude:
+                 - job
+                 - semaphore
+                 - project
+                 - project-template
+                 - nodeset
+                 - secret
+               projects:
+                 - project1
+                 - project2:
+                     exclude-unprotected-branches: true
 .. attr:: tenant
    The following attributes are supported:
@@ -157,6 +178,24 @@
             processed. Defaults to the tenant wide setting of
+      .. attr:: <project-group>
+         The items in the list are dictionaries with the following
+         attributes. A **configuration items** definition is applied
+         to the list of projects.
+         .. attr:: include
+            A list of **configuration items** that should be loaded.
+         .. attr:: exclude
+            A list of **configuration items** that should not be loaded.
+         .. attr:: projects
+            A list of **project** items.
    .. attr:: max-nodes-per-job
       :default: 5
diff --git a/doc/source/admin/zuul-from-scratch.rst b/doc/source/admin/zuul-from-scratch.rst
new file mode 100644
index 0000000..141216b
--- /dev/null
+++ b/doc/source/admin/zuul-from-scratch.rst
@@ -0,0 +1,505 @@
+Zuul From Scratch
+.. note:: This is a work in progress that attempts to walk through all
+          of the steps needed to run Zuul on a cloud server against
+          GitHub projects.
+Environment Setup
+We're going to be using Fedora 27 on a cloud server for this
+Login to your environment
+Since we'll be using a cloud image for Fedora 27, our login user will
+be ``fedora`` which will also be the staging user for installation of
+Zuul and Nodepool.
+To get started, ssh to your machine as the ``fedora`` user::
+   ssh fedora@<ip_address>
+Environment Setup
+   sudo dnf update -y
+   sudo systemctl reboot
+   sudo dnf install git redhat-lsb-core python3 python3-pip python3-devel make gcc openssl-devel python-openstackclient -y
+   pip3 install --user bindep
+Zuul and Nodepool Installation
+Install Zookeeper
+   sudo dnf install zookeeper -y
+Install Nodepool
+   sudo adduser --system nodepool --home-dir /var/lib/nodepool --create-home
+   git clone
+   cd nodepool/
+   sudo dnf -y install $(bindep -b)
+   sudo pip3 install .
+Install Zuul
+   sudo adduser --system zuul --home-dir /var/lib/zuul --create-home
+   git clone
+   cd zuul/
+   sudo dnf install $(bindep -b) -y
+   sudo pip3 install git+
+   sudo pip3 install .
+Zookeeper Setup
+.. TODO recommended reading for zk clustering setup
+   sudo bash -c 'echo "1" > /etc/zookeeper/myid'
+   sudo bash -c 'echo "tickTime=2000
+   dataDir=/var/lib/zookeeper
+   clientPort=2181" > /etc/zookeeper/zoo.cfg'
+Nodepool Setup
+Before starting on this, you need to download your `openrc`
+configuration from your OpenStack cloud.  Put it on your server in the
+fedora user's home directory.  It should be called
+``<username>``.  Once that is done, create a new keypair
+that will be installed when instantiating the servers::
+   cd ~
+   source <username>  # this will prompt for password - enter it
+   umask 0066
+   ssh-keygen -t rsa -b 2048 -f nodepool_rsa  # don't enter a passphrase
+   openstack keypair create --public-key nodepool_rsa.pub nodepool
+We'll use the private key later when configuring Zuul.  In the same
+session, configure nodepool to talk to your cloud::
+   sudo mkdir -p ~nodepool/.config/openstack
+   cat > clouds.yaml <<EOF
+   clouds:
+     mycloud:
+       auth:
+         username: $OS_USERNAME
+         password: $OS_PASSWORD
+         project_name: ${OS_PROJECT_NAME:-$OS_TENANT_NAME}
+         auth_url: $OS_AUTH_URL
+       region_name: $OS_REGION_NAME
+   EOF
+   sudo mv clouds.yaml ~nodepool/.config/openstack/
+   sudo chown -R nodepool.nodepool ~nodepool/.config
+   umask 0002
+Once you've written out the file, double check all the required fields have been filled out.
+   sudo mkdir /etc/nodepool/
+   sudo mkdir /var/log/nodepool
+   sudo chgrp -R nodepool /var/log/nodepool/
+   sudo chmod 775 /var/log/nodepool/
+Nodepool Configuration
+Inputs needed for this file:
+* cloud name / region name - from clouds.yaml
+* flavor-name
+* image-name - from your cloud
+   sudo bash -c "cat >/etc/nodepool/nodepool.yaml <<EOF
+   zookeeper-servers:
+     - host: localhost
+       port: 2181
+   providers:
+     - name: myprovider # this is a nodepool identifier for this cloud provider (cloud+region combo)
+       region-name: regionOne  # this needs to match the region name in clouds.yaml but is only needed if there is more than one region
+       cloud: mycloud  # This needs to match the name in clouds.yaml
+       cloud-images:
+         - name: centos-7   # Defines a cloud-image for nodepool
+           image-name: CentOS-7-x86_64-GenericCloud-1706  # name of image from cloud
+           username: centos  # The user Zuul should log in as
+       pools:
+         - name: main
+           max-servers: 4  # nodepool will never create more than this many servers
+           labels:
+             - name: centos-7-small  # defines label that will be used to get one of these in a job
+               flavor-name: 'm1.small'  # name of flavor from cloud
+               cloud-image: centos-7  # matches name from cloud-images
+               key-name: nodepool # name of the keypair to use for authentication
+   labels:
+     - name: centos-7-small # defines label that will be used in jobs
+       min-ready: 2  # nodepool will always keep this many booted and ready to go
+   EOF"
+.. warning::
+   ``min-ready: 2`` may incur costs with your cloud provider
+Zuul Setup
+   sudo mkdir /etc/zuul/
+   sudo mkdir /var/log/zuul/
+   sudo chown zuul.zuul /var/log/zuul/
+   sudo mkdir /var/lib/zuul/.ssh
+   sudo chmod 0700 /var/lib/zuul/.ssh
+   sudo mv nodepool_rsa /var/lib/zuul/.ssh
+   sudo chown -R zuul.zuul /var/lib/zuul/.ssh
+Zuul Configuration
+Write the Zuul config file.  Note that this configures Zuul's web
+server to listen on all public addresses.  This is so that Zuul may
+receive webhook events from GitHub.  You may wish to proxy this or
+further restrict public access.
+   sudo bash -c "cat > /etc/zuul/zuul.conf <<EOF
+   [gearman]
+   server=
+   [gearman_server]
+   start=true
+   [executor]
+   private_key_file=/var/lib/zuul/.ssh/nodepool_rsa
+   [web]
+   listen_address=0.0.0.0
+   [scheduler]
+   tenant_config=/etc/zuul/main.yaml
+   EOF"
+   sudo bash -c "cat > /etc/zuul/main.yaml <<EOF
+   - tenant:
+       name: quickstart
+   EOF"
+Service Management
+Zookeeper Service Management
+   sudo systemctl start zookeeper.service
+   sudo systemctl status zookeeper.service
+   ● zookeeper.service - Apache ZooKeeper
+      Loaded: loaded (/usr/lib/systemd/system/zookeeper.service; disabled; vendor preset: disabled)
+      Active: active (running) since Wed 2018-01-03 14:53:47 UTC; 5s ago
+     Process: 4153 ExecStart=/usr/bin/ start zoo.cfg (code=exited, status=0/SUCCESS)
+    Main PID: 4160 (java)
+       Tasks: 17 (limit: 4915)
+      CGroup: /system.slice/zookeeper.service
+              └─4160 java -Dzookeeper.log.dir=/var/log/zookeeper -Dzookeeper.root.logger=INFO,CONSOLE -cp /usr/share/java/
+   sudo systemctl enable zookeeper.service
+Nodepool Service Management
+   sudo bash -c "cat > /etc/systemd/system/nodepool-launcher.service <<EOF
+   [Unit]
+   Description=Nodepool Launcher Service
+   [Service]
+   Type=simple
+   # Options to pass to nodepool-launcher.
+   Group=nodepool
+   User=nodepool
+   RuntimeDirectory=nodepool
+   ExecStart=/usr/local/bin/nodepool-launcher
+   [Install]
+   EOF"
+   sudo chmod 0644 /etc/systemd/system/nodepool-launcher.service
+   sudo systemctl daemon-reload
+   sudo systemctl start nodepool-launcher.service
+   sudo systemctl status nodepool-launcher.service
+   sudo systemctl enable nodepool-launcher.service
+Zuul Service Management
+   sudo bash -c "cat > /etc/systemd/system/zuul-scheduler.service <<EOF
+   [Unit]
+   Description=Zuul Scheduler Service
+   [Service]
+   Type=simple
+   Group=zuul
+   User=zuul
+   RuntimeDirectory=zuul
+   ExecStart=/usr/local/bin/zuul-scheduler
+   ExecStop=/usr/local/bin/zuul-scheduler stop
+   [Install]
+   EOF"
+   sudo bash -c "cat > /etc/systemd/system/zuul-executor.service <<EOF
+   [Unit]
+   Description=Zuul Executor Service
+   [Service]
+   Type=simple
+   Group=zuul
+   User=zuul
+   RuntimeDirectory=zuul
+   ExecStart=/usr/local/bin/zuul-executor
+   ExecStop=/usr/local/bin/zuul-executor stop
+   [Install]
+   EOF"
+   sudo bash -c "cat > /etc/systemd/system/zuul-web.service <<EOF
+   [Unit]
+   Description=Zuul Web Service
+   [Service]
+   Type=simple
+   Group=zuul
+   User=zuul
+   RuntimeDirectory=zuul
+   ExecStart=/usr/local/bin/zuul-web
+   ExecStop=/usr/local/bin/zuul-web stop
+   [Install]
+   EOF"
+   sudo systemctl daemon-reload
+   sudo systemctl start zuul-scheduler.service
+   sudo systemctl status zuul-scheduler.service
+   sudo systemctl enable zuul-scheduler.service
+   sudo systemctl start zuul-executor.service
+   sudo systemctl status zuul-executor.service
+   sudo systemctl enable zuul-executor.service
+   sudo systemctl start zuul-web.service
+   sudo systemctl status zuul-web.service
+   sudo systemctl enable zuul-web.service
+Use Zuul Jobs
+Add to ``/etc/zuul/zuul.conf``::
+   sudo bash -c "cat >> /etc/zuul/zuul.conf <<EOF
+   [connection zuul-git]
+   driver=git
+   baseurl=
+   EOF"
+Restart executor and scheduler::
+   sudo systemctl restart zuul-executor.service
+   sudo systemctl restart zuul-scheduler.service
+Configure GitHub
+You'll need an organization in GitHub for this, so create one if you
+haven't already.  In this example we will use `my-org`.
+.. NOTE Duplicate content here and in drivers/github.rst.  Keep them
+   in sync.
+Create a `GitHub application
+* Go to your organization settings page to create the application, e.g.:
+* Set GitHub App name to "my-org-zuul"
+* Set Setup URL to your setup documentation, when users install the application
+  they are redirected to this url
+* Set Webhook URL to
+  ``http://<IP ADDRESS>/connection/github/payload``.
+* Create a Webhook secret, and record it for later use
+* Set permissions:
+  * Commit statuses: Read & Write
+  * Issues: Read & Write
+  * Pull requests: Read & Write
+  * Repository contents: Read & Write (write to let zuul merge change)
+  * Repository administration: Read
+* Set events subscription:
+  * Label
+  * Status
+  * Issue comment
+  * Issues
+  * Pull request
+  * Pull request review
+  * Pull request review comment
+  * Commit comment
+  * Create
+  * Push
+  * Release
+* Set Where can this GitHub App be installed to "Any account"
+* Create the App
+* Generate a Private key in the app settings page and save the file for later
+.. TODO See if we can script this using GitHub API
+Go back to the `General` settings page for the app,
+and look for the app `ID` number, under the `About` section.
+Edit ``/etc/zuul/zuul.conf`` to add the following::
+  [connection github]
+  driver=github
+  app_id=<APP ID NUMBER>
+  app_key=/etc/zuul/github.pem
+  webhook_token=<WEBHOOK TOKEN>
+Upload the private key which was generated earlier, and save it in ``/etc/zuul/github.pem``.
+Restart all of Zuul::
+  sudo systemctl restart zuul-executor.service
+  sudo systemctl restart zuul-web.service
+  sudo systemctl restart zuul-scheduler.service
+Go to the `Advanced` tab for the app in GitHub,
+and look for the initial ping from the app.  It probably wasn't
+delivered since Zuul wasn't configured at the time, so click
+``Resend`` and verify that it is delivered now that Zuul is
+Visit the public app page on GitHub,
+, and install the app into your org.
+Create two new repositories in your org.  One will hold the
+configuration for this tenant in Zuul, the other should be a normal
+project repo to use for testing.  We'll call them `zuul-test-config`
+and `zuul-test`, respectively.
+Edit ``/etc/zuul/main.yaml`` so that it looks like this::
+   - tenant:
+       name: quickstart
+       source:
+         zuul-git:
+           config-projects:
+             - openstack-infra/zuul-base-jobs
+           untrusted-projects:
+             - openstack-infra/zuul-jobs
+         github:
+           config-projects:
+             - my-org/zuul-test-config
+           untrusted-projects:
+             - my-org/zuul-test
+The first section, under ``zuul-git``, imports the "standard library" of
+Zuul jobs, a collection of jobs that can be used by any Zuul
+The second section is your GitHub configuration.
+After updating the file, restart the Zuul scheduler::
+  sudo systemctl restart zuul-scheduler.service
+Add an initial pipeline configuration to the `zuul-test-config`
+repository.  Inside that project, create a ``zuul.yaml`` file with the
+following contents::
+   - pipeline:
+       name: check
+       description: |
+         Newly opened pull requests enter this pipeline to receive an
+         initial verification
+       manager: independent
+       trigger:
+         github:
+           - event: pull_request
+             action:
+               - opened
+               - changed
+               - reopened
+           - event: pull_request
+             action: comment
+             comment: (?i)^\s*recheck\s*$
+       start:
+         github:
+           status: pending
+           comment: false
+       success:
+         github:
+           status: 'success'
+       failure:
+         github:
+           status: 'failure'
+Merge that commit into the repository.
+In the `zuul-test` project, create a `.zuul.yaml` file with the
+following contents::
+   - project:
+       check:
+         jobs:
+           - noop
+Open a new pull request with that commit against the `zuul-test`
+project and verify that Zuul reports a successful run of the `noop`
diff --git a/doc/source/user/config.rst b/doc/source/user/config.rst
index 597062e..8492423 100644
--- a/doc/source/user/config.rst
+++ b/doc/source/user/config.rst
@@ -546,6 +546,12 @@
       from this job. Once this is set to ``true`` it cannot be reset to
+   .. attr:: abstract
+      :default: false
+      To indicate a job is not intended to be run directly, but
+      instead must be inherited from, set this attribute to ``true``.
    .. attr:: success-message
       :default: SUCCESS
diff --git a/requirements.txt b/requirements.txt
index f24f195..7057c5a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -25,6 +25,6 @@
diff --git a/tests/ b/tests/
index c13519e..ec94c48 100755
--- a/tests/
+++ b/tests/
@@ -1439,6 +1439,7 @@
     def __init__(self, use_ssl=False):
         self.hold_jobs_in_queue = False
         self.hold_merge_jobs_in_queue = False
+        self.jobs_history = []
         if use_ssl:
             ssl_ca = os.path.join(FIXTURE_DIR, 'gearman/root-ca.pem')
             ssl_cert = os.path.join(FIXTURE_DIR, 'gearman/server.pem')
@@ -1455,6 +1456,7 @@
     def getJobForConnection(self, connection, peek=False):
         for job_queue in [self.high_queue, self.normal_queue, self.low_queue]:
             for job in job_queue:
+                self.jobs_history.append(job)
                 if not hasattr(job, 'waiting'):
                         job.waiting = self.hold_jobs_in_queue
@@ -1647,6 +1649,10 @@
         nodeid = path.split("/")[-1]
         return nodeid
+    def removeNode(self, node):
+        path = self.NODE_ROOT + '/' + node["_oid"]
+        self.client.delete(path, recursive=True)
     def addFailRequest(self, request):
diff --git a/tests/fixtures/config/abstract/git/common-config/playbooks/base.yaml b/tests/fixtures/config/abstract/git/common-config/playbooks/base.yaml
new file mode 100644
index 0000000..f679dce
--- /dev/null
+++ b/tests/fixtures/config/abstract/git/common-config/playbooks/base.yaml
@@ -0,0 +1,2 @@
+- hosts: all
+  tasks: []
diff --git a/tests/fixtures/config/abstract/git/common-config/zuul.yaml b/tests/fixtures/config/abstract/git/common-config/zuul.yaml
new file mode 100644
index 0000000..4aeb947
--- /dev/null
+++ b/tests/fixtures/config/abstract/git/common-config/zuul.yaml
@@ -0,0 +1,25 @@
+- pipeline:
+    name: check
+    manager: independent
+    trigger:
+      gerrit:
+        - event: patchset-created
+    success:
+      gerrit:
+        Verified: 1
+    failure:
+      gerrit:
+        Verified: -1
+- job:
+    name: base
+    parent: null
+    run: playbooks/base.yaml
+- job:
+    name: job-abstract
+    abstract: true
+- job:
+    name: job-child
+    parent: job-abstract
diff --git a/tests/fixtures/config/abstract/git/org_project/zuul.yaml b/tests/fixtures/config/abstract/git/org_project/zuul.yaml
new file mode 100644
index 0000000..cf635e8
--- /dev/null
+++ b/tests/fixtures/config/abstract/git/org_project/zuul.yaml
@@ -0,0 +1,4 @@
+- project:
+    name: org/project
+    check:
+      jobs: []
diff --git a/tests/fixtures/config/abstract/main.yaml b/tests/fixtures/config/abstract/main.yaml
new file mode 100644
index 0000000..208e274
--- /dev/null
+++ b/tests/fixtures/config/abstract/main.yaml
@@ -0,0 +1,8 @@
+- tenant:
+    name: tenant-one
+    source:
+      gerrit:
+        config-projects:
+          - common-config
+        untrusted-projects:
+          - org/project
diff --git a/tests/fixtures/config/disk-accountant/git/common-config/playbooks/dd-big-empty-file.yaml b/tests/fixtures/config/disk-accountant/git/common-config/playbooks/dd-big-empty-file.yaml
index 95ab870..ba35eb0 100644
--- a/tests/fixtures/config/disk-accountant/git/common-config/playbooks/dd-big-empty-file.yaml
+++ b/tests/fixtures/config/disk-accountant/git/common-config/playbooks/dd-big-empty-file.yaml
@@ -1,6 +1,7 @@
 - hosts: localhost
     - command: dd if=/dev/zero of=toobig bs=1M count=2
+    - command: sync
     - wait_for:
         delay: 10
         path: /
diff --git a/tests/fixtures/config/tenant-parser/groups4.yaml b/tests/fixtures/config/tenant-parser/groups4.yaml
new file mode 100644
index 0000000..3512673
--- /dev/null
+++ b/tests/fixtures/config/tenant-parser/groups4.yaml
@@ -0,0 +1,11 @@
+- tenant:
+    name: tenant-one
+    source:
+      gerrit:
+        config-projects:
+          - common-config
+        untrusted-projects:
+          - include: []
+            projects:
+              - org/project1
+              - org/project2
diff --git a/tests/fixtures/layouts/timer-github.yaml b/tests/fixtures/layouts/timer-github.yaml
new file mode 100644
index 0000000..4f3efe4
--- /dev/null
+++ b/tests/fixtures/layouts/timer-github.yaml
@@ -0,0 +1,25 @@
+- pipeline:
+    name: periodic
+    manager: independent
+    trigger:
+      timer:
+        - time: '* * * * * */1'
+- job:
+    name: base
+    parent: null
+    run: playbooks/base.yaml
+- job:
+    name: project-bitrot
+    nodeset:
+      nodes:
+        - name: static
+          label: ubuntu-xenial
+    run: playbooks/project-bitrot.yaml
+- project:
+    name: org/project
+    periodic:
+      jobs:
+        - project-bitrot
diff --git a/tests/unit/ b/tests/unit/
index f7d580c..49220f2 100644
--- a/tests/unit/
+++ b/tests/unit/
@@ -213,6 +213,33 @@
+class TestTenantGroups4(TenantParserTestCase):
+    tenant_config_file = 'config/tenant-parser/groups4.yaml'
+    def test_tenant_groups(self):
+        tenant = self.sched.abide.tenants.get('tenant-one')
+        self.assertEqual(['common-config'],
+                         [ for x in tenant.config_projects])
+        self.assertEqual(['org/project1', 'org/project2'],
+                         [ for x in tenant.untrusted_projects])
+        project = tenant.config_projects[0]
+        tpc = tenant.project_configs[project.canonical_name]
+        self.assertEqual(self.CONFIG_SET, tpc.load_classes)
+        project = tenant.untrusted_projects[0]
+        tpc = tenant.project_configs[project.canonical_name]
+        self.assertEqual(set([]),
+                         tpc.load_classes)
+        project = tenant.untrusted_projects[1]
+        tpc = tenant.project_configs[project.canonical_name]
+        self.assertEqual(set([]),
+                         tpc.load_classes)
+        # Check that only one merger:cat job was requested
+        # org/project1 and org/project2 have an empty load_classes
+        cat_jobs = [job for job in self.gearman_server.jobs_history
+                    if == b'merger:cat']
+        self.assertEqual(1, len(cat_jobs))
 class TestTenantUnprotectedBranches(TenantParserTestCase):
     tenant_config_file = 'config/tenant-parser/unprotected-branches.yaml'
diff --git a/tests/unit/ b/tests/unit/
index 7081b53..e12846d 100644
--- a/tests/unit/
+++ b/tests/unit/
@@ -10,6 +10,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
+import fixtures
 import os
 import tempfile
 import time
@@ -32,6 +33,10 @@
 class TestDiskAccountant(BaseTestCase):
+    def setUp(self):
+        super(TestDiskAccountant, self).setUp()
+        self.useFixture(fixtures.NestedTempfile())
     def test_disk_accountant(self):
         jobs_dir = tempfile.mkdtemp(
             dir=os.environ.get("ZUUL_TEST_ROOT", None))
@@ -47,6 +52,8 @@
             testfile = os.path.join(jobdir, 'tfile')
             with open(testfile, 'w') as tf:
                 tf.write(2 * 1024 * 1024 * '.')
+                tf.flush()
+                os.fsync(tf.fileno())
             # da should catch over-limit dir within 5 seconds
             for i in range(0, 50):
diff --git a/tests/unit/ b/tests/unit/
index b424769..0a5c0a4 100644
--- a/tests/unit/
+++ b/tests/unit/
@@ -12,6 +12,7 @@
 # License for the specific language governing permissions and limitations
 # under the License.
+import fixtures
 import os
 import subprocess
 import tempfile
@@ -26,6 +27,10 @@
     def setUp(self):
         super(TestEncryption, self).setUp()
         self.private, self.public = encryption.generate_rsa_keypair()
+        # Because we set delete to False when using NamedTemporaryFile below
+        # we need to stick our usage of temporary files in the NestedTempfile
+        # fixture ensuring everything gets cleaned up when it is done.
+        self.useFixture(fixtures.NestedTempfile())
     def test_serialization(self):
         "Verify key serialization"
diff --git a/tests/unit/ b/tests/unit/
index 8cb98ee..46e1d99 100755
--- a/tests/unit/
+++ b/tests/unit/
@@ -490,6 +490,7 @@
     def test_slow_start(self):
         self.executor_server.hold_jobs_in_build = True
         self.executor_server.max_starting_builds = 1
+        self.executor_server.min_starting_builds = 1
         A = self.fake_gerrit.addFakeChange('common-config', 'master', 'A')
diff --git a/tests/unit/ b/tests/unit/
index cd36ba3..8978415 100644
--- a/tests/unit/
+++ b/tests/unit/
@@ -210,6 +210,34 @@
         self.assertEqual(1, len(self.history))
+    @simple_layout('layouts/basic-github.yaml', driver='github')
+    def test_timer_event(self):
+        self.executor_server.hold_jobs_in_build = True
+        self.commitConfigUpdate('org/common-config',
+                                'layouts/timer-github.yaml')
+        self.sched.reconfigure(self.config)
+        time.sleep(2)
+        self.waitUntilSettled()
+        self.assertEqual(len(self.builds), 1)
+        self.executor_server.hold_jobs_in_build = False
+        # Stop queuing timer triggered jobs so that the assertions
+        # below don't race against more jobs being queued.
+        self.commitConfigUpdate('org/common-config',
+                                'layouts/basic-github.yaml')
+        self.sched.reconfigure(self.config)
+        self.waitUntilSettled()
+        # If APScheduler is in mid-event when we remove the job, we
+        # can end up with one more event firing, so give it an extra
+        # second to settle.
+        time.sleep(1)
+        self.waitUntilSettled()
+        self.executor_server.release()
+        self.waitUntilSettled()
+        self.assertHistory([
+            dict(name='project-bitrot', result='SUCCESS',
+                 ref='refs/heads/master'),
+        ], ordered=False)
     @simple_layout('layouts/dequeue-github.yaml', driver='github')
     def test_dequeue_pull_synchronized(self):
         self.executor_server.hold_jobs_in_build = True
diff --git a/tests/unit/ b/tests/unit/
index 9a2eb28..c833fa2 100755
--- a/tests/unit/
+++ b/tests/unit/
@@ -37,6 +37,7 @@
+    iterate_timeout,
@@ -1505,7 +1506,7 @@
         r = client.autohold('tenant-one', 'org/project', 'project-test2',
-                            "reason text", 1)
+                            "", "", "reason text", 1)
         # First check that successful jobs do not autohold
@@ -1552,7 +1553,7 @@
             " ".join(['tenant-one',
-                      'project-test2'])
+                      'project-test2', '.*'])
         self.assertEqual(held_node['comment'], "reason text")
@@ -1572,13 +1573,151 @@
                 held_nodes += 1
         self.assertEqual(held_nodes, 1)
+    def _test_autohold_scoped(self, change_obj, change, ref):
+        client = zuul.rpcclient.RPCClient('',
+                                          self.gearman_server.port)
+        self.addCleanup(client.shutdown)
+        # create two changes on the same project, and autohold request
+        # for one of them.
+        other = self.fake_gerrit.addFakeChange(
+            'org/project', 'master', 'other'
+        )
+        r = client.autohold('tenant-one', 'org/project', 'project-test2',
+                            str(change), ref, "reason text", 1)
+        self.assertTrue(r)
+        # First, check that an unrelated job does not trigger autohold, even
+        # when it failed
+        self.executor_server.failJob('project-test2', other)
+        self.fake_gerrit.addEvent(other.getPatchsetCreatedEvent(1))
+        self.waitUntilSettled()
+        self.assertEqual(other.data['status'], 'NEW')
+        self.assertEqual(other.reported, 1)
+        # project-test2
+        self.assertEqual(self.history[0].result, 'FAILURE')
+        # Check nodepool for a held node
+        held_node = None
+        for node in self.fake_nodepool.getNodes():
+            if node['state'] == zuul.model.STATE_HOLD:
+                held_node = node
+                break
+        self.assertIsNone(held_node)
+        # And then verify that failed job for the defined change
+        # triggers the autohold
+        self.executor_server.failJob('project-test2', change_obj)
+        self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
+        self.waitUntilSettled()
+        self.assertEqual(change_obj.data['status'], 'NEW')
+        self.assertEqual(change_obj.reported, 1)
+        # project-test2
+        self.assertEqual(self.history[1].result, 'FAILURE')
+        # Check nodepool for a held node
+        held_node = None
+        for node in self.fake_nodepool.getNodes():
+            if node['state'] == zuul.model.STATE_HOLD:
+                held_node = node
+                break
+        self.assertIsNotNone(held_node)
+        # Validate node has recorded the failed job
+        if change != "":
+            ref = "refs/changes/%s/%s/.*" % (
+                str(change_obj.number)[-1:], str(change_obj.number)
+            )
+        self.assertEqual(
+            held_node['hold_job'],
+            " ".join(['tenant-one',
+                      '',
+                      'project-test2', ref])
+        )
+        self.assertEqual(held_node['comment'], "reason text")
+    @simple_layout('layouts/autohold.yaml')
+    def test_autohold_change(self):
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+        self._test_autohold_scoped(A, change=A.number, ref="")
+    @simple_layout('layouts/autohold.yaml')
+    def test_autohold_ref(self):
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+        ref = A.data['currentPatchSet']['ref']
+        self._test_autohold_scoped(A, change="", ref=ref)
+    @simple_layout('layouts/autohold.yaml')
+    def test_autohold_scoping(self):
+        client = zuul.rpcclient.RPCClient('',
+                                          self.gearman_server.port)
+        self.addCleanup(client.shutdown)
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+        # create three autohold requests, scoped to job, change and
+        # a specific ref
+        change = str(A.number)
+        ref = A.data['currentPatchSet']['ref']
+        r1 = client.autohold('tenant-one', 'org/project', 'project-test2',
+                             "", "", "reason text", 1)
+        self.assertTrue(r1)
+        r2 = client.autohold('tenant-one', 'org/project', 'project-test2',
+                             change, "", "reason text", 1)
+        self.assertTrue(r2)
+        r3 = client.autohold('tenant-one', 'org/project', 'project-test2',
+                             "", ref, "reason text", 1)
+        self.assertTrue(r3)
+        # Fail 3 jobs for the same change, and verify that the autohold
+        # requests are fulfilled in the expected order: from the most
+        # specific towards the most generic one.
+        def _fail_job_and_verify_autohold_request(change_obj, ref_filter):
+            self.executor_server.failJob('project-test2', change_obj)
+            self.fake_gerrit.addEvent(change_obj.getPatchsetCreatedEvent(1))
+            self.waitUntilSettled()
+            # Check nodepool for a held node
+            held_node = None
+            for node in self.fake_nodepool.getNodes():
+                if node['state'] == zuul.model.STATE_HOLD:
+                    held_node = node
+                    break
+            self.assertIsNotNone(held_node)
+            self.assertEqual(
+                held_node['hold_job'],
+                " ".join(['tenant-one',
+                          '',
+                          'project-test2', ref_filter])
+            )
+            self.assertFalse(held_node['_lock'], "Node %s is locked" %
+                             (node['_oid'],))
+            self.fake_nodepool.removeNode(held_node)
+        _fail_job_and_verify_autohold_request(A, ref)
+        ref = "refs/changes/%s/%s/.*" % (str(change)[-1:], str(change))
+        _fail_job_and_verify_autohold_request(A, ref)
+        _fail_job_and_verify_autohold_request(A, ".*")
     def test_autohold_ignores_aborted_jobs(self):
         client = zuul.rpcclient.RPCClient('',
         r = client.autohold('tenant-one', 'org/project', 'project-test2',
-                            "reason text", 1)
+                            "", "", "reason text", 1)
         self.executor_server.hold_jobs_in_build = True
@@ -1622,7 +1761,7 @@
         r = client.autohold('tenant-one', 'org/project', 'project-test2',
-                            "reason text", 1)
+                            "", "", "reason text", 1)
         autohold_requests = client.autohold_list()
@@ -1631,11 +1770,12 @@
         # The single dict key should be a CSV string value
         key = list(autohold_requests.keys())[0]
-        tenant, project, job = key.split(',')
+        tenant, project, job, ref_filter = key.split(',')
         self.assertEqual('tenant-one', tenant)
         self.assertIn('org/project', project)
         self.assertEqual('project-test2', job)
+        self.assertEqual(".*", ref_filter)
         # Note: the value is converted from set to list by json.
         self.assertEqual([1, "reason text"], autohold_requests[key])
@@ -4397,6 +4537,54 @@
         self.assertEqual(A.data['status'], 'MERGED')
         self.assertEqual(A.reported, 2)
+    def test_zookeeper_disconnect2(self):
+        "Test that jobs are executed after a zookeeper disconnect"
+        # This tests receiving a ZK disconnect between the arrival of
+        # a fulfilled request and when we accept its nodes.
+        self.fake_nodepool.paused = True
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
+        A.addApproval('Code-Review', 2)
+        self.fake_gerrit.addEvent(A.addApproval('Approved', 1))
+        self.waitUntilSettled()
+        # We're waiting on the nodepool request to complete.  Stop the
+        # scheduler from processing further events, then fulfill the
+        # nodepool request.
+        self.sched.run_handler_lock.acquire()
+        # Fulfill the nodepool request.
+        self.fake_nodepool.paused = False
+        requests = list(self.sched.nodepool.requests.values())
+        self.assertEqual(1, len(requests))
+        request = requests[0]
+        for x in iterate_timeout(30, 'fulfill request'):
+            if request.fulfilled:
+                break
+        id1 = request.id
+        # The request is fulfilled, but the scheduler hasn't processed
+        # it yet.  Reconnect ZK.
+        self.zk.client.stop()
+        self.zk.client.start()
+        # Allow the scheduler to continue and process the (now
+        # out-of-date) notification that nodes are ready.
+        self.sched.run_handler_lock.release()
+        # It should resubmit the request, once it's fulfilled, we can
+        # wait for it to run jobs and settle.
+        for x in iterate_timeout(30, 'fulfill request'):
+            if request.fulfilled:
+                break
+        self.waitUntilSettled()
+        id2 = request.id
+        self.assertEqual(A.data['status'], 'MERGED')
+        self.assertEqual(A.reported, 2)
+        # Make sure it was resubmitted (the id's should be different).
+        self.assertNotEqual(id1, id2)
     def test_nodepool_failure(self):
         "Test that jobs are reported after a nodepool failure"
diff --git a/tests/unit/ b/tests/unit/
index 44eda82..573c8a6 100755
--- a/tests/unit/
+++ b/tests/unit/
@@ -74,44 +74,43 @@
 class TestProtected(ZuulTestCase):
     tenant_config_file = 'config/protected/main.yaml'
     def test_protected_ok(self):
-            # test clean usage of final parent job
-            in_repo_conf = textwrap.dedent(
-                """
-                - job:
-                    name: job-protected
-                    protected: true
-                    run: playbooks/job-protected.yaml
+        # test clean usage of final parent job
+        in_repo_conf = textwrap.dedent(
+            """
+            - job:
+                name: job-protected
+                protected: true
+                run: playbooks/job-protected.yaml
-                - project:
-                    name: org/project
-                    check:
-                      jobs:
-                        - job-child-ok
+            - project:
+                name: org/project
+                check:
+                  jobs:
+                    - job-child-ok
-                - job:
-                    name: job-child-ok
-                    parent: job-protected
+            - job:
+                name: job-child-ok
+                parent: job-protected
-                - project:
-                    name: org/project
-                    check:
-                      jobs:
-                        - job-child-ok
+            - project:
+                name: org/project
+                check:
+                  jobs:
+                    - job-child-ok
-                """)
+            """)
-            file_dict = {'zuul.yaml': in_repo_conf}
-            A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
-                                               files=file_dict)
-            self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
-            self.waitUntilSettled()
+        file_dict = {'zuul.yaml': in_repo_conf}
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
+                                           files=file_dict)
+        self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+        self.waitUntilSettled()
-            self.assertEqual(A.reported, 1)
-            self.assertEqual(A.patchsets[-1]['approvals'][0]['value'], '1')
+        self.assertEqual(A.reported, 1)
+        self.assertEqual(A.patchsets[-1]['approvals'][0]['value'], '1')
     def test_protected_reset(self):
         # try to reset protected flag
@@ -177,6 +176,47 @@
             "and cannot be inherited from other projects.", A.messages[0])
+class TestAbstract(ZuulTestCase):
+    tenant_config_file = 'config/abstract/main.yaml'
+    def test_abstract_fail(self):
+        in_repo_conf = textwrap.dedent(
+            """
+            - project:
+                check:
+                  jobs:
+                    - job-abstract
+            """)
+        file_dict = {'zuul.yaml': in_repo_conf}
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
+                                           files=file_dict)
+        self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+        self.waitUntilSettled()
+        self.assertEqual(A.reported, 1)
+        self.assertEqual(A.patchsets[-1]['approvals'][0]['value'], '-1')
+        self.assertIn('may not be directly run', A.messages[0])
+    def test_child_of_abstract(self):
+        in_repo_conf = textwrap.dedent(
+            """
+            - project:
+                check:
+                  jobs:
+                    - job-child
+            """)
+        file_dict = {'zuul.yaml': in_repo_conf}
+        A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A',
+                                           files=file_dict)
+        self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
+        self.waitUntilSettled()
+        self.assertEqual(A.reported, 1)
+        self.assertEqual(A.patchsets[-1]['approvals'][0]['value'], '1')
 class TestFinal(ZuulTestCase):
     tenant_config_file = 'config/final/main.yaml'
diff --git a/tools/ b/tools/
index 4cb1666..45ad68c 100755
--- a/tools/
+++ b/tools/
@@ -26,9 +26,11 @@
     from urllib.request import Request
     from urllib.request import urlopen
+    from urllib.parse import urlparse
 except ImportError:
     from urllib2 import Request
     from urllib2 import urlopen
+    from urlparse import urlparse
 DESCRIPTION = """Encrypt a secret for Zuul.
@@ -43,7 +45,6 @@
                         help="The base URL of the zuul server and tenant.  "
-    # TODO(jeblair): Throw a fit if SSL is not used.
                         help="The name of the project.")
     parser.add_argument('--strip', action='store_true', default=False,
@@ -60,6 +61,15 @@
                         "to standard output.")
     args = parser.parse_args()
+    # We should not use unencrypted connections for retrieving the public key.
+    # Otherwise our secret can be compromised. The schemes file and https are
+    # considered safe.
+    url = urlparse(args.url)
+    if url.scheme not in ('file', 'https'):
+        sys.stderr.write("WARNING: Retrieving encryption key via an "
+                         "unencrypted connection. Your secret may get "
+                         "compromised.\n")
     req = Request("%s/" % (args.url.rstrip('/'), args.project))
     pubkey = urlopen(req)
diff --git a/tools/ b/tools/
index 101fd11..da6fd0c 100755
--- a/tools/
+++ b/tools/
@@ -11,6 +11,8 @@
 # TODO: for real use override the following variables
 server = ''
 api_token = 'xxxx'
+appid = 2
+appkey = '/opt/project/appkey'
 org = 'example'
 repo = 'sandbox'
@@ -42,20 +44,36 @@
     return conn
+def create_connection_app(server, appid, appkey):
+    driver = GithubDriver()
+    connection_config = {
+        'server': server,
+        'app_id': appid,
+        'app_key': appkey,
+    }
+    conn = GithubConnection(driver, 'github', connection_config)
+    conn._authenticateGithubAPI()
+    conn._prime_installation_map()
+    return conn
 def get_change(connection: GithubConnection,
                org: str,
                repo: str,
                pull: int) -> Change:
     p = Project("%s/%s" % (org, repo), connection.source)
-    github = connection.getGithubClient(p)
+    github = connection.getGithubClient(p.name)
     pr = github.pull_request(org, repo, pull)
     sha = pr.head.sha
     return conn._getChange(p, pull, sha, True)
-# create github connection
+# create github connection with api token
 conn = create_connection(server, api_token)
+# create github connection with app key
+# conn = create_connection_app(server, appid, appkey)
 # Now we can do anything we want with the connection, e.g. check canMerge for
 # a pull request.
diff --git a/tools/ b/tools/
index c02a016..58c39cf 100755
--- a/tools/
+++ b/tools/
@@ -3,7 +3,7 @@
 /usr/zuul-env/bin/zuul-cloner --workspace /tmp --cache-dir /opt/git \
     git:// openstack-infra/nodepool
-ln -s /tmp/nodepool/log $WORKSPACE/logs
+ln -s /tmp/nodepool/log $HOME/logs
 cd /tmp/openstack-infra/nodepool
diff --git a/tox.ini b/tox.ini
index 73915ad..e5035bd 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,7 @@
 basepython = python3
 setenv = VIRTUAL_ENV={envdir}
-         OS_TEST_TIMEOUT=120
+         OS_TEST_TIMEOUT=150
 usedevelop = True
 install_command = pip install {opts} {packages}
diff --git a/zuul/ansible/callback/ b/zuul/ansible/callback/
index df28a57..15b491c 100644
--- a/zuul/ansible/callback/
+++ b/zuul/ansible/callback/
@@ -367,12 +367,13 @@
                     result, status='MODULE FAILURE',
-        elif (len([key for key in result_dict.keys()
-                   if not key.startswith('_ansible')]) == 1):
+        elif result._task.action == 'debug':
             # this is a debug statement, handle it special
             for key in [k for k in result_dict.keys()
                         if k.startswith('_ansible')]:
                 del result_dict[key]
+            if 'changed' in result_dict.keys():
+                del result_dict['changed']
             keyname = next(iter(result_dict.keys()))
             # If it has msg, that means it was like:
diff --git a/zuul/ansible/library/ b/zuul/ansible/library/
index f84766d..6703cc1 100644
--- a/zuul/ansible/library/
+++ b/zuul/ansible/library/
@@ -277,7 +277,7 @@
         inode = get_inode()
         if not inode:
-                "Could not find inode for port",
+                msg="Could not find inode for port",
         pid, exceptions = get_pid_from_inode(inode)
diff --git a/zuul/cmd/ b/zuul/cmd/
index b299219..6ab7598 100755
--- a/zuul/cmd/
+++ b/zuul/cmd/
@@ -59,9 +59,12 @@
             thread = threads.get(thread_id)
             if thread:
                 thread_name = thread.name
+                thread_is_daemon = str(thread.daemon)
                 thread_name = thread.ident
-            log_str += "Thread: %s %s\n" % (thread_id, thread_name)
+                thread_is_daemon = '(Unknown)'
+            log_str += "Thread: %s %s d: %s\n"\
+                       % (thread_id, thread_name, thread_is_daemon)
             log_str += "".join(traceback.format_stack(stack_frame))
     except Exception:
diff --git a/zuul/cmd/ b/zuul/cmd/
index ebf59b9..a7b3ef3 100755
--- a/zuul/cmd/
+++ b/zuul/cmd/
@@ -51,6 +51,11 @@
         cmd_autohold.add_argument('--job', help='job name',
+        cmd_autohold.add_argument('--change',
+                                  help='specific change to hold nodes for',
+                                  required=False, default='')
+        cmd_autohold.add_argument('--ref', help='git ref to hold nodes for',
+                                  required=False, default='')
         cmd_autohold.add_argument('--reason', help='reason for the hold',
@@ -173,9 +178,15 @@
     def autohold(self):
         client = zuul.rpcclient.RPCClient(
             self.server, self.port, self.ssl_key, self.ssl_cert, self.ssl_ca)
+        if self.args.change and self.args.ref:
+            print("Change and ref can't be both used for the same request")
+            return False
         r = client.autohold(tenant=self.args.tenant,
+                            change=self.args.change,
+                            ref=self.args.ref,
         return r
@@ -190,14 +201,19 @@
             return True
         table = prettytable.PrettyTable(
-            field_names=['Tenant', 'Project', 'Job', 'Count', 'Reason'])
+            field_names=[
+                'Tenant', 'Project', 'Job', 'Ref Filter', 'Count', 'Reason'
+            ])
         for key, value in autohold_requests.items():
             # The key comes to us as a CSV string because json doesn't like
             # non-str keys.
-            tenant_name, project_name, job_name = key.split(',')
+            tenant_name, project_name, job_name, ref_filter = key.split(',')
             count, reason = value
-            table.add_row([tenant_name, project_name, job_name, count, reason])
+            table.add_row([
+                tenant_name, project_name, job_name, ref_filter, count, reason
+            ])
         return True
diff --git a/zuul/cmd/ b/zuul/cmd/
index 68c9000..a3a53cf 100755
--- a/zuul/cmd/
+++ b/zuul/cmd/
@@ -159,6 +159,7 @@
             self.log.exception("Error starting Zuul:")
             # TODO(jeblair): If we had all threads marked as daemon,
             # we might be able to have a nicer way of exiting here.
+            self.sched.stop()
         signal.signal(signal.SIGHUP, self.reconfigure_handler)
diff --git a/zuul/ b/zuul/
index be1bd63..bd2ce3a 100644
--- a/zuul/
+++ b/zuul/
@@ -474,6 +474,7 @@
     # Attributes of a job that can also be used in Project and ProjectTemplate
     job_attributes = {'parent': vs.Any(str, None),
                       'final': bool,
+                      'abstract': bool,
                       'protected': bool,
                       'failure-message': str,
                       'success-message': str,
@@ -514,6 +515,7 @@
     simple_attributes = [
+        'abstract',
@@ -1448,6 +1450,11 @@
         for project in untrusted_projects:
+            tpc = tenant.project_configs[project.canonical_name]
+            # If all config classes are excluded then do not request a
+            # getFiles job.
+            if not tpc.load_classes:
+                continue
             # If we have cached data (this is a reconfiguration) use it.
             if cached and project.unparsed_config:
                 jobs.append(CachedDataJob(False, project))
diff --git a/zuul/driver/gerrit/ b/zuul/driver/gerrit/
index fdc1ad7..8f3408e 100644
--- a/zuul/driver/gerrit/
+++ b/zuul/driver/gerrit/
@@ -141,6 +141,10 @@
         return [f]
+    def getRefForChange(self, change):
+        partial = change[-2:]
+        return "refs/changes/%s/%s/.*" % (partial, change)
 approval = vs.Schema({'username': str,
                       'email': str,
diff --git a/zuul/driver/git/ b/zuul/driver/git/
index a7d42be..9f0963d 100644
--- a/zuul/driver/git/
+++ b/zuul/driver/git/
@@ -68,3 +68,6 @@
     def getRejectFilters(self, config):
         return []
+    def getRefForChange(self, change):
+        raise NotImplementedError()
diff --git a/zuul/driver/github/ b/zuul/driver/github/
index 6072f4c..6dfcdd3 100644
--- a/zuul/driver/github/
+++ b/zuul/driver/github/
@@ -458,6 +458,7 @@
         self._github = None
         self.app_id = None
         self.app_key = None
+        self.sched = None
         self.installation_map = {}
         self.installation_token_cache = {}
@@ -721,7 +722,8 @@
             change.newrev = event.newrev
             change.url = self.getGitwebUrl(project, sha=event.newrev)
             change.source_event = event
-            change.files = self.getPushedFileNames(event)
+            if hasattr(event, 'commits'):
+                change.files = self.getPushedFileNames(event)
         return change
     def _getChange(self, project, number, patchset=None, refresh=False):
@@ -827,7 +829,8 @@
         change.updated_at = self._ghTimestampToDate(
-        self.sched.onChangeUpdated(change)
+        if self.sched:
+            self.sched.onChangeUpdated(change)
         return change
diff --git a/zuul/driver/github/ b/zuul/driver/github/
index 33f8f7c..6f9b14d 100644
--- a/zuul/driver/github/
+++ b/zuul/driver/github/
@@ -144,6 +144,9 @@
         return [f]
+    def getRefForChange(self, change):
+        return "refs/pull/%s/head" % change
 review = v.Schema({'username': str,
                    'email': str,
diff --git a/zuul/executor/ b/zuul/executor/
index ffc083f..8de6fe0 100644
--- a/zuul/executor/
+++ b/zuul/executor/
@@ -777,11 +777,29 @@
         return data
     def doMergeChanges(self, merger, items, repo_state):
-        ret = merger.mergeChanges(items, repo_state=repo_state)
-        if not ret:  # merge conflict
-            result = dict(result='MERGER_FAILURE')
+        try:
+            ret = merger.mergeChanges(items, repo_state=repo_state)
+        except ValueError as e:
+            # Return ABORTED so that we'll try again. At this point all of
+            # the refs we're trying to merge should be valid refs. If we
+            # can't fetch them, it should resolve itself.
+            self.log.exception("Could not fetch refs to merge from remote")
+            result = dict(result='ABORTED')
             return False
+        if not ret:  # merge conflict
+            result = dict(result='MERGER_FAILURE')
+            if self.executor_server.statsd:
+                base_key = ("zuul.executor.%s.merger" %
+                            self.executor_server.hostname)
+                self.executor_server.statsd.incr(base_key + ".FAILURE")
+            self.job.sendWorkComplete(json.dumps(result))
+            return False
+        if self.executor_server.statsd:
+            base_key = ("zuul.executor.%s.merger" %
+                        self.executor_server.hostname)
+            self.executor_server.statsd.incr(base_key + ".SUCCESS")
         recent = ret[3]
         for key, commit in recent.items():
             (connection, project, branch) = key
@@ -835,6 +853,13 @@
         return selected_ref
+    def getAnsibleTimeout(self, start, timeout):
+        if timeout is not None:
+            now = time.time()
+            elapsed = now - start
+            timeout = timeout - elapsed
+        return timeout
     def runPlaybooks(self, args):
         result = None
@@ -852,10 +877,15 @@
         pre_failed = False
         success = False
         self.started = True
+        time_started = time.time()
+        # timeout value is total job timeout or put another way
+        # the cumulative time that pre, run, and post can consume.
+        job_timeout = args['timeout']
         for index, playbook in enumerate(self.jobdir.pre_playbooks):
             # TODOv3(pabelanger): Implement pre-run timeout setting.
+            ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
             pre_status, pre_code = self.runAnsiblePlaybook(
-                playbook, args['timeout'], phase='pre', index=index)
+                playbook, ansible_timeout, phase='pre', index=index)
             if pre_status != self.RESULT_NORMAL or pre_code != 0:
                 # These should really never fail, so return None and have
                 # zuul try again
@@ -863,8 +893,9 @@
         if not pre_failed:
+            ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
             job_status, job_code = self.runAnsiblePlaybook(
-                self.jobdir.playbook, args['timeout'], phase='run')
+                self.jobdir.playbook, ansible_timeout, phase='run')
             if job_status == self.RESULT_ABORTED:
                 return 'ABORTED'
             elif job_status == self.RESULT_TIMED_OUT:
@@ -885,8 +916,9 @@
         for index, playbook in enumerate(self.jobdir.post_playbooks):
             # TODOv3(pabelanger): Implement post-run timeout setting.
+            ansible_timeout = self.getAnsibleTimeout(time_started, job_timeout)
             post_status, post_code = self.runAnsiblePlaybook(
-                playbook, args['timeout'], success, phase='post', index=index)
+                playbook, ansible_timeout, success, phase='post', index=index)
             if post_status == self.RESULT_ABORTED:
                 return 'ABORTED'
             if post_status != self.RESULT_NORMAL or post_code != 0:
@@ -1465,6 +1497,11 @@
         self.log.debug("Ansible complete, result %s code %s" % (
             self.RESULT_MAP[result], code))
+        if self.executor_server.statsd:
+            base_key = ("zuul.executor.%s.phase.setup" %
+                        self.executor_server.hostname)
+            self.executor_server.statsd.incr(base_key + ".%s" %
+                                             self.RESULT_MAP[result])
         return result, code
     def runAnsibleCleanup(self, playbook):
@@ -1485,6 +1522,11 @@
         self.log.debug("Ansible complete, result %s code %s" % (
             self.RESULT_MAP[result], code))
+        if self.executor_server.statsd:
+            base_key = ("zuul.executor.%s.phase.cleanup" %
+                        self.executor_server.hostname)
+            self.executor_server.statsd.incr(base_key + ".%s" %
+                                             self.RESULT_MAP[result])
         return result, code
     def emitPlaybookBanner(self, playbook, step, phase, result=None):
@@ -1554,6 +1596,11 @@
             cmd=cmd, timeout=timeout, playbook=playbook)
         self.log.debug("Ansible complete, result %s code %s" % (
             self.RESULT_MAP[result], code))
+        if self.executor_server.statsd:
+            base_key = ("zuul.executor.%s.phase.%s" %
+                        (self.executor_server.hostname, phase or 'unknown'))
+            self.executor_server.statsd.incr(base_key + ".%s" %
+                                             self.RESULT_MAP[result])
         self.emitPlaybookBanner(playbook, 'END', phase, result=result)
         return result, code
@@ -1636,6 +1683,7 @@
                                             'load_multiplier', '2.5'))
         self.max_load_avg = multiprocessing.cpu_count() * load_multiplier
         self.max_starting_builds = self.max_load_avg * 2
+        self.min_starting_builds = max(int(multiprocessing.cpu_count() / 2), 1)
         self.min_avail_mem = float(get_default(self.config, 'executor',
                                                'min_avail_mem', '5.0'))
         self.accepting_work = False
@@ -1808,7 +1856,7 @@
         if self.statsd:
             base_key = 'zuul.executor.%s' % self.hostname
             self.statsd.gauge(base_key + '.load_average', 0)
-            self.statsd.gauge(base_key + '.pct_available_ram', 0)
+            self.statsd.gauge(base_key + '.pct_used_ram', 0)
             self.statsd.gauge(base_key + '.running_builds', 0)
@@ -1975,7 +2023,7 @@
                 starting_builds += 1
         max_starting_builds = max(
             self.max_starting_builds - len(self.job_workers),
-            1)
+            self.min_starting_builds)
         if self.accepting_work:
             # Don't unregister if we don't have any active jobs.
             if load_avg > self.max_load_avg:
@@ -2007,8 +2055,8 @@
             base_key = 'zuul.executor.%s' % self.hostname
             self.statsd.gauge(base_key + '.load_average',
                               int(load_avg * 100))
-            self.statsd.gauge(base_key + '.pct_available_ram',
-                              int(avail_mem_pct * 100))
+            self.statsd.gauge(base_key + '.pct_used_ram',
+                              int((100.0 - avail_mem_pct) * 100))
             self.statsd.gauge(base_key + '.running_builds',
             self.statsd.gauge(base_key + '.starting_builds',
diff --git a/zuul/merger/ b/zuul/merger/
index 63bb5d5..aba8645 100644
--- a/zuul/merger/
+++ b/zuul/merger/
@@ -266,14 +266,6 @@
         return repo.head.commit
-    def checkoutLocalBranch(self, branch):
-        # TODO(jeblair): retire in favor of checkout
-        repo = self.createRepoObject()
-        # Perform a hard reset before checking out so that we clean up
-        # anything that might be left over from a merge.
-        reset_repo_to_head(repo)
-        repo.heads[branch].checkout()
     def cherryPick(self, ref):
         repo = self.createRepoObject()
         self.log.debug("Cherry-picking %s" % ref)
diff --git a/zuul/ b/zuul/
index 38f2d6b..45fc1a8 100644
--- a/zuul/
+++ b/zuul/
@@ -848,6 +848,7 @@
+            abstract=False,
@@ -1044,7 +1045,7 @@
         for k in self.execution_attributes:
             if (other._get(k) is not None and
-                    k not in set(['final', 'protected'])):
+                k not in set(['final', 'abstract', 'protected'])):
                     raise Exception("Unable to modify final job %s attribute "
                                     "%s=%s with variant %s" % (
@@ -1070,6 +1071,13 @@
         if other.final != self.attributes['final']:
+        # Abstract may not be reset by a variant, it may only be
+        # cleared by inheriting.
+        if other.name != self.name:
+            self.abstract = other.abstract
+        elif other.abstract:
+            self.abstract = True
         # Protected may only be set to true
         if other.protected is not None:
             # don't allow to reset protected flag
@@ -2836,6 +2844,10 @@
                 item.debug("No matching pipeline variants for {jobname}".
                            format(jobname=jobname), indent=2)
+            if frozen_job.abstract:
+                raise Exception("Job %s is abstract and may not be "
+                                "directly run" %
+                                (,))
             if (frozen_job.allowed_projects is not None and
        not in frozen_job.allowed_projects):
                 raise Exception("Project %s is not allowed to run job %s" %
diff --git a/zuul/ b/zuul/
index b96d1ca..6e7064c 100644
--- a/zuul/
+++ b/zuul/
@@ -165,6 +165,7 @@
         self.log.debug("Updating node request %s" % (request,))
         if request.uid not in self.requests:
+            self.log.debug("Request %s is unknown" % (request.uid,))
             return False
         if request.canceled:
@@ -193,14 +194,21 @@
     def acceptNodes(self, request, request_id):
         # Called by the scheduler when it wants to accept and lock
-        # nodes for (potential) use.
+        # nodes for (potential) use.  Return False if there is a
+        # problem with the request (canceled or retrying), True if it
+        # is ready to be acted upon (success or failure).
 "Accepting node request %s" % (request,))
         if request_id !=
   "Skipping node accept for %s (resubmitted as %s)",
-            return
+            return False
+        if request.canceled:
+  "Ignoring canceled node request %s" % (request,))
+            # The request was already deleted when it was canceled
+            return False
         # Make sure the request still exists. It's possible it could have
         # disappeared if we lost the ZK session between when the fulfillment
@@ -208,13 +216,13 @@
         # processing it. Nodepool will automatically reallocate the assigned
         # nodes in that situation.
         if not self.sched.zk.nodeRequestExists(request):
-  "Request %s no longer exists",
-            return
-        if request.canceled:
-  "Ignoring canceled node request %s" % (request,))
-            # The request was already deleted when it was canceled
-            return
+  "Request %s no longer exists, resubmitting",
+   = None
+            request.state = model.STATE_REQUESTED
+            self.requests[request.uid] = request
+            self.sched.zk.submitNodeRequest(request, self._updateNodeRequest)
+            return False
         locked = False
         if request.fulfilled:
@@ -239,3 +247,4 @@
             # them.
             if locked:
+        return True
diff --git a/zuul/ b/zuul/
index 8f2e5dc..a947ed0 100644
--- a/zuul/
+++ b/zuul/
@@ -48,10 +48,12 @@
         self.log.debug("Job complete, success: %s" % (not job.failure))
         return job
-    def autohold(self, tenant, project, job, reason, count):
+    def autohold(self, tenant, project, job, change, ref, reason, count):
+        # Submit a 'zuul:autohold' job carrying the given fields and
+        # return True when the submitted job did not report failure.
+        # 'change' and 'ref' are forwarded unmodified; any validation or
+        # conversion of them is left to the receiver of the job.
         data = {'tenant': tenant,
                 'project': project,
                 'job': job,
+                'change': change,
+                'ref': ref,
                 'reason': reason,
                 'count': count}
         return not self.submitJob('zuul:autohold', data).failure
diff --git a/zuul/ b/zuul/
index e5016df..f3f55f6 100644
--- a/zuul/
+++ b/zuul/
@@ -150,7 +150,20 @@
+        if args['change'] and args['ref']:
+            job.sendWorkException("Change and ref can't be both used "
+                                  "for the same request")
+        if args['change']:
+            # Convert change into ref based on zuul connection
+            ref_filter = project.source.getRefForChange(args['change'])
+        elif args['ref']:
+            ref_filter = "%s" % args['ref']
+        else:
+            ref_filter = ".*"
         params['job_name'] = args['job']
+        params['ref_filter'] = ref_filter
         params['reason'] = args['reason']
         if args['count'] < 0:
diff --git a/zuul/ b/zuul/
index 2130ede..d16200c 100644
--- a/zuul/
+++ b/zuul/
@@ -19,6 +19,7 @@
 import logging
 import os
 import pickle
+import re
 import queue
 import socket
 import sys
@@ -230,6 +231,7 @@
         self.statsd = get_statsd(config)
         self.rpc = rpclistener.RPCListener(config, self)
         self.stats_thread = threading.Thread(target=self.runStats)
+        self.stats_thread.daemon = True
         self.stats_stop = threading.Event()
         # TODO(jeblair): fix this
         # Despite triggers being part of the pipeline, there is one trigger set
@@ -435,8 +437,9 @@
         self.last_reconfigured = int(time.time())
         # TODOv3(jeblair): reconfigure time should be per-tenant
-    def autohold(self, tenant_name, project_name, job_name, reason, count):
-        key = (tenant_name, project_name, job_name)
+    def autohold(self, tenant_name, project_name, job_name, ref_filter,
+                 reason, count):
+        key = (tenant_name, project_name, job_name, ref_filter)
         if count == 0 and key in self.autohold_requests:
             self.log.debug("Removing autohold for %s", key)
             del self.autohold_requests[key]
@@ -544,7 +547,7 @@
         self.config = event.config
-            self.log.debug("Full reconfiguration beginning")
+  "Full reconfiguration beginning")
             loader = configloader.ConfigLoader()
             abide = loader.loadConfig(
                 self.config.get('scheduler', 'tenant_config'),
@@ -555,14 +558,14 @@
             self.abide = abide
-        self.log.debug("Full reconfiguration complete")
+"Full reconfiguration complete")
     def _doTenantReconfigureEvent(self, event):
         # This is called in the scheduler loop after another thread submits
         # a request
-            self.log.debug("Tenant reconfiguration beginning")
+  "Tenant reconfiguration beginning")
             # If a change landed to a project, clear out the cached
             # config before reconfiguring.
             for project in event.projects:
@@ -579,7 +582,7 @@
             self.abide = abide
-        self.log.debug("Tenant reconfiguration complete")
+"Tenant reconfiguration complete")
     def _reenqueueGetProject(self, tenant, item):
         project = item.change.project
@@ -972,6 +975,84 @@
             self.log.exception("Exception estimating build time:")
+    def _getAutoholdRequestKey(self, build):
+        """Find the most specific autohold request matching a build.
+
+        Iterates over the keys of ``self.autohold_requests`` and returns
+        the matching key with the highest scope (REF > CHANGE > JOB), or
+        None when no request matches.
+        """
+        change = build.build_set.item.change
+        # Leading part of the key that a request must share with this
+        # build; the ref filter (last element of a request key) is
+        # matched separately below.
+        autohold_key_base = (,
+                             change.project.canonical_name,
+        class Scope(object):
+            """Enum defining a precedence/priority of autohold requests.
+            Autohold requests for specific refs should be fulfilled first,
+            before those for changes, and generic jobs.
+            Matching algorithm goes over all existing autohold requests, and
+            returns one with the highest number (in case of duplicated
+            requests the last one wins).
+            """
+            NONE = 0
+            JOB = 1
+            CHANGE = 2
+            REF = 3
+        def autohold_key_base_issubset(base, request_key):
+            """check whether the given key is a subset of the build key"""
+            # Element-wise prefix comparison: every element of 'base'
+            # must equal the element at the same index of 'request_key'.
+            # Assumes request_key is at least as long as base.
+            index = 0
+            base_len = len(base)
+            while index < base_len:
+                if base[index] != request_key[index]:
+                    return False
+                index += 1
+            return True
+        # Do a partial match of the autohold key against all autohold
+        # requests, ignoring the last element of the key (ref filter),
+        # and finally do a regex match between ref filter from
+        # the autohold request and the build's change ref to check
+        # if it matches. Lastly, make sure that we match the most
+        # specific autohold request by comparing "scopes"
+        # of requests - the most specific is selected.
+        autohold_key = None
+        scope = Scope.NONE
+        for request in self.autohold_requests:
+            ref_filter = request[-1]
+            # re.match anchors only at the start of change.ref, so the
+            # filter is matched as a regular-expression prefix.
+            if not autohold_key_base_issubset(autohold_key_base, request) \
+                or not re.match(ref_filter, change.ref):
+                continue
+            # Classify the request purely by the shape of its filter:
+            # exactly ".*" is job-wide, a trailing ".*" is treated as a
+            # change-level filter, anything else as a specific ref.
+            if ref_filter == ".*":
+                candidate_scope = Scope.JOB
+            elif ref_filter.endswith(".*"):
+                candidate_scope = Scope.CHANGE
+            else:
+                candidate_scope = Scope.REF
+            # Strict comparison: among matching requests of equal scope
+            # the first one iterated is kept.
+            # NOTE(review): this looks at odds with "the last one wins"
+            # in the Scope docstring - confirm which is intended.
+            if candidate_scope > scope:
+                scope = candidate_scope
+                autohold_key = request
+        return autohold_key
+    def _processAutohold(self, build):
+        """Hold the nodes of a failed build when an autohold request matches.
+
+        Does nothing unless the build result is "FAILURE" and
+        ``_getAutoholdRequestKey`` finds a matching request.  If holding
+        the nodeset raises, the error is logged and the matching request
+        is dropped from ``self.autohold_requests``.
+        """
+        # We explicitly only want to hold nodes for jobs if they have
+        # failed and have an autohold request.
+        if build.result != "FAILURE":
+            return
+        autohold_key = self._getAutoholdRequestKey(build)
+        if autohold_key is None:
+            # No autohold request matches this build.  Without this
+            # guard every failed build would attempt a hold with a None
+            # key, which is not a valid autohold request.
+            return
+        try:
+            self.nodepool.holdNodeSet(build.nodeset, autohold_key)
+        except Exception:
+            self.log.exception("Unable to process autohold for %s:",
+                               autohold_key)
+            # Drop the request so a broken hold is not retried on every
+            # subsequent failure.
+            if autohold_key in self.autohold_requests:
+                self.log.debug("Removing autohold %s due to exception",
+                               autohold_key)
+                del self.autohold_requests[autohold_key]
     def _doBuildCompletedEvent(self, event):
         build =
@@ -979,27 +1060,10 @@
         # to pass this on to the pipeline manager, make sure we return
         # the nodes to nodepool.
-            nodeset = build.nodeset
-            autohold_key = (,
-                            build.build_set.item.change.project.canonical_name,
-            if (build.result == "FAILURE" and
-                autohold_key in self.autohold_requests):
-                # We explicitly only want to hold nodes for jobs if they have
-                # failed and have an autohold request.
-                try:
-                    self.nodepool.holdNodeSet(nodeset, autohold_key)
-                except Exception:
-                    self.log.exception("Unable to process autohold for %s:",
-                                       autohold_key)
-                    if autohold_key in self.autohold_requests:
-                        self.log.debug("Removing autohold %s due to exception",
-                                       autohold_key)
-                        del self.autohold_requests[autohold_key]
-            self.nodepool.returnNodeSet(nodeset)
+            self._processAutohold(build)
+            self.nodepool.returnNodeSet(build.nodeset)
         except Exception:
-            self.log.exception("Unable to return nodeset %s" % (nodeset,))
+            self.log.exception("Unable to return nodeset %s" % build.nodeset)
         if build.build_set is not build.build_set.item.current_build_set:
             self.log.debug("Build %s is not in the current build set" %
@@ -1035,8 +1099,8 @@
         request_id = event.request_id
         build_set = request.build_set
-        self.nodepool.acceptNodes(request, request_id)
-        if request.canceled:
+        ready = self.nodepool.acceptNodes(request, request_id)
+        if not ready:
         if build_set is not build_set.item.current_build_set:
diff --git a/zuul/web/ b/zuul/web/
index adbafb5..e962738 100755
--- a/zuul/web/
+++ b/zuul/web/
@@ -101,8 +101,7 @@
         except Exception as e:
             self.log.exception("Finger client exception:")
-            msg = "Failure from finger client: %s" % e
-            await ws.send_str(msg.decode('utf8'))
+            await ws.send_str("Failure from finger client: %s" % e)
         return (1000, "No more data")