Ansible launcher: add AFS publisher

This is an extension to JJB that works only in zuul-launcher, not
Jenkins.  It allows copying the results of a build into AFS.
It isn't actually AFS-specific, other than that it checks that the
destination path is under /afs.  Otherwise, it behaves as a local
copy on the launcher itself.

It also contains the logic needed to publish OpenStack's
documentation builds, which can appear as subdirectories of other
builds.

Change-Id: Icda75266219d2d7167e80aaad8e290443cfdbadc
diff --git a/zuul/launcher/ansiblelaunchserver.py b/zuul/launcher/ansiblelaunchserver.py
index 40d0d5a..2321cea 100644
--- a/zuul/launcher/ansiblelaunchserver.py
+++ b/zuul/launcher/ansiblelaunchserver.py
@@ -1088,6 +1088,159 @@
         tasks.append(task)
         return tasks
 
+    def _makeAFSTask(self, jobdir, publisher, parameters):
+        tasks = []
+        afs = publisher['afs']
+
+        # It is possible that this could be done in one rsync step,
+        # however, the current rysnc from the host is complicated (so
+        # that we can match the behavior of ant), and then rsync to
+        # afs is complicated and involves a pre-processing step in
+        # both locations (so that we can exclude directories).  Each
+        # is well understood individually so it is easier to compose
+        # them in series than combine them together.  A better,
+        # longer-lived solution (with better testing) would do just
+        # that.
+        afsroot = tempfile.mkdtemp(dir=jobdir.staging_root)
+        afscontent = os.path.join(afsroot, 'content')
+
+        src = parameters['WORKSPACE']
+        if not src.endswith('/'):
+            src = src + '/'
+        rsync_opts = self._getRsyncOptions(afs['source'],
+                                           parameters)
+        syncargs = dict(src=src,
+                        dest=afscontent,
+                        copy_links='yes',
+                        mode='pull')
+        if rsync_opts:
+            syncargs['rsync_opts'] = rsync_opts
+        task = dict(synchronize=syncargs,
+                    when='success')
+        task.update(self.retry_args)
+        tasks.append(task)
+
+        afstarget = afs['target']
+        afstarget = self._substituteVariables(afstarget, parameters)
+        afstarget = os.path.normpath(afstarget)
+        if not afstarget.startswith('/afs'):
+            raise Exception("Target path %s is not below AFS root" %
+                            (afstarget,))
+
+        src_markers_file = os.path.join(afsroot, 'src-markers')
+        dst_markers_file = os.path.join(afsroot, 'dst-markers')
+        exclude_file = os.path.join(afsroot, 'exclude')
+        filter_file = os.path.join(afsroot, 'filter')
+
+        find_pipe = [
+            "/usr/bin/find {path} -name .root-marker -printf '%P\n'",
+            "/usr/bin/xargs -I{{}} dirname {{}}",
+            "/usr/bin/sort > {file}"]
+        find_pipe = ' | '.join(find_pipe)
+
+        # Find the list of root markers in the just-completed build
+        # (usually there will only be one, but some builds produce
+        # content at the root *and* at a tag location).
+        task = dict(shell=find_pipe.format(path=afscontent,
+                                           file=src_markers_file),
+                    when='success',
+                    delegate_to='127.0.0.1')
+        tasks.append(task)
+
+        # Find the list of root markers that already exist in the
+        # published site.
+        task = dict(shell=find_pipe.format(path=afstarget,
+                                           file=dst_markers_file),
+                    when='success',
+                    delegate_to='127.0.0.1')
+        tasks.append(task)
+
+        # Create a file that contains the set of directories with root
+        # markers in the published site that do not have root markers
+        # in the built site.
+        exclude_command = "/usr/bin/comm -23 {dst} {src} > {exclude}".format(
+            src=src_markers_file,
+            dst=dst_markers_file,
+            exclude=exclude_file)
+        task = dict(shell=exclude_command,
+                    when='success',
+                    delegate_to='127.0.0.1')
+        tasks.append(task)
+
+        # Create a filter list for rsync so that we copy exactly the
+        # directories we want to without deleting any existing
+        # directories in the published site that were placed there by
+        # previous builds.
+
+        # The first group of items in the filter list are the
+        # directories in the current build with root markers, except
+        # for the root of the build.  This is so that if, later, the
+        # build root ends up as an exclude, we still copy the
+        # directories in this build underneath it (since these
+        # includes will have matched first).  We can't include the
+        # build root itself here, even if we do want to synchronize
+        # it, since that would defeat later excludes.  In other words,
+        # if the build produces a root marker in "/subdir" but not in
+        # "/", this section is needed so that "/subdir" is copied at
+        # all, since "/" will be excluded later.
+
+        command = ("/bin/grep -v '^/$' {src} | "
+                   "/bin/sed -e 's/^+ /' > {filter}".format(
+                       src=src_markers_file,
+                       filter=filter_file))
+        task = dict(shell=command,
+                    when='success',
+                    delegate_to='127.0.0.1')
+        tasks.append(task)
+
+        # The second group is the set of directories that are in the
+        # published site but not in the built site.  This is so that
+        # if the built site does contain a marker at root (meaning
+        # that there is content that should be copied into the root)
+        # that we don't delete everything else previously built
+        # underneath the root.
+
+        command = ("/bin/grep -v '^/$' {exclude} | "
+                   "/bin/sed -e 's/^- /' >> {filter}".format(
+                       exclude=exclude_file,
+                       filter=filter_file))
+        task = dict(shell=command,
+                    when='success',
+                    delegate_to='127.0.0.1')
+        tasks.append(task)
+
+        # The last entry in the filter file is for the build root.  If
+        # there is no marker in the build root, then we need to
+        # exclude it from the rsync, so we add it here.  It needs to
+        # be in the form of '/*' so that it matches all of the files
+        # in the build root.  If there is no marker at the build root,
+        # then we should omit the '/*' exclusion so that it is
+        # implicitly included.
+
+        command = "grep '^/$' {exclude} && echo '- /*' >> {filter}".format(
+            exclude=exclude_file,
+            filter=filter_file)
+        task = dict(shell=command,
+                    when='success',
+                    delegate_to='127.0.0.1')
+        tasks.append(task)
+
+        # Perform the rsync with the filter list.
+        rsync_cmd = [
+            '/usr/bin/rsync', '-rtp', '--safe-links', '--delete-after',
+            "--filter='merge {filter}'", '{src}/', '{dst}/',
+        ]
+        shellargs = ' '.join(rsync_cmd).format(
+            src=afscontent,
+            dst=afstarget,
+            filter=filter_file)
+        task = dict(shell=shellargs,
+                    when='success',
+                    delegate_to='127.0.0.1')
+        tasks.append(task)
+
+        return tasks
+
     def _makeBuilderTask(self, jobdir, builder, parameters):
         tasks = []
         script_fn = '%s.sh' % str(uuid.uuid4().hex)
@@ -1242,6 +1395,9 @@
                     if 'ftp' in publisher:
                         block.extend(self._makeFTPTask(jobdir, publisher,
                                                        parameters))
+                    if 'afs' in publisher:
+                        block.extend(self._makeAFSTask(jobdir, publisher,
+                                                       parameters))
                 blocks.append(block)
 
             # The 'always' section contains the log publishing tasks,