Merger: retry network operations
Retry clone and fetch operations up to 3 times, waiting 30 seconds
between attempts, to make the merger more robust against transient
network failures.
Change-Id: I4f113a0d1313ea4485086f62514825ec980584a6
diff --git a/tests/fixtures/git_fetch_error.sh b/tests/fixtures/git_fetch_error.sh
new file mode 100755
index 0000000..49c568c
--- /dev/null
+++ b/tests/fixtures/git_fetch_error.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+#
+# Fake git executable for the merger retry test: the first "fetch"
+# fails and the second succeeds, leaving stamp files in the current
+# directory so the test can tell how many fetches were attempted.
+
+# Echo the arguments; quoted so they are not re-split or glob-expanded.
+echo "$*"
+case "$1" in
+    fetch)
+        if [ -f ./stamp1 ]; then
+            # A previous fetch already failed: record this one and succeed.
+            touch ./stamp2
+            exit 0
+        fi
+        # First fetch: leave a marker behind and fail.
+        touch ./stamp1
+        exit 1
+        ;;
+    version)
+        echo "git version 1.0.0"
+        exit 0
+        ;;
+esac
diff --git a/tests/unit/test_merger_repo.py b/tests/unit/test_merger_repo.py
index ec30a2b..fb2f199 100644
--- a/tests/unit/test_merger_repo.py
+++ b/tests/unit/test_merger_repo.py
@@ -82,7 +82,7 @@
os.path.join(FIXTURE_DIR, 'fake_git.sh'))
work_repo = Repo(parent_path, self.workspace_root,
'none@example.org', 'User Name', '0', '0',
- git_timeout=0.001)
+ git_timeout=0.001, retry_attempts=1)
# TODO: have the merger and repo classes catch fewer
# exceptions, including this one on initialization. For the
# test, we try cloning again.
@@ -93,10 +93,30 @@
     def test_fetch_timeout(self):
         parent_path = os.path.join(self.upstream_root, 'org/project1')
+        # A single attempt, so the timeout error surfaces immediately
+        # instead of being retried.
         work_repo = Repo(parent_path, self.workspace_root,
-                         'none@example.org', 'User Name', '0', '0')
+                         'none@example.org', 'User Name', '0', '0',
+                         retry_attempts=1)
         work_repo.git_timeout = 0.001
         self.patch(git.Git, 'GIT_PYTHON_GIT_EXECUTABLE',
                    os.path.join(FIXTURE_DIR, 'fake_git.sh'))
         with testtools.ExpectedException(git.exc.GitCommandError,
-                                         '.*exit code\(-9\)'):
+                                         r'.*exit code\(-9\)'):
             work_repo.update()
+
+    def test_fetch_retry(self):
+        # git_fetch_error.sh fails the first fetch and succeeds on the
+        # second, so update() is expected to pass only because the
+        # failed fetch is retried.
+        parent_path = os.path.join(self.upstream_root, 'org/project1')
+        work_repo = Repo(parent_path, self.workspace_root,
+                         'none@example.org', 'User Name', '0', '0',
+                         retry_interval=1)
+        self.patch(git.Git, 'GIT_PYTHON_GIT_EXECUTABLE',
+                   os.path.join(FIXTURE_DIR, 'git_fetch_error.sh'))
+        work_repo.update()
+        # This is created on the first fetch
+        self.assertTrue(os.path.exists(os.path.join(
+            self.workspace_root, 'stamp1')))
+        # This is created on the second fetch
+        self.assertTrue(os.path.exists(os.path.join(
+            self.workspace_root, 'stamp2')))
diff --git a/zuul/merger/merger.py b/zuul/merger/merger.py
index 035dbf5..c221478 100644
--- a/zuul/merger/merger.py
+++ b/zuul/merger/merger.py
@@ -17,6 +17,7 @@
import logging
import os
import shutil
+import time
import git
import gitdb
@@ -59,7 +60,8 @@
class Repo(object):
def __init__(self, remote, local, email, username, speed_limit, speed_time,
- sshkey=None, cache_path=None, logger=None, git_timeout=300):
+ sshkey=None, cache_path=None, logger=None, git_timeout=300,
+ retry_attempts=3, retry_interval=30):
if logger is None:
self.log = logging.getLogger("zuul.Repo")
else:
@@ -78,6 +80,8 @@
self.username = username
self.cache_path = cache_path
self._initialized = False
+ self.retry_attempts = retry_attempts
+ self.retry_interval = retry_interval
try:
self._ensure_cloned()
except Exception:
@@ -123,14 +127,60 @@
     def _git_clone(self, url):
+        """Clone ``url`` into ``self.local_path``, retrying on failure.
+
+        At least one attempt is always made and at most
+        ``self.retry_attempts``; after each failed attempt except the last,
+        the method sleeps ``self.retry_interval`` seconds and logs a
+        warning.  The exception from the last attempt is re-raised.
+        """
         mygit = git.cmd.Git(os.getcwd())
         mygit.update_environment(**self.env)
-        with timeout_handler(self.local_path):
-            mygit.clone(git.cmd.Git.polish_url(url), self.local_path,
-                        kill_after_timeout=self.git_timeout)
+
+        # Attempt the clone at least once, even if retry_attempts < 1.
+        attempts = max(1, self.retry_attempts)
+        for attempt in range(1, attempts + 1):
+            try:
+                with timeout_handler(self.local_path):
+                    mygit.clone(git.cmd.Git.polish_url(url), self.local_path,
+                                kill_after_timeout=self.git_timeout)
+                break
+            except Exception:
+                if attempt < attempts:
+                    time.sleep(self.retry_interval)
+                    self.log.warning("Retry %s: Clone %s",
+                                     attempt, self.local_path)
+                else:
+                    raise
 
     def _git_fetch(self, repo, remote, ref=None, **kwargs):
+        """Fetch ``ref`` from ``remote`` in ``repo``, retrying on failure.
+
+        Extra keyword arguments are passed through to ``repo.git.fetch``.
+        At least one attempt is always made and at most
+        ``self.retry_attempts``; after each failed attempt except the last,
+        the method sleeps ``self.retry_interval`` seconds, logs the
+        exception and calls ``self._ensure_cloned()`` before trying again.
+        The exception from the last attempt is re-raised.
+        """
-        with timeout_handler(self.local_path):
-            repo.git.fetch(remote, ref, kill_after_timeout=self.git_timeout,
-                           **kwargs)
+        # Attempt the fetch at least once, even if retry_attempts < 1.
+        attempts = max(1, self.retry_attempts)
+        for attempt in range(1, attempts + 1):
+            try:
+                with timeout_handler(self.local_path):
+                    repo.git.fetch(remote, ref,
+                                   kill_after_timeout=self.git_timeout,
+                                   **kwargs)
+                break
+            except Exception:
+                if attempt < attempts:
+                    time.sleep(self.retry_interval)
+                    self.log.exception("Retry %s: Fetch %s %s %s",
+                                       attempt, self.local_path,
+                                       remote, ref)
+                    # NOTE(review): presumably re-creates the clone if the
+                    # failed fetch left it unusable -- confirm.
+                    self._ensure_cloned()
+                else:
+                    raise
 
     def createRepoObject(self):
         self._ensure_cloned()