Handle Jenkins not launching a job.
If Jenkins fails to launch a job, a change can get stuck in the
queue. This is very similar to when we lose track of a build in
Jenkins, so in this case, pretend the build was launched, and mark
it as LOST. The queue will eventually drop the change and things
will keep moving.
Also, Jenkins sometimes erroneously reports 404 on the build URL.
If we get an error launching a job, retry it up to 6 times, waiting
5 seconds after each failure (about 30 seconds), before giving up.
Fixes bug #1011907.
Change-Id: I605fe37abf2fc3e7685df0f7a8e460b9a5d508b3
diff --git a/zuul/launcher/jenkins.py b/zuul/launcher/jenkins.py
index 705fbdf..209661d 100644
--- a/zuul/launcher/jenkins.py
+++ b/zuul/launcher/jenkins.py
@@ -180,20 +180,36 @@
params['GERRIT_OLDREV'] = change.oldrev
params['GERRIT_NEWREV'] = change.newrev
build = Build(job, uuid)
- self.builds[uuid] = build
# We can get the started notification on another thread before
# this is done so we add the build even before we trigger the
- # job on Jenkins. We should be careful to clean it up if it
- # doesn't actually kick off.
- try:
- self.jenkins.build_job(job.name, parameters=params)
- except:
- self.log.exception(
- "Exception launching build %s for job %s for change %s:" % (
- build, job, change))
- # Whoops. Remove that build we added.
- del self.builds[uuid]
- raise
+ # job on Jenkins.
+ self.builds[uuid] = build
+ # Sometimes Jenkins may erroneously respond with a 404. Handle
+ # that by retrying for 30 seconds.
+ launched = False
+ errored = False
+ for count in range(6):
+ try:
+ self.jenkins.build_job(job.name, parameters=params)
+ launched = True
+ break
+ except:
+ errored = True
+ self.log.exception(
+ "Exception launching build %s for job %s for change %s\
+(will retry):" % (build, job, change))
+ time.sleep(5)
+
+ if errored:
+ if launched:
+ self.log.error("Finally able to launch %s" % build)
+ else:
+ self.log.error("Unable to launch %s, even after retrying,\
+declaring lost" % build)
+ # To keep the queue moving, declare this as a lost build
+ # so that the change will get dropped.
+ self.onBuildCompleted(build.uuid, 'LOST', None, None)
+
return build
def findBuildInQueue(self, build):