# Copyright 2013 Rackspace Australia
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


import copy
import json
import logging
import os
import tempfile
import pkg_resources
import socket
import uuid

from turbo_hipster.lib import common
from turbo_hipster.lib import utils


class Task(object):
| """ A base object for running a job (aka Task) """ |
| log = logging.getLogger("task") |
| |
| def __init__(self, worker_server, job_name, job_config): |
| # TODO(jhesketh): remove the need for worker_server here |
| self.worker_server = worker_server |
| # NOTE(jhesketh): job_config may be in the old format where name |
| # refers to the plugin and function is the job name. Thus these should |
| # never be used in a job, instead use the provided job_name. |
| self.job_config = job_config |
| self.job_name = job_name |
| self._reset() |
| |
| # Define the number of steps we will do to determine our progress. |
| self.total_steps = 0 |
| |
| def _cleanup(self): |
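        """Tear down per-job state: remove and close the task's file log
        handler and, if the job config requests it via 'shutdown-th',
        gracefully shut down the worker."""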
        if self.log_handler:
            self.log.removeHandler(self.log_handler)
            self.log_handler.flush()
            self.log_handler.close()
        if ('shutdown-th' in self.job_config and
                self.job_config['shutdown-th']):
            self.worker_server.shutdown_gracefully()

    def _reset(self):
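        """Reset the per-job state so the task instance can run a new job."""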
        self.job = None
        self.job_arguments = None
        self.work_data = None
        self.cancelled = False
        self.success = True
        self.messages = []
        self.current_step = 0
        self.log_handler = None
        self.th_uuid = str(uuid.uuid4())[-12:]

    def _prep_working_dir(self):
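        """Work out the job working, results and task log paths and make
        sure the results directory exists."""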
        # Use the th_uuid so that if the same job is somehow taken twice from
        # zuul we won't re-use zuul's uuid. This shouldn't happen, but if it
        # does, it prevents previous results from being overwritten.
        self.job_working_dir = os.path.join(
            self.worker_server.config['jobs_working_dir'],
            self.th_uuid,
            self.job_arguments['LOG_PATH']
        )
        self.job_results_dir = os.path.join(
            self.job_working_dir,
            'results'
        )
        self.task_output_log = os.path.join(
            self.job_results_dir,
            'task_output.log'
        )

        if not os.path.isdir(os.path.dirname(self.task_output_log)):
            os.makedirs(os.path.dirname(self.task_output_log))

    def _setup_task_logging(self):
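        """Attach a file handler so this task's log output is also written
        to task_output.log in the job results directory."""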
        self.log_handler = logging.FileHandler(self.task_output_log)
        log_formatter = logging.Formatter('%(asctime)s %(message)s')
        self.log_handler.setFormatter(log_formatter)
        self.log.addHandler(self.log_handler)
        self.log.setLevel(logging.DEBUG)

    def start_job(self, job):
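        """Run a gearman job: set up logging and working directories, execute
        the job steps, then upload the results and report back to gearman."""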
        self._reset()
        self.job = job

        if self.job is not None:
            try:
                self.job_arguments = \
                    json.loads(self.job.arguments.decode('utf-8'))
                self.log.debug("Got job from ZUUL %s" % self.job_arguments)

                # Send an initial WORK_DATA packet
                self._send_work_data()

                # Prep working dirs
                self._prep_working_dir()

                # Now that we have working dirs we can log the job details to
                # a file
                self._setup_task_logging()

            except Exception as e:
                # If something failed during this section we have been unable
                # to log to file. As such, raise an exception to gearman.
                self.log.exception("Failure during setup")
                self.log.exception(e)
                if not self.cancelled:
                    self.success = False
                    self.messages.append('FAILURE during the job setup')
                    self.messages.append('Exception: %s' % e)
                self._send_work_data()
                self.job.sendWorkException(str(e).encode('utf-8'))
                # No point trying the job, let's return here
                self._send_final_results()
                return

            # From here on we can log exceptions to file
            try:
                # Execute the job_steps
                self.do_job_steps()
            except Exception as e:
                # Log the problem
                if not self.cancelled:
                    self.success = False
                    self.log.exception('Something failed running the job!')
                    self.messages.append('FAILURE running the job')
                    self.messages.append('Exception: %s' % e)
                # Don't return from here as we can continue uploading the
                # logs

            try:
                self._cleanup()
                self._upload_results()
            except Exception as e:
                # If something failed during this section we have been unable
                # to upload the log. As such, raise an exception to gearman.
                self.log.exception("Failure during cleanup and upload")
                self.log.exception(e)
                if not self.cancelled:
                    self.success = False
                    self.messages.append('FAILURE during cleanup and log '
                                         'upload')
                    self.messages.append('Exception: %s' % e)
                self._send_work_data()
                self.job.sendWorkException(str(e).encode('utf-8'))
            finally:
                # Finally, send updated work data and completion packets
                self._send_final_results()

    def stop_working(self, number=None):
        # Check that the number is for this job instance (None will cancel
        # all). This makes it possible to run multiple workers with this task
        # on the same server.
        if number is None or number == self.job.unique:
            self.log.debug("We've been asked to stop by our gearman manager")
            self.cancelled = True
            # TODO: Work out how to kill current step

    def _get_work_data(self):
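        """Lazily build and return the work_data dict describing this worker
        and job for gearman WORK_DATA packets."""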
        if self.work_data is None:
            hostname = os.uname()[1]
            fqdn = socket.getfqdn()
            self.work_data = dict(
                name=self.job_name,
                number=self.job.unique,
                manager='turbo-hipster-manager-%s' % hostname,
                url='http://localhost',
                worker_hostname=hostname,
                worker_fqdn=fqdn,
                worker_program='turbo-hipster',
            )
            try:
                self.work_data['worker_version'] = (
                    pkg_resources.get_distribution('turbo_hipster').version
                )
            except pkg_resources.DistributionNotFound:
                # The package isn't installed. Manually extracting the
                # version in some ad-hoc manner isn't worth the effort, so
                # just ignore this.
                pass
        return self.work_data

    def _send_work_data(self):
| """ Send the WORK DATA in json format for job """ |
| self.log.debug("Send the work data response: %s" % |
| json.dumps(self._get_work_data())) |
| if self.success: |
| self.work_data['result'] = 'SUCCESS' |
| else: |
| self.work_data['result'] = '\n'.join(self.messages) |
| self.job.sendWorkData(json.dumps(self._get_work_data())) |
| |
| def _send_final_results(self): |
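        """Send the final work data followed by a WORK_COMPLETE or WORK_FAIL
        packet depending on whether the job succeeded."""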
        self._send_work_data()

        if self.success:
            self.job.sendWorkComplete(
                json.dumps(self._get_work_data()))
        else:
            self.job.sendWorkFail()

    def _do_next_step(self):
| """ Send a WORK_STATUS command to the gearman server. |
| This can provide a progress bar. """ |
| |
| # Each opportunity we should check if we need to stop |
| if self.cancelled: |
| self.work_data['result'] = "Failed: Job cancelled" |
| self.job.sendWorkStatus(self.current_step, self.total_steps) |
| self.job.sendWorkFail() |
| raise Exception('Job cancelled') |
| |
| self.current_step += 1 |
| self.job.sendWorkStatus(self.current_step, self.total_steps) |
| |
| def _upload_results(self): |
| """Upload the contents of the working dir either using the instructions |
| provided by zuul and/or our configuration""" |
| |
| self.log.debug("Process the resulting files (upload/push)") |
| |
| dir_list = os.listdir(self.job_results_dir) |
| path_list = [os.path.join(self.job_results_dir, i) for i in dir_list] |
| |
| if 'publish_logs' in self.worker_server.config: |
| index_url = utils.push_files( |
| self.job_arguments['LOG_PATH'], path_list, |
| self.worker_server.config['publish_logs']) |
| self.log.debug("Index URL found at %s" % index_url) |
| self.work_data['url'] = index_url |
| |
| if 'ZUUL_EXTRA_SWIFT_URL' in self.job_arguments: |
| # Upload to zuul's url as instructed |
| utils.zuul_swift_upload(self.job_working_dir, self.job_arguments) |
| self.work_data['url'] = self.job_arguments['LOG_PATH'] |
| |
| |
| class ShellTask(Task): |
| log = logging.getLogger("task.shell_task") |
| |
| def __init__(self, worker_server, job_name, job_config): |
| super(ShellTask, self).__init__(worker_server, job_name, job_config) |
| # Define the number of steps we will do to determine our progress. |
| self.total_steps = 5 |
| |
| def _reset(self): |
| super(ShellTask, self)._reset() |
| self.git_path = None |
| self.job_working_dir = None |
| self.shell_output_log = None |
| self.git_prep_log = None |
| self.output_summary = None |
| |
| def do_job_steps(self): |
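        """Run each step of the shell job in order, logging each step as it
        starts."""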
        self.log.info('Step 1: Setup environment')
        self._setup_environment()

        self.log.info('Step 2: Checkout updates from git')
        self._grab_patchset(self.job_arguments)

        self.log.info('Step 3: Run shell script')
        self._execute_script()

        self.log.info('Step 4: Analyse logs for errors')
        self._parse_and_check_results()

        self.log.info('Step 5: Handle the results')
        self._handle_results()

        self.log.info('Step 6: Handle extra actions such as shutting down')
        self._handle_cleanup()

    @common.task_step
    def _setup_environment(self):
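        """Create the git prep and shell output log paths and a temporary
        results summary file; output_summary is the (fd, path) tuple
        returned by tempfile.mkstemp()."""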
        self.git_prep_log = os.path.join(
            self.job_results_dir,
            'git_prep.log'
        )
        self.shell_output_log = os.path.join(
            self.job_results_dir,
            'shell_output.log'
        )
        self.output_summary = tempfile.mkstemp()
        self.log.info('Working on node %s' % (os.uname()[1]))

    @common.task_step
    def _grab_patchset(self, job_args):
| """ Checkout the reference into config['git_working_dir'] """ |
| |
| # TODO(jhesketh): Use the zuul cloner stuff instead :-) |
| |
| self.log.debug("Grab the patchset we want to test against") |
| local_path = os.path.join(self.worker_server.config['git_working_dir'], |
| self.th_uuid, job_args['ZUUL_PROJECT']) |
| if not os.path.exists(local_path): |
| os.makedirs(local_path) |
| |
| env = os.environ |
| git_args = copy.deepcopy(job_args) |
| env.update(git_args) |
| |
| cmd = os.path.join( |
| os.path.join(os.path.dirname(os.path.abspath(__file__)), |
| 'gerrit-git-prep.sh') |
| ) |
| cmd += ' ' + self.worker_server.config['zuul_server']['gerrit_site'] |
| cmd += ' ' + self.worker_server.config['zuul_server']['git_origin'] |
| |
| # NOTE(jhesketh): The most common problem is the git remote timing out |
| # Retry cloning multiple times before raising a failure. |
| tries = 0 |
| return_code = 1 |
| while return_code != 0: |
| tries += 1 |
| env.update({'GIT_CURL_VERBOSE': '1', 'GIT_TRACE': '1'}) |
| return_code = utils.execute_to_log(cmd, self.git_prep_log, |
| env=env, cwd=local_path) |
| if tries == 2: |
| # Try upping the post buffer. See: |
| # http://stackoverflow.com/questions/6842687/ |
| # the-remote-end-hung-up-unexpectedly-while-git-cloning |
| utils.execute_to_log( |
| "git config --global http.postBuffer 1048576000", |
| self.git_prep_log, env=env, cwd=local_path) |
| if tries >= 4: |
| break |
| if return_code != 0: |
| cmd = 'ifconfig' |
| utils.execute_to_log(cmd, self.git_prep_log) |
| raise Exception("Failed to fetch patchset") |
| self.git_path = local_path |
| return local_path |
| |
| @common.task_step |
| def _execute_script(self): |
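        """Run the configured shell_script, passing it the git checkout
        path, the job working dir and the job's unique id, with the zuul
        parameters exported in the environment."""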
        # Run script
        cmd = self.job_config['shell_script']
        cmd += (
            (' %(git_path)s %(job_working_dir)s %(unique_id)s')
            % {
                'git_path': self.git_path,
                'job_working_dir': self.job_working_dir,
                'unique_id': self.job.unique
            }
        )

        env_args = copy.deepcopy(os.environ)
        env_args.update(self.job_arguments)
        if self.job.name.startswith('build:'):
            env_args['TH_JOB_NAME'] = self.job.name[len('build:'):]
        else:
            env_args['TH_JOB_NAME'] = self.job.name
        env_args['TH_RESULT_FILE'] = self.output_summary[1]

        self.script_return_code = utils.execute_to_log(
            cmd,
            self.shell_output_log,
            env=env_args
        )

    @common.task_step
    def _parse_and_check_results(self):
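        """If the script exited non-zero, mark the job as failed and record
        the first line of the results summary file (unless it starts with
        'SUCCESS') along with the return code."""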
        if self.script_return_code > 0:
            self.success = False
            with os.fdopen(self.output_summary[0]) as fp:
                line = fp.readline().strip()
                if len(line) and not line.startswith('SUCCESS'):
                    self.messages.append(line)
            self.messages.append('Return code from test script was non-zero '
                                 '(%d)' % self.script_return_code)

    @common.task_step
    def _handle_cleanup(self):
        """Handle cleanup functions. Shut down if requested so that no
        further jobs are run if the environment is dirty."""

        try:
            os.remove(self.output_summary[1])
        except OSError:
            pass

    @common.task_step
    def _handle_results(self):
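        """Handle the results (a no-op in this base shell task)."""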
        pass