# Copyright 2013 Rackspace Australia
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.


import git
import logging
import magic
import os
import requests
import select
import shutil
import subprocess
import swiftclient
import sys
import tempfile
import time


log = logging.getLogger('lib.utils')

class GitRepository(object):

    """ Manage a git repository for our uses """
    log = logging.getLogger("lib.utils.GitRepository")

    def __init__(self, remote_url, local_path):
        self.remote_url = remote_url
        self.local_path = local_path
        self._ensure_cloned()

        self.repo = git.Repo(self.local_path)

    def _ensure_cloned(self):
        if not os.path.exists(self.local_path):
            self.log.debug("Cloning from %s to %s" % (self.remote_url,
                                                      self.local_path))
            git.Repo.clone_from(self.remote_url, self.local_path)

    def fetch(self, ref):
        # The git.remote.fetch method may read in git progress info and
        # interpret it improperly, causing an AssertionError. Because the
        # data was fetched properly, subsequent fetches don't seem to fail.
        # So try again if an AssertionError is caught.
        origin = self.repo.remotes.origin
        self.log.debug("Fetching %s from %s" % (ref, origin))

        try:
            origin.fetch(ref)
        except AssertionError:
            origin.fetch(ref)

    def checkout(self, ref):
        self.log.debug("Checking out %s" % ref)
        return self.repo.git.checkout(ref)

    def reset(self):
        self._ensure_cloned()
        self.log.debug("Resetting repository %s" % self.local_path)
        self.update()
        origin = self.repo.remotes.origin
        for ref in origin.refs:
            if ref.remote_head == 'HEAD':
                continue
            self.repo.create_head(ref.remote_head, ref, force=True)

        # Reset to remote HEAD (usually origin/master)
        self.repo.head.reference = origin.refs['HEAD']
        self.repo.head.reset(index=True, working_tree=True)
        self.repo.git.clean('-x', '-f', '-d')

    def update(self):
        self._ensure_cloned()
        self.log.debug("Updating repository %s" % self.local_path)
        origin = self.repo.remotes.origin
        origin.update()
        # If the remote repository is repacked, the repo object's
        # cache may be out of date. Specifically, it caches whether
        # to check the loose or packed DB for a given SHA. Further,
        # if there was no pack or loose directory to start with, the
        # repo object may not even have a database for it. Avoid
        # these problems by recreating the repo object.
        self.repo = git.Repo(self.local_path)

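# Example usage of GitRepository (a sketch only; the URL, path and ref below
# are hypothetical):
#
#   repo = GitRepository('https://example.org/project.git',
#                        '/var/lib/turbo-hipster/project')
#   repo.fetch('refs/changes/12/3412/3')
#   repo.checkout('FETCH_HEAD')
#   repo.reset()  # later, return the checkout to a clean remote HEAD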

def execute_to_log(cmd, logfile, timeout=-1, watch_logs=[], heartbeat=30,
                   env=None, cwd=None):
    """ Executes a command and logs the STDOUT/STDERR and output of any
    supplied watch_logs from logs into a new logfile

    watch_logs is a list of tuples with (name, file) """

    if not os.path.isdir(os.path.dirname(logfile)):
        os.makedirs(os.path.dirname(logfile))

    logger = logging.getLogger(logfile)
    log_handler = logging.FileHandler(logfile)
    log_formatter = logging.Formatter('%(asctime)s %(message)s')
    log_handler.setFormatter(log_formatter)
    logger.addHandler(log_handler)

    descriptors = {}

    for watch_file in watch_logs:
        if not os.path.exists(watch_file[1]):
            logger.warning('Failed to monitor log file %s: file not found'
                           % watch_file[1])
            continue

        try:
            fd = os.open(watch_file[1], os.O_RDONLY)
            os.lseek(fd, 0, os.SEEK_END)
            descriptors[fd] = {'name': watch_file[0],
                               'poll': select.POLLIN,
                               'lines': ''}
        except Exception as e:
            logger.warning('Failed to monitor log file %s: %s'
                           % (watch_file[1], e))

    cmd += ' 2>&1'
    logger.info("[running %s]" % cmd)
    start_time = time.time()
    p = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        env=env, cwd=cwd)

    descriptors[p.stdout.fileno()] = dict(
        name='[output]',
        poll=(select.POLLIN | select.POLLHUP),
        lines=''
    )

    poll_obj = select.poll()
    for fd, descriptor in descriptors.items():
        poll_obj.register(fd, descriptor['poll'])

    # Use a mutable container for the heartbeat timestamp so the nested
    # process() function can update it. (Python 2 has no nonlocal, and a
    # 'global' declaration would never touch this function-local variable,
    # leaving the heartbeat timer unaware of real output.)
    last_heartbeat = {'time': time.time()}

    def process(fd):
        """ Write the fd to log """
        descriptors[fd]['lines'] += os.read(fd, 1024 * 1024)
        # Avoid partial lines by only processing input with breaks
        if descriptors[fd]['lines'].find('\n') != -1:
            elems = descriptors[fd]['lines'].split('\n')
            # Take all but the partial line
            for l in elems[:-1]:
                if len(l) > 0:
                    l = '%s %s' % (descriptors[fd]['name'], l)
                    logger.info(l)
                    last_heartbeat['time'] = time.time()
            # Place the partial line back into lines to be processed
            descriptors[fd]['lines'] = elems[-1]

    while p.poll() is None:
        if timeout > 0 and time.time() - start_time > timeout:
            # Append to logfile
            logger.info("[timeout]")
            os.kill(p.pid, 9)

        for fd, flag in poll_obj.poll(0):
            process(fd)

        if heartbeat and (time.time() - last_heartbeat['time'] > heartbeat):
            # Append to logfile
            logger.info("[heartbeat]")
            last_heartbeat['time'] = time.time()

    # Do one last write to get the remaining lines
    for fd, flag in poll_obj.poll(0):
        process(fd)

    # Clean up
    for fd, descriptor in descriptors.items():
        poll_obj.unregister(fd)
        if fd == p.stdout.fileno():
            # Don't try and close the process, it'll clean itself up
            continue
        os.close(fd)
    try:
        p.kill()
    except OSError:
        pass

    logger.info('[script exit code = %d]' % p.returncode)
    logger.removeHandler(log_handler)
    log_handler.flush()
    log_handler.close()
    return p.returncode

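# Example call (a sketch; the command and paths are hypothetical):
#
#   ret = execute_to_log(
#       'tox -epy27', '/var/log/turbo-hipster/job.log',
#       timeout=3600,
#       watch_logs=[('[mysqld]', '/var/log/mysql/error.log')])
#   # job.log then interleaves the command's output with the watched log,
#   # with [heartbeat] markers appended while everything is quiet.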

def zuul_swift_upload(file_path, job_arguments):
    """Upload working_dir to swift as per zuul's instructions"""
    # TODO(jhesketh): replace with swift_form_post_submit from below

    # NOTE(jhesketh): Zuul specifies an object prefix in the destination so
    # we don't need to be concerned with results_set_name

    file_list = []
    if os.path.isfile(file_path):
        file_list.append(file_path)
    elif os.path.isdir(file_path):
        for path, folders, files in os.walk(file_path):
            for f in files:
                f_path = os.path.join(path, f)
                file_list.append(f_path)

    # We are uploading the file_list as an HTTP POST multipart encoded.
    # First grab out the information we need to send back from the hmac_body
    payload = {}
    (object_prefix,
     payload['redirect'],
     payload['max_file_size'],
     payload['max_file_count'],
     payload['expires']) = \
        job_arguments['ZUUL_EXTRA_SWIFT_HMAC_BODY'].split('\n')

    url = job_arguments['ZUUL_EXTRA_SWIFT_URL']
    payload['signature'] = job_arguments['ZUUL_EXTRA_SWIFT_SIGNATURE']
    logserver_prefix = job_arguments['ZUUL_EXTRA_SWIFT_LOGSERVER_PREFIX']

    files = {}
    for i, f in enumerate(file_list):
        files['file%d' % (i + 1)] = open(f, 'rb')

    requests.post(url, data=payload, files=files)

    return (logserver_prefix +
            job_arguments['ZUUL_EXTRA_SWIFT_DESTINATION_PREFIX'])

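# For reference, ZUUL_EXTRA_SWIFT_HMAC_BODY is a newline-separated blob
# matching the five-way split above (field names only; actual values are
# supplied by zuul):
#
#   object_prefix
#   redirect
#   max_file_size
#   max_file_count
#   expires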

def generate_log_index(file_list, logserver_prefix, results_set_name):
    """Create an index of logfiles and links to them"""

    output = '<html><head><title>Index of results</title></head><body>'
    output += '<ul>'
    for f in file_list:
        file_url = os.path.join(logserver_prefix, results_set_name, f)
        # Because file_list is simply a list to create an index for, and it
        # isn't necessarily on disk, we can't check whether an entry is a
        # folder or a file. As such we normalise the name to get the
        # folder/filename, then check if the last character was a trailing
        # slash and re-append it to make it obvious that the link points to
        # a folder.
        filename_postfix = '/' if f[-1] == '/' else ''
        filename = os.path.basename(os.path.normpath(f)) + filename_postfix
        output += '<li>'
        output += '<a href="%s">%s</a>' % (file_url, filename)
        output += '</li>'

    output += '</ul>'
    output += '</body></html>'
    return output


def make_index_file(file_list, logserver_prefix, results_set_name,
                    index_filename='index.html'):
    """Writes an index into a file for pushing"""

    index_content = generate_log_index(file_list, logserver_prefix,
                                       results_set_name)
    tempdir = tempfile.mkdtemp()
    # Close the file before handing back the path so the content is flushed
    with open(os.path.join(tempdir, index_filename), 'w') as fd:
        fd.write(index_content)
    return os.path.join(tempdir, index_filename)


def get_file_mime(file_path):
    """Get the file mime using libmagic"""

    if not os.path.isfile(file_path):
        return None

    if hasattr(magic, 'from_file'):
        return magic.from_file(file_path, mime=True)
    else:
        # no magic.from_file, we might be using the libmagic bindings
        m = magic.open(magic.MAGIC_MIME)
        m.load()
        return m.file(file_path).split(';')[0]

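# Example (hypothetical path):
#
#   get_file_mime('/tmp/results/testr_results.html')
#   # -> 'text/html' with python-magic; the libmagic bindings yield the
#   #    same value once the '; charset=...' suffix is stripped above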

def swift_form_post_submit(file_list, url, hmac_body, signature):
    """Send the files to swift via the FormPost middleware"""

    # We are uploading the file_list as an HTTP POST multipart encoded.
    # First grab out the information we need to send back from the hmac_body
    payload = {}

    (object_prefix,
     payload['redirect'],
     payload['max_file_size'],
     payload['max_file_count'],
     payload['expires']) = hmac_body.split('\n')
    payload['signature'] = signature

    # Loop over the file list in chunks of max_file_count
    for sub_file_list in (file_list[pos:pos + int(payload['max_file_count'])]
                          for pos in xrange(0, len(file_list),
                                            int(payload['max_file_count']))):
        # The hmac_body fields are strings; cast expires before comparing,
        # otherwise (on Python 2) the comparison can never be true.
        if float(payload['expires']) < time.time():
            raise Exception("Ran out of time uploading files!")
        files = {}
        # Zuul's log path is generated without a trailing slash. As such the
        # object prefix does not contain a slash and the files would be
        # uploaded as 'prefix' + 'filename'. Assume we want the destination
        # url to look like a folder and make sure there's a slash between.
        filename_prefix = '/' if url[-1] != '/' else ''
        for i, f in enumerate(sub_file_list):
            if os.path.getsize(f['path']) > int(payload['max_file_size']):
                sys.stderr.write('Warning: %s exceeds %d bytes. Skipping...\n'
                                 % (f['path'], int(payload['max_file_size'])))
                continue
            files['file%d' % (i + 1)] = (filename_prefix + f['filename'],
                                         open(f['path'], 'rb'),
                                         get_file_mime(f['path']))
        requests.post(url, data=payload, files=files)

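# The generator above walks file_list in windows of max_file_count. With
# max_file_count=2 (an illustrative value), a five-entry list is posted as
# three FormPost requests:
#
#   [a, b, c, d, e] -> [a, b], [c, d], [e]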

def build_file_list(file_path, logserver_prefix, results_set_name,
                    create_dir_indexes=True):
    """Generate a list of files to upload to zuul. Recurses through
    directories and generates index.html files if requested."""

    # file_list: a list of dicts with {path=..., filename=...} where filename
    # is appended to the end of the object (paths can be used)
    file_list = []
    if os.path.isfile(file_path):
        file_list.append({'filename': os.path.basename(file_path),
                          'path': file_path})
    elif os.path.isdir(file_path):
        if file_path[-1] == os.sep:
            file_path = file_path[:-1]
        parent_dir = os.path.dirname(file_path)
        for path, folders, files in os.walk(file_path):
            folder_contents = []
            for f in files:
                full_path = os.path.join(path, f)
                relative_name = os.path.relpath(full_path, parent_dir)
                push_file = {'filename': relative_name,
                             'path': full_path}
                file_list.append(push_file)
                folder_contents.append(relative_name)

            for f in folders:
                full_path = os.path.join(path, f)
                relative_name = os.path.relpath(full_path, parent_dir)
                folder_contents.append(relative_name + '/')

            if create_dir_indexes:
                index_file = make_index_file(folder_contents,
                                             logserver_prefix,
                                             results_set_name)
                relative_name = os.path.relpath(path, parent_dir)
                file_list.append({
                    'filename': os.path.join(relative_name,
                                             os.path.basename(index_file)),
                    'path': index_file})

    return file_list

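# For a hypothetical results directory /tmp/results containing run.log and
# logs/mysql.log, build_file_list produces entries along the lines of:
#
#   [{'filename': 'results/run.log', 'path': '/tmp/results/run.log'},
#    {'filename': 'results/index.html', 'path': <tempfile path>},
#    {'filename': 'results/logs/mysql.log',
#     'path': '/tmp/results/logs/mysql.log'},
#    {'filename': 'results/logs/index.html', 'path': <tempfile path>}]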

def push_files(results_set_name, path_list, publish_config,
               generate_indexes=True):
    """ Push a log file/folder to a server. Returns the public URL """

    file_list = []
    root_list = []

    for file_path in path_list:
        file_path = os.path.normpath(file_path)
        if os.path.isfile(file_path):
            root_list.append(os.path.basename(file_path))
        else:
            root_list.append(os.path.basename(file_path) + '/')

        file_list += build_file_list(
            file_path, publish_config['prepend_url'], results_set_name,
            generate_indexes
        )

    index_file = ''
    if generate_indexes:
        index_file = make_index_file(root_list, publish_config['prepend_url'],
                                     results_set_name)
        file_list.append({
            'filename': os.path.basename(index_file),
            'path': index_file})

    method = publish_config['type'] + '_push_files'
    if method in globals() and hasattr(globals()[method], '__call__'):
        globals()[method](results_set_name, file_list, publish_config)

    return os.path.join(publish_config['prepend_url'], results_set_name,
                        os.path.basename(index_file))

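# push_files dispatches on publish_config['type']: for example (values are
# illustrative only) a config such as
#
#   {'type': 'swift', 'prepend_url': 'http://logs.example.org/', ...}
#
# resolves to swift_push_files below, while {'type': 'local', ...} resolves
# to local_push_files. A new publisher only needs a matching
# <type>_push_files function in this module.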

def swift_push_files(results_set_name, file_list, swift_config):
    """ Push a log file to a swift server. """
    for file_item in file_list:
        with open(file_item['path'], 'r') as fd:
            con = swiftclient.client.Connection(
                authurl=swift_config['authurl'],
                user=swift_config['user'],
                key=swift_config['password'],
                os_options={'region_name': swift_config['region']},
                tenant_name=swift_config['tenant'],
                auth_version=2.0)
            filename = os.path.join(results_set_name, file_item['filename'])
            con.put_object(swift_config['container'], filename, fd)


def local_push_files(results_set_name, file_list, local_config):
    """ Copy the file locally somewhere sensible """
    for file_item in file_list:
        dest_dir = os.path.join(local_config['path'], results_set_name,
                                os.path.dirname(file_item['filename']))
        dest_filename = os.path.basename(file_item['filename'])
        if not os.path.isdir(dest_dir):
            os.makedirs(dest_dir)

        dest_file = os.path.join(dest_dir, dest_filename)
        shutil.copyfile(file_item['path'], dest_file)


def scp_push_files(results_set_name, file_path, local_config):
    """ Copy the file remotely over ssh """
    # TODO!
    pass