From b2411fe483c7c1b8bbe469e567b1c9f686f948c7 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Sat, 3 Nov 2018 22:09:18 -0400 Subject: [PATCH] redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that. --- builder.py | 80 +++++++++++++++++++++- jwack.py | 3 + log.py | 30 ++++---- redo-ifchange.py | 8 ++- redo-log | 1 + redo-log.py | 175 +++++++++++++++++++++++++++++++++++++++++++++++ redo.py | 16 ++++- state.py | 18 +++-- vars.py | 1 + vars_init.py | 6 ++ 10 files changed, 315 insertions(+), 23 deletions(-) create mode 120000 redo-log create mode 100755 redo-log.py diff --git a/builder.py b/builder.py index 4cc0fc8..b6ae049 100644 --- a/builder.py +++ b/builder.py @@ -1,7 +1,7 @@ import sys, os, errno, random, stat, signal, time import vars, jwack, state, paths from helpers import unlink, close_on_exec, join -from log import log, log_, debug, debug2, err, warn +from log import log, log_, debug, debug2, err, warn, check_tty def _nice(t): @@ -18,6 +18,72 @@ def _try_stat(filename): raise +log_reader_pid = None + + +def start_stdin_log_reader(status, details): + if vars.RAW_LOGS: return + global log_reader_pid + r, w = os.pipe() # main pipe to redo-log + ar, aw = os.pipe() # ack pipe from redo-log --ack-fd + sys.stdout.flush() + sys.stderr.flush() + pid = os.fork() + if pid: + # parent + log_reader_pid = pid + os.close(r) + os.close(aw) + b = os.read(ar, 8) + if not b: + # subprocess died without sending us anything: that's bad. + err('failed to start redo-log subprocess; cannot continue.\n') + os._exit(99) + assert b == 'REDO-OK\n' + # now we know the subproc is running and will report our errors + # to stderr, so it's okay to lose our own stderr. + os.close(ar) + os.dup2(w, 1) + os.dup2(w, 2) + os.close(w) + check_tty() + else: + # child + try: + os.close(ar) + os.close(w) + os.dup2(r, 0) + os.close(r) + argv = [ + 'redo-log', + '--recursive', '--follow', + '--ack-fd', str(aw), + ('--status' if status else '--no-status'), + ('--details' if details else '--no-details'), + '-' + ] + os.execvp(argv[0], argv) + except Exception, e: + sys.stderr.write('redo-log: exec: %s\n' % e) + finally: + os._exit(99) + + +def await_log_reader(): + if vars.RAW_LOGS: return + global log_reader_pid + if log_reader_pid > 0: + # never actually close fd#1 or fd#2; insanity awaits. + # replace it with something else instead. + # Since our stdout/stderr are attached to redo-log's stdin, + # this will notify redo-log that it's time to die (after it finishes + # reading the logs) + out = open('/dev/tty', 'w') + os.dup2(out.fileno(), 1) + os.dup2(out.fileno(), 2) + os.waitpid(log_reader_pid, 0) + + class ImmediateReturn(Exception): def __init__(self, rv): Exception.__init__(self, "immediate return with exit code %d" % rv) @@ -44,12 +110,14 @@ class BuildJob: assert(self.lock.owned) try: try: - dirty = self.shouldbuildfunc(self.t) + is_target, dirty = self.shouldbuildfunc(self.t) except state.CyclicDependencyError: err('cyclic dependency while checking %s\n' % _nice(self.t)) raise ImmediateReturn(208) if not dirty: # target doesn't need to be built; skip the whole task + if is_target and vars.DEBUG_LOCKS: + log('[unchanged] %s\n' % _nice(self.t)) return self._after2(0) except ImmediateReturn, e: return self._after2(e.rv) @@ -117,6 +185,9 @@ class BuildJob: firstline = open(os.path.join(dodir, dofile)).readline().strip() if firstline.startswith('#!/'): argv[0:2] = firstline[2:].split(' ') + # make sure to create the logfile *before* writing the log about it. + # that way redo-log won't trace into an obsolete logfile. + if not vars.RAW_LOGS: open(state.logname(self.sf.id), 'w') log('%s\n' % _nice(t)) self.dodir = dodir self.basename = basename @@ -173,6 +244,11 @@ class BuildJob: os.dup2(self.f.fileno(), 1) os.close(self.f.fileno()) close_on_exec(1, False) + if not vars.RAW_LOGS: + logf = open(state.logname(self.sf.id), 'w') + os.dup2(logf.fileno(), 2) + close_on_exec(2, False) + logf.close() signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE if vars.VERBOSE or vars.XTRACE: log_('* %s\n' % ' '.join(self.argv)) os.execvp(self.argv[0], self.argv) diff --git a/jwack.py b/jwack.py index a674e1a..949b756 100644 --- a/jwack.py +++ b/jwack.py @@ -37,6 +37,9 @@ def _timeout(sig, frame): pass +# We make the pipes use the first available fd numbers starting at startfd. +# This makes it easier to differentiate different kinds of pipes when using +# strace. def _make_pipe(startfd): (a,b) = os.pipe() fds = (fcntl.fcntl(a, fcntl.F_DUPFD, startfd), diff --git a/log.py b/log.py index 2acf000..0691e4b 100644 --- a/log.py +++ b/log.py @@ -1,20 +1,24 @@ import sys, os import vars -# By default, no output colouring. -RED = "" -GREEN = "" -YELLOW = "" -BOLD = "" -PLAIN = "" -if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb': - # ...use ANSI formatting codes. - RED = "\x1b[31m" - GREEN = "\x1b[32m" - YELLOW = "\x1b[33m" - BOLD = "\x1b[1m" - PLAIN = "\x1b[m" +def check_tty(): + global RED, GREEN, YELLOW, BOLD, PLAIN + if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb': + # ...use ANSI formatting codes. + RED = "\x1b[31m" + GREEN = "\x1b[32m" + YELLOW = "\x1b[33m" + BOLD = "\x1b[1m" + PLAIN = "\x1b[m" + else: + RED = "" + GREEN = "" + YELLOW = "" + BOLD = "" + PLAIN = "" + +check_tty() def log_(s): diff --git a/redo-ifchange.py b/redo-ifchange.py index af232be..081d4e9 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -14,11 +14,13 @@ def should_build(t): raise builder.ImmediateReturn(32) dirty = deps.isdirty(f, depth = '', max_changed = vars.RUNID, already_checked=[]) - return dirty==[f] and deps.DIRTY or dirty + return f.is_generated, dirty==[f] and deps.DIRTY or dirty rv = 202 try: + if vars_init.is_toplevel: + builder.start_stdin_log_reader(status=True, details=True) if vars.TARGET and not vars.UNLOCKED: me = os.path.join(vars.STARTDIR, os.path.join(vars.PWD, vars.TARGET)) @@ -41,6 +43,10 @@ try: finally: jwack.force_return_tokens() except KeyboardInterrupt: + if vars_init.is_toplevel: + builder.await_log_reader() sys.exit(200) state.commit() +if vars_init.is_toplevel: + builder.await_log_reader() sys.exit(rv) diff --git a/redo-log b/redo-log new file mode 120000 index 0000000..dd3b7c1 --- /dev/null +++ b/redo-log @@ -0,0 +1 @@ +redo-log.py \ No newline at end of file diff --git a/redo-log.py b/redo-log.py new file mode 100755 index 0000000..c7effa7 --- /dev/null +++ b/redo-log.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +import errno, os, re, sys, time +import options + +optspec = """ +redo-log [options...] [targets...] +-- +r,recursive show build logs for dependencies too +u,unchanged show lines for dependencies not needing to be rebuilt +f,follow keep watching for more lines to be appended (like tail -f) +no-details only show 'redo' recursion trace, not build output +no-colorize don't colorize 'redo' log messages +no-status don't display build summary line in --follow +ack-fd= (internal use only) print REDO-OK to this fd upon starting +""" +o = options.Options(optspec) +(opt, flags, extra) = o.parse(sys.argv[1:]) +targets = extra + +import vars_init +vars_init.init(list(targets)) + +import vars, state + +already = set() +queue = [] +depth = [] +total_lines = 0 +status = None + + +# regexp for matching "redo" lines in the log, which we use for recursion. +# format: +# redo path/to/target which might have spaces +# redo [unchanged] path/to/target which might have spaces +# redo path/to/target which might have spaces (comment) +# FIXME: use a more structured format when writing the logs. +# That will prevent false positives and negatives. Then transform the +# structured format into a user-friendly format when printing in redo-log. +REDO_LINE_RE = re.compile(r'^redo\s+(\[\w+\] )?([^(:]+)( \([^)]+\))?\n$') + + +def is_locked(fid): + return (fid is not None) and not state.Lock(fid=fid).trylock() + + +def catlog(t): + global total_lines, status + if t in already: + return + depth.append(t) + already.add(t) + if t == '-': + f = sys.stdin + fid = None + logname = None + else: + try: + sf = state.File(name=t, allow_add=False) + except KeyError: + sys.stderr.write('redo-log: %r: not known to redo.\n' % (t,)) + sys.exit(24) + fid = sf.id + del sf + state.rollback() + logname = state.logname(fid) + f = None + delay = 0.01 + was_locked = is_locked(fid) + line_head = '' + while 1: + if not f: + try: + f = open(logname) + except IOError, e: + if e.errno == errno.ENOENT: + # ignore files without logs + pass + else: + raise + if f: + # Note: normally includes trailing \n. + # In 'follow' mode, might get a line with no trailing \n + # (eg. when ./configure is halfway through a test), which we + # deal with below. + line = f.readline() + else: + line = None + if not line and (not opt.follow or not was_locked): + # file not locked, and no new lines: done + break + if not line: + was_locked = is_locked(fid) + if opt.follow: + if opt.status: + # FIXME use actual terminal width here + head = '[redo] %s ' % ('{:,}'.format(total_lines)) + tail = '' + for i in reversed(depth): + n = os.path.basename(i) + if 65 - len(head) - len(tail) < len(n) + 1: + tail = '... ' + tail + break + else: + tail = n + ' ' + tail + status = head + tail + sys.stdout.flush() + sys.stderr.write('\r%-70.70s\r' % status) + time.sleep(min(delay, 1.0)) + delay += 0.01 + continue + total_lines += 1 + delay = 0.01 + if not line.endswith('\n'): + line_head += line + continue + if line_head: + line = line_head + line + line_head = '' + if status: + sys.stdout.flush() + sys.stderr.write('\r%-70.70s\r' % '') + status = None + g = re.match(REDO_LINE_RE, line) + if g: + attr, name, comment = g.groups() + if attr == '[unchanged] ': + if opt.unchanged: + if name not in already: + sys.stdout.write(line) + if opt.recursive: + catlog(name) + else: + sys.stdout.write(line) + if opt.recursive and (not comment or comment == ' (WAITING)'): + assert name + catlog(name) + else: + if opt.details: + sys.stdout.write(line) + if status: + sys.stdout.flush() + sys.stderr.write('\r%-70.70s\r' % '') + status = None + if line_head: + # partial line never got terminated + print line_head + assert(depth[-1] == t) + depth.pop(-1) + +try: + if not targets: + sys.stderr.write('redo-log: give at least one target; maybe "all"?\n') + sys.exit(1) + if opt.status < 2 and not os.isatty(2): + opt.status = False + if opt.ack_fd: + ack_fd = int(opt.ack_fd) + assert(ack_fd > 2) + if os.write(ack_fd, 'REDO-OK\n') != 8: + raise Exception('write to ack_fd returned wrong length') + os.close(ack_fd) + queue += targets + while queue: + t = queue.pop(0) + if t != '-': + print 'redo %s' % t + catlog(t) +except KeyboardInterrupt: + sys.exit(200) +except IOError, e: + if e.errno == errno.EPIPE: + pass + else: + raise diff --git a/redo.py b/redo.py index 3324a4b..57c906c 100755 --- a/redo.py +++ b/redo.py @@ -12,6 +12,9 @@ v,verbose print commands as they are read from .do files (variables intact) x,xtrace print commands as they are executed (variables expanded) k,keep-going keep going as long as possible even if some targets fail shuffle randomize the build order to find dependency bugs +no-details only show 'redo' recursion trace (to see more later, use redo-log) +no-status don't display build summary line at the bottom of the screen +raw-logs don't use redo-log, just send all output straight to stderr debug-locks print messages about file locking (useful for debugging) debug-pids print process ids as part of log messages (useful for debugging) version print the current version and exit @@ -35,7 +38,10 @@ if opt.keep_going: os.environ['REDO_KEEP_GOING'] = '1' if opt.shuffle: os.environ['REDO_SHUFFLE'] = '1' -if opt.debug_locks: +if opt.raw_logs: + os.environ['REDO_RAW_LOGS'] = '1' +if opt.debug_locks or not os.environ.get('REDO_RAW_LOGS'): + # FIXME: force-enabled for redo-log, for now os.environ['REDO_DEBUG_LOCKS'] = '1' if opt.debug_pids: os.environ['REDO_DEBUG_PIDS'] = '1' @@ -47,6 +53,8 @@ import vars, state, builder, jwack from log import warn, err try: + if vars_init.is_toplevel: + builder.start_stdin_log_reader(status=opt.status, details=opt.details) for t in targets: if os.path.exists(t): f = state.File(name=t) @@ -61,13 +69,17 @@ try: jwack.setup(j) try: assert(state.is_flushed()) - retcode = builder.main(targets, lambda t: True) + retcode = builder.main(targets, lambda t: (True, True)) assert(state.is_flushed()) finally: try: state.rollback() finally: jwack.force_return_tokens() + if vars_init.is_toplevel: + builder.await_log_reader() sys.exit(retcode) except KeyboardInterrupt: + if vars_init.is_toplevel: + builder.await_log_reader() sys.exit(200) diff --git a/state.py b/state.py index d27f472..ea34096 100644 --- a/state.py +++ b/state.py @@ -174,7 +174,7 @@ class File(object): # use this mostly to avoid accidentally assigning to typos __slots__ = ['id'] + _file_cols[1:] - def _init_from_idname(self, id, name): + def _init_from_idname(self, id, name, allow_add): q = ('select %s from Files ' % join(', ', _file_cols)) if id != None: q += 'where rowid=?' @@ -189,7 +189,9 @@ class File(object): row = d.execute(q, l).fetchone() if not row: if not name: - raise Exception('No file with id=%r name=%r' % (id, name)) + raise KeyError('No file with id=%r name=%r' % (id, name)) + elif not allow_add: + raise KeyError('No file with name=%r' % (name,)) try: _write('insert into Files (name) values (?)', [name]) except sqlite3.IntegrityError: @@ -207,17 +209,17 @@ class File(object): if self.name == ALWAYS and self.changed_runid < vars.RUNID: self.changed_runid = vars.RUNID - def __init__(self, id=None, name=None, cols=None): + def __init__(self, id=None, name=None, cols=None, allow_add=True): if cols: return self._init_from_cols(cols) else: - return self._init_from_idname(id, name) + return self._init_from_idname(id, name, allow_add=allow_add) def __repr__(self): return "File(%r)" % (self.nicename(),) def refresh(self): - self._init_from_idname(self.id, None) + self._init_from_idname(self.id, None, allow_add=False) def save(self): cols = join(', ', ['%s=?'%i for i in _file_cols[2:]]) @@ -324,6 +326,11 @@ def files(): yield File(cols=cols) +def logname(fid): + """Given the id of a File, return the filename of its build log.""" + return os.path.join(vars.BASE, '.redo', 'log.%d' % fid) + + # FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python # doesn't do the lockdata structure in a portable way, so we have to use # fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's @@ -365,6 +372,7 @@ class Lock: raise else: self.owned = True + return self.owned def waitlock(self): self.check() diff --git a/vars.py b/vars.py index 25870fa..068c4c0 100644 --- a/vars.py +++ b/vars.py @@ -16,6 +16,7 @@ DEBUG_PIDS = os.environ.get('REDO_DEBUG_PIDS', '') and 1 or 0 VERBOSE = os.environ.get('REDO_VERBOSE', '') and 1 or 0 XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0 KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0 +RAW_LOGS = os.environ.get('REDO_RAW_LOGS', '') and 1 or 0 SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0 STARTDIR = os.environ.get('REDO_STARTDIR', '') RUNID = atoi(os.environ.get('REDO_RUNID')) or None diff --git a/vars_init.py b/vars_init.py index 641c3a3..693a8dc 100644 --- a/vars_init.py +++ b/vars_init.py @@ -1,9 +1,13 @@ import sys, os +is_toplevel = False + + def init_no_state(): if not os.environ.get('REDO'): os.environ['REDO'] = 'NOT_DEFINED' + is_toplevel = True if not os.environ.get('REDO_BASE'): os.environ['REDO_BASE'] = 'NOT_DEFINED' @@ -11,6 +15,8 @@ def init_no_state(): def init(targets): if not os.environ.get('REDO'): # toplevel call to redo + global is_toplevel + is_toplevel = True if len(targets) == 0: targets.append('all') exenames = [os.path.abspath(sys.argv[0]),