From b2411fe483c7c1b8bbe469e567b1c9f686f948c7 Mon Sep 17 00:00:00 2001
From: Avery Pennarun <apenwarr@gmail.com>
Date: Sat, 3 Nov 2018 22:09:18 -0400
Subject: [PATCH] redo-log: capture and linearize the output of redo builds.

redo now saves the stderr from every .do script, for every target, into
a file in the .redo directory.  That means you can look up the logs
from the most recent build of any target using the new redo-log
command, for example:

	redo-log -r all

The default is to show logs non-recursively, that is, it'll show when a
target does redo-ifchange on another target, but it won't recurse into
the logs for the latter target.  With -r (recursive), it does.  With -u
(unchanged), it does even if redo-ifchange discovered that the target
was already up-to-date; in that case, it prints the logs of the *most
recent* time the target was generated.

With --no-details, redo-log will show only the 'redo' lines, not the
other log messages.  For very noisy build systems (like recursing into
a 'make' instance) this can be helpful to get an overview of what
happened, without all the cruft.

You can use the -f (follow) option like tail -f, to follow a build
that's currently in progress until it finishes.  redo itself spins up a
copy of redo-log -r -f while it runs, so you can see what's going on.

Still broken in this version:

- No man page or new tests yet.

- ANSI colors don't yet work (unless you use --raw-logs, which gives
  the old-style behaviour).

- You can't redirect the output of a sub-redo to a file or a
  pipe right now, because redo-log is eating it.

- The regex for matching 'redo' lines in the log is very gross.
  Instead, we should put the raw log files in a more machine-parseable
  format, and redo-log should turn that into human-readable format.

- redo-log tries to "linearize" the logs, which makes them
  comprehensible even for a large parallel build.  It recursively shows
  log messages for each target in depth-first tree order (by tracing
  into a new target every time it sees a 'redo' line).  This works
  really well, but in some specific cases, the "topmost" redo instance
  can get stuck waiting for a jwack token, which makes it look like the
  whole build has stalled, when really redo-log is just waiting a long
  time for a particular subprocess to be able to continue.  We'll need to
  add a specific workaround for that.
---
 builder.py       |  80 +++++++++++++++++++++-
 jwack.py         |   3 +
 log.py           |  30 ++++----
 redo-ifchange.py |   8 ++-
 redo-log         |   1 +
 redo-log.py      | 175 +++++++++++++++++++++++++++++++++++++++++++++++
 redo.py          |  16 ++++-
 state.py         |  18 +++--
 vars.py          |   1 +
 vars_init.py     |   6 ++
 10 files changed, 315 insertions(+), 23 deletions(-)
 create mode 120000 redo-log
 create mode 100755 redo-log.py

diff --git a/builder.py b/builder.py
index 4cc0fc8..b6ae049 100644
--- a/builder.py
+++ b/builder.py
@@ -1,7 +1,7 @@
 import sys, os, errno, random, stat, signal, time
 import vars, jwack, state, paths
 from helpers import unlink, close_on_exec, join
-from log import log, log_, debug, debug2, err, warn
+from log import log, log_, debug, debug2, err, warn, check_tty
 
 
 def _nice(t):
@@ -18,6 +18,72 @@ def _try_stat(filename):
             raise
 
 
+log_reader_pid = None
+
+
+def start_stdin_log_reader(status, details):
+    if vars.RAW_LOGS: return
+    global log_reader_pid
+    r, w = os.pipe()    # main pipe to redo-log
+    ar, aw = os.pipe()  # ack pipe from redo-log --ack-fd
+    sys.stdout.flush()
+    sys.stderr.flush()
+    pid = os.fork()
+    if pid:
+        # parent
+        log_reader_pid = pid
+        os.close(r)
+        os.close(aw)
+        b = os.read(ar, 8)
+        if not b:
+            # subprocess died without sending us anything: that's bad.
+            err('failed to start redo-log subprocess; cannot continue.\n')
+            os._exit(99)
+        assert b == 'REDO-OK\n'
+        # now we know the subproc is running and will report our errors
+        # to stderr, so it's okay to lose our own stderr.
+        os.close(ar)
+        os.dup2(w, 1)
+        os.dup2(w, 2)
+        os.close(w)
+        check_tty()
+    else:
+        # child
+        try:
+            os.close(ar)
+            os.close(w)
+            os.dup2(r, 0)
+            os.close(r)
+            argv = [
+                'redo-log',
+                '--recursive', '--follow',
+                '--ack-fd', str(aw),
+                ('--status' if status else '--no-status'),
+                ('--details' if details else '--no-details'),
+                '-'
+            ]
+            os.execvp(argv[0], argv)
+        except Exception, e:
+            sys.stderr.write('redo-log: exec: %s\n' % e)
+        finally:
+            os._exit(99)
+
+
+def await_log_reader():
+    if vars.RAW_LOGS: return
+    global log_reader_pid
+    if log_reader_pid > 0:
+        # never actually close fd#1 or fd#2; insanity awaits.
+        # replace it with something else instead.
+        # Since our stdout/stderr are attached to redo-log's stdin,
+        # this will notify redo-log that it's time to die (after it finishes
+        # reading the logs)
+        out = open('/dev/tty', 'w')
+        os.dup2(out.fileno(), 1)
+        os.dup2(out.fileno(), 2)
+        os.waitpid(log_reader_pid, 0)
+
+
 class ImmediateReturn(Exception):
     def __init__(self, rv):
         Exception.__init__(self, "immediate return with exit code %d" % rv)
@@ -44,12 +110,14 @@ class BuildJob:
         assert(self.lock.owned)
         try:
             try:
-                dirty = self.shouldbuildfunc(self.t)
+                is_target, dirty = self.shouldbuildfunc(self.t)
             except state.CyclicDependencyError:
                 err('cyclic dependency while checking %s\n' % _nice(self.t))
                 raise ImmediateReturn(208)
             if not dirty:
                 # target doesn't need to be built; skip the whole task
+                if is_target and vars.DEBUG_LOCKS:
+                    log('[unchanged] %s\n' % _nice(self.t))
                 return self._after2(0)
         except ImmediateReturn, e:
             return self._after2(e.rv)
@@ -117,6 +185,9 @@ class BuildJob:
         firstline = open(os.path.join(dodir, dofile)).readline().strip()
         if firstline.startswith('#!/'):
             argv[0:2] = firstline[2:].split(' ')
+        # make sure to create the logfile *before* writing the log about it.
+        # that way redo-log won't trace into an obsolete logfile.
+        if not vars.RAW_LOGS: open(state.logname(self.sf.id), 'w')
         log('%s\n' % _nice(t))
         self.dodir = dodir
         self.basename = basename
@@ -173,6 +244,11 @@ class BuildJob:
         os.dup2(self.f.fileno(), 1)
         os.close(self.f.fileno())
         close_on_exec(1, False)
+        if not vars.RAW_LOGS:
+            logf = open(state.logname(self.sf.id), 'w')
+            os.dup2(logf.fileno(), 2)
+            close_on_exec(2, False)
+            logf.close()
         signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # python ignores SIGPIPE
         if vars.VERBOSE or vars.XTRACE: log_('* %s\n' % ' '.join(self.argv))
         os.execvp(self.argv[0], self.argv)
diff --git a/jwack.py b/jwack.py
index a674e1a..949b756 100644
--- a/jwack.py
+++ b/jwack.py
@@ -37,6 +37,9 @@ def _timeout(sig, frame):
     pass
 
 
+# We make the pipes use the first available fd numbers starting at startfd.
+# This makes it easier to differentiate different kinds of pipes when using
+# strace.
 def _make_pipe(startfd):
     (a,b) = os.pipe()
     fds = (fcntl.fcntl(a, fcntl.F_DUPFD, startfd),
diff --git a/log.py b/log.py
index 2acf000..0691e4b 100644
--- a/log.py
+++ b/log.py
@@ -1,20 +1,24 @@
 import sys, os
 import vars
 
-# By default, no output colouring.
-RED    = ""
-GREEN  = ""
-YELLOW = ""
-BOLD   = ""
-PLAIN  = ""
 
-if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
-    # ...use ANSI formatting codes.
-    RED    = "\x1b[31m"
-    GREEN  = "\x1b[32m"
-    YELLOW = "\x1b[33m"
-    BOLD   = "\x1b[1m"
-    PLAIN  = "\x1b[m"
+def check_tty():
+    global RED, GREEN, YELLOW, BOLD, PLAIN
+    if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
+        # ...use ANSI formatting codes.
+        RED    = "\x1b[31m"
+        GREEN  = "\x1b[32m"
+        YELLOW = "\x1b[33m"
+        BOLD   = "\x1b[1m"
+        PLAIN  = "\x1b[m"
+    else:
+        RED    = ""
+        GREEN  = ""
+        YELLOW = ""
+        BOLD   = ""
+        PLAIN  = ""
+
+check_tty()
 
 
 def log_(s):
diff --git a/redo-ifchange.py b/redo-ifchange.py
index af232be..081d4e9 100755
--- a/redo-ifchange.py
+++ b/redo-ifchange.py
@@ -14,11 +14,13 @@ def should_build(t):
         raise builder.ImmediateReturn(32)
     dirty = deps.isdirty(f, depth = '', max_changed = vars.RUNID,
                          already_checked=[])
-    return dirty==[f] and deps.DIRTY or dirty
+    return f.is_generated, dirty==[f] and deps.DIRTY or dirty
 
 
 rv = 202
 try:
+    if vars_init.is_toplevel:
+        builder.start_stdin_log_reader(status=True, details=True)
     if vars.TARGET and not vars.UNLOCKED:
         me = os.path.join(vars.STARTDIR, 
                           os.path.join(vars.PWD, vars.TARGET))
@@ -41,6 +43,10 @@ try:
         finally:
             jwack.force_return_tokens()
 except KeyboardInterrupt:
+    if vars_init.is_toplevel:
+        builder.await_log_reader()
     sys.exit(200)
 state.commit()
+if vars_init.is_toplevel:
+    builder.await_log_reader()
 sys.exit(rv)
diff --git a/redo-log b/redo-log
new file mode 120000
index 0000000..dd3b7c1
--- /dev/null
+++ b/redo-log
@@ -0,0 +1 @@
+redo-log.py
\ No newline at end of file
diff --git a/redo-log.py b/redo-log.py
new file mode 100755
index 0000000..c7effa7
--- /dev/null
+++ b/redo-log.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python
+import errno, os, re, sys, time
+import options
+
+optspec = """
+redo-log [options...] [targets...]
+--
+r,recursive     show build logs for dependencies too
+u,unchanged     show lines for dependencies not needing to be rebuilt
+f,follow        keep watching for more lines to be appended (like tail -f)
+no-details      only show 'redo' recursion trace, not build output
+no-colorize     don't colorize 'redo' log messages
+no-status       don't display build summary line in --follow
+ack-fd=         (internal use only) print REDO-OK to this fd upon starting
+"""
+o = options.Options(optspec)
+(opt, flags, extra) = o.parse(sys.argv[1:])
+targets = extra
+
+import vars_init
+vars_init.init(list(targets))
+
+import vars, state
+
+already = set()
+queue = []
+depth = []
+total_lines = 0
+status = None
+
+
+# regexp for matching "redo" lines in the log, which we use for recursion.
+# format:
+#            redo  path/to/target which might have spaces
+#            redo  [unchanged] path/to/target which might have spaces
+#            redo  path/to/target which might have spaces (comment)
+# FIXME: use a more structured format when writing the logs.
+# That will prevent false positives and negatives.  Then transform the
+# structured format into a user-friendly format when printing in redo-log.
+REDO_LINE_RE = re.compile(r'^redo\s+(\[\w+\] )?([^(:]+)( \([^)]+\))?\n$')
+
+
+def is_locked(fid):
+    return (fid is not None) and not state.Lock(fid=fid).trylock()
+
+
+def catlog(t):
+    global total_lines, status
+    if t in already:
+        return
+    depth.append(t)
+    already.add(t)
+    if t == '-':
+        f = sys.stdin
+        fid = None
+        logname = None
+    else:
+        try:
+            sf = state.File(name=t, allow_add=False)
+        except KeyError:
+            sys.stderr.write('redo-log: %r: not known to redo.\n' % (t,))
+            sys.exit(24)
+        fid = sf.id
+        del sf
+        state.rollback()
+        logname = state.logname(fid)
+        f = None
+    delay = 0.01
+    was_locked = is_locked(fid)
+    line_head = ''
+    while 1:
+        if not f:
+            try:
+                f = open(logname)
+            except IOError, e:
+                if e.errno == errno.ENOENT:
+                    # ignore files without logs
+                    pass
+                else:
+                    raise
+        if f:
+            # Note: normally includes trailing \n.
+            # In 'follow' mode, might get a line with no trailing \n
+            # (eg. when ./configure is halfway through a test), which we
+            # deal with below.
+            line = f.readline()
+        else:
+            line = None
+        if not line and (not opt.follow or not was_locked):
+            # file not locked, and no new lines: done
+            break
+        if not line:
+            was_locked = is_locked(fid)
+            if opt.follow:
+                if opt.status:
+                    # FIXME use actual terminal width here
+                    head = '[redo] %s ' % ('{:,}'.format(total_lines))
+                    tail = ''
+                    for i in reversed(depth):
+                        n = os.path.basename(i)
+                        if 65 - len(head) - len(tail) < len(n) + 1:
+                            tail = '... ' + tail
+                            break
+                        else:
+                            tail = n + ' ' + tail
+                    status = head + tail
+                    sys.stdout.flush()
+                    sys.stderr.write('\r%-70.70s\r' % status)
+                time.sleep(min(delay, 1.0))
+                delay += 0.01
+            continue
+        total_lines += 1
+        delay = 0.01
+        if not line.endswith('\n'):
+            line_head += line
+            continue
+        if line_head:
+            line = line_head + line
+            line_head = ''
+        if status:
+            sys.stdout.flush()
+            sys.stderr.write('\r%-70.70s\r' % '')
+            status = None
+        g = re.match(REDO_LINE_RE, line)
+        if g:
+            attr, name, comment = g.groups()
+            if attr == '[unchanged] ':
+                if opt.unchanged:
+                    if name not in already:
+                        sys.stdout.write(line)
+                    if opt.recursive:
+                        catlog(name)
+            else:
+                sys.stdout.write(line)
+                if opt.recursive and (not comment or comment == ' (WAITING)'):
+                    assert name
+                    catlog(name)
+        else:
+            if opt.details:
+                sys.stdout.write(line)
+    if status:
+        sys.stdout.flush()
+        sys.stderr.write('\r%-70.70s\r' % '')
+        status = None
+    if line_head:
+        # partial line never got terminated
+        print line_head
+    assert(depth[-1] == t)
+    depth.pop(-1)
+
+try:
+    if not targets:
+        sys.stderr.write('redo-log: give at least one target; maybe "all"?\n')
+        sys.exit(1)
+    if opt.status < 2 and not os.isatty(2):
+        opt.status = False
+    if opt.ack_fd:
+        ack_fd = int(opt.ack_fd)
+        assert(ack_fd > 2)
+        if os.write(ack_fd, 'REDO-OK\n') != 8:
+            raise Exception('write to ack_fd returned wrong length')
+        os.close(ack_fd)
+    queue += targets
+    while queue:
+        t = queue.pop(0)
+        if t != '-':
+            print 'redo  %s' % t
+        catlog(t)
+except KeyboardInterrupt:
+    sys.exit(200)
+except IOError, e:
+    if e.errno == errno.EPIPE:
+        pass
+    else:
+        raise
diff --git a/redo.py b/redo.py
index 3324a4b..57c906c 100755
--- a/redo.py
+++ b/redo.py
@@ -12,6 +12,9 @@ v,verbose  print commands as they are read from .do files (variables intact)
 x,xtrace   print commands as they are executed (variables expanded)
 k,keep-going  keep going as long as possible even if some targets fail
 shuffle    randomize the build order to find dependency bugs
+no-details only show 'redo' recursion trace (to see more later, use redo-log)
+no-status  don't display build summary line at the bottom of the screen
+raw-logs   don't use redo-log, just send all output straight to stderr
 debug-locks  print messages about file locking (useful for debugging)
 debug-pids   print process ids as part of log messages (useful for debugging)
 version    print the current version and exit
@@ -35,7 +38,10 @@ if opt.keep_going:
     os.environ['REDO_KEEP_GOING'] = '1'
 if opt.shuffle:
     os.environ['REDO_SHUFFLE'] = '1'
-if opt.debug_locks:
+if opt.raw_logs:
+    os.environ['REDO_RAW_LOGS'] = '1'
+if opt.debug_locks or not os.environ.get('REDO_RAW_LOGS'):
+    # FIXME: force-enabled for redo-log, for now
     os.environ['REDO_DEBUG_LOCKS'] = '1'
 if opt.debug_pids:
     os.environ['REDO_DEBUG_PIDS'] = '1'
@@ -47,6 +53,8 @@ import vars, state, builder, jwack
 from log import warn, err
 
 try:
+    if vars_init.is_toplevel:
+        builder.start_stdin_log_reader(status=opt.status, details=opt.details)
     for t in targets:
         if os.path.exists(t):
             f = state.File(name=t)
@@ -61,13 +69,17 @@ try:
     jwack.setup(j)
     try:
         assert(state.is_flushed())
-        retcode = builder.main(targets, lambda t: True)
+        retcode = builder.main(targets, lambda t: (True, True))
         assert(state.is_flushed())
     finally:
         try:
             state.rollback()
         finally:
             jwack.force_return_tokens()
+    if vars_init.is_toplevel:
+        builder.await_log_reader()
     sys.exit(retcode)
 except KeyboardInterrupt:
+    if vars_init.is_toplevel:
+        builder.await_log_reader()
     sys.exit(200)
diff --git a/state.py b/state.py
index d27f472..ea34096 100644
--- a/state.py
+++ b/state.py
@@ -174,7 +174,7 @@ class File(object):
     # use this mostly to avoid accidentally assigning to typos
     __slots__ = ['id'] + _file_cols[1:]
 
-    def _init_from_idname(self, id, name):
+    def _init_from_idname(self, id, name, allow_add):
         q = ('select %s from Files ' % join(', ', _file_cols))
         if id != None:
             q += 'where rowid=?'
@@ -189,7 +189,9 @@ class File(object):
         row = d.execute(q, l).fetchone()
         if not row:
             if not name:
-                raise Exception('No file with id=%r name=%r' % (id, name))
+                raise KeyError('No file with id=%r name=%r' % (id, name))
+            elif not allow_add:
+                raise KeyError('No file with name=%r' % (name,))
             try:
                 _write('insert into Files (name) values (?)', [name])
             except sqlite3.IntegrityError:
@@ -207,17 +209,17 @@ class File(object):
         if self.name == ALWAYS and self.changed_runid < vars.RUNID:
             self.changed_runid = vars.RUNID
     
-    def __init__(self, id=None, name=None, cols=None):
+    def __init__(self, id=None, name=None, cols=None, allow_add=True):
         if cols:
             return self._init_from_cols(cols)
         else:
-            return self._init_from_idname(id, name)
+            return self._init_from_idname(id, name, allow_add=allow_add)
 
     def __repr__(self):
         return "File(%r)" % (self.nicename(),)
 
     def refresh(self):
-        self._init_from_idname(self.id, None)
+        self._init_from_idname(self.id, None, allow_add=False)
 
     def save(self):
         cols = join(', ', ['%s=?'%i for i in _file_cols[2:]])
@@ -324,6 +326,11 @@ def files():
         yield File(cols=cols)
 
 
+def logname(fid):
+    """Given the id of a File, return the filename of its build log."""
+    return os.path.join(vars.BASE, '.redo', 'log.%d' % fid)
+
+
 # FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here.  But python
 # doesn't do the lockdata structure in a portable way, so we have to use
 # fcntl.lockf() instead.  Usually this is just a wrapper for fcntl, so it's
@@ -365,6 +372,7 @@ class Lock:
                 raise
         else:
             self.owned = True
+        return self.owned
 
     def waitlock(self):
         self.check()
diff --git a/vars.py b/vars.py
index 25870fa..068c4c0 100644
--- a/vars.py
+++ b/vars.py
@@ -16,6 +16,7 @@ DEBUG_PIDS = os.environ.get('REDO_DEBUG_PIDS', '') and 1 or 0
 VERBOSE = os.environ.get('REDO_VERBOSE', '') and 1 or 0
 XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0
 KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0
+RAW_LOGS = os.environ.get('REDO_RAW_LOGS', '') and 1 or 0
 SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0
 STARTDIR = os.environ.get('REDO_STARTDIR', '')
 RUNID = atoi(os.environ.get('REDO_RUNID')) or None
diff --git a/vars_init.py b/vars_init.py
index 641c3a3..693a8dc 100644
--- a/vars_init.py
+++ b/vars_init.py
@@ -1,9 +1,13 @@
 import sys, os
 
 
+is_toplevel = False
+
+
 def init_no_state():
     if not os.environ.get('REDO'):
         os.environ['REDO'] = 'NOT_DEFINED'
+        is_toplevel = True
     if not os.environ.get('REDO_BASE'):
         os.environ['REDO_BASE'] = 'NOT_DEFINED'
 
@@ -11,6 +15,8 @@ def init_no_state():
 def init(targets):
     if not os.environ.get('REDO'):
         # toplevel call to redo
+        global is_toplevel
+        is_toplevel = True
         if len(targets) == 0:
             targets.append('all')
         exenames = [os.path.abspath(sys.argv[0]),