redo-log: capture and linearize the output of redo builds.

redo now saves the stderr from every .do script, for every target, into
a file in the .redo directory.  That means you can look up the logs
from the most recent build of any target using the new redo-log
command, for example:

	redo-log -r all

The default is to show logs non-recursively, that is, it'll show when a
target does redo-ifchange on another target, but it won't recurse into
the logs for the latter target.  With -r (recursive), it does.  With -u
(unchanged), it does even if redo-ifchange discovered that the target
was already up-to-date; in that case, it prints the logs of the *most
recent* time the target was generated.

With --no-details, redo-log will show only the 'redo' lines, not the
other log messages.  For very noisy build systems (like recursing into
a 'make' instance) this can be helpful to get an overview of what
happened, without all the cruft.

You can use the -f (follow) option like tail -f, to follow a build
that's currently in progress until it finishes.  redo itself spins up a
copy of redo-log -r -f while it runs, so you can see what's going on.

Still broken in this version:

- No man page or new tests yet.

- ANSI colors don't yet work (unless you use --raw-logs, which gives
  the old-style behaviour).

- You can't redirect the output of a sub-redo to a file or a
  pipe right now, because redo-log is eating it.

- The regex for matching 'redo' lines in the log is very gross.
  Instead, we should put the raw log files in a more machine-parseable
  format, and redo-log should turn that into human-readable format.

- redo-log tries to "linearize" the logs, which makes them
  comprehensible even for a large parallel build.  It recursively shows
  log messages for each target in depth-first tree order (by tracing
  into a new target every time it sees a 'redo' line).  This works
  really well, but in some specific cases, the "topmost" redo instance
  can get stuck waiting for a jwack token, which makes it look like the
  whole build has stalled, when really redo-log is just waiting a long
  time for a particular subprocess to be able to continue.  We'll need to
  add a specific workaround for that.
This commit is contained in:
Avery Pennarun 2018-11-03 22:09:18 -04:00
commit b2411fe483
10 changed files with 315 additions and 23 deletions

View file

@ -1,7 +1,7 @@
import sys, os, errno, random, stat, signal, time import sys, os, errno, random, stat, signal, time
import vars, jwack, state, paths import vars, jwack, state, paths
from helpers import unlink, close_on_exec, join from helpers import unlink, close_on_exec, join
from log import log, log_, debug, debug2, err, warn from log import log, log_, debug, debug2, err, warn, check_tty
def _nice(t): def _nice(t):
@ -18,6 +18,72 @@ def _try_stat(filename):
raise raise
log_reader_pid = None
def start_stdin_log_reader(status, details):
if vars.RAW_LOGS: return
global log_reader_pid
r, w = os.pipe() # main pipe to redo-log
ar, aw = os.pipe() # ack pipe from redo-log --ack-fd
sys.stdout.flush()
sys.stderr.flush()
pid = os.fork()
if pid:
# parent
log_reader_pid = pid
os.close(r)
os.close(aw)
b = os.read(ar, 8)
if not b:
# subprocess died without sending us anything: that's bad.
err('failed to start redo-log subprocess; cannot continue.\n')
os._exit(99)
assert b == 'REDO-OK\n'
# now we know the subproc is running and will report our errors
# to stderr, so it's okay to lose our own stderr.
os.close(ar)
os.dup2(w, 1)
os.dup2(w, 2)
os.close(w)
check_tty()
else:
# child
try:
os.close(ar)
os.close(w)
os.dup2(r, 0)
os.close(r)
argv = [
'redo-log',
'--recursive', '--follow',
'--ack-fd', str(aw),
('--status' if status else '--no-status'),
('--details' if details else '--no-details'),
'-'
]
os.execvp(argv[0], argv)
except Exception, e:
sys.stderr.write('redo-log: exec: %s\n' % e)
finally:
os._exit(99)
def await_log_reader():
if vars.RAW_LOGS: return
global log_reader_pid
if log_reader_pid > 0:
# never actually close fd#1 or fd#2; insanity awaits.
# replace it with something else instead.
# Since our stdout/stderr are attached to redo-log's stdin,
# this will notify redo-log that it's time to die (after it finishes
# reading the logs)
out = open('/dev/tty', 'w')
os.dup2(out.fileno(), 1)
os.dup2(out.fileno(), 2)
os.waitpid(log_reader_pid, 0)
class ImmediateReturn(Exception): class ImmediateReturn(Exception):
def __init__(self, rv): def __init__(self, rv):
Exception.__init__(self, "immediate return with exit code %d" % rv) Exception.__init__(self, "immediate return with exit code %d" % rv)
@ -44,12 +110,14 @@ class BuildJob:
assert(self.lock.owned) assert(self.lock.owned)
try: try:
try: try:
dirty = self.shouldbuildfunc(self.t) is_target, dirty = self.shouldbuildfunc(self.t)
except state.CyclicDependencyError: except state.CyclicDependencyError:
err('cyclic dependency while checking %s\n' % _nice(self.t)) err('cyclic dependency while checking %s\n' % _nice(self.t))
raise ImmediateReturn(208) raise ImmediateReturn(208)
if not dirty: if not dirty:
# target doesn't need to be built; skip the whole task # target doesn't need to be built; skip the whole task
if is_target and vars.DEBUG_LOCKS:
log('[unchanged] %s\n' % _nice(self.t))
return self._after2(0) return self._after2(0)
except ImmediateReturn, e: except ImmediateReturn, e:
return self._after2(e.rv) return self._after2(e.rv)
@ -117,6 +185,9 @@ class BuildJob:
firstline = open(os.path.join(dodir, dofile)).readline().strip() firstline = open(os.path.join(dodir, dofile)).readline().strip()
if firstline.startswith('#!/'): if firstline.startswith('#!/'):
argv[0:2] = firstline[2:].split(' ') argv[0:2] = firstline[2:].split(' ')
# make sure to create the logfile *before* writing the log about it.
# that way redo-log won't trace into an obsolete logfile.
if not vars.RAW_LOGS: open(state.logname(self.sf.id), 'w')
log('%s\n' % _nice(t)) log('%s\n' % _nice(t))
self.dodir = dodir self.dodir = dodir
self.basename = basename self.basename = basename
@ -173,6 +244,11 @@ class BuildJob:
os.dup2(self.f.fileno(), 1) os.dup2(self.f.fileno(), 1)
os.close(self.f.fileno()) os.close(self.f.fileno())
close_on_exec(1, False) close_on_exec(1, False)
if not vars.RAW_LOGS:
logf = open(state.logname(self.sf.id), 'w')
os.dup2(logf.fileno(), 2)
close_on_exec(2, False)
logf.close()
signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE
if vars.VERBOSE or vars.XTRACE: log_('* %s\n' % ' '.join(self.argv)) if vars.VERBOSE or vars.XTRACE: log_('* %s\n' % ' '.join(self.argv))
os.execvp(self.argv[0], self.argv) os.execvp(self.argv[0], self.argv)

View file

@ -37,6 +37,9 @@ def _timeout(sig, frame):
pass pass
# We make the pipes use the first available fd numbers starting at startfd.
# This makes it easier to differentiate different kinds of pipes when using
# strace.
def _make_pipe(startfd): def _make_pipe(startfd):
(a,b) = os.pipe() (a,b) = os.pipe()
fds = (fcntl.fcntl(a, fcntl.F_DUPFD, startfd), fds = (fcntl.fcntl(a, fcntl.F_DUPFD, startfd),

16
log.py
View file

@ -1,13 +1,9 @@
import sys, os import sys, os
import vars import vars
# By default, no output colouring.
RED = ""
GREEN = ""
YELLOW = ""
BOLD = ""
PLAIN = ""
def check_tty():
global RED, GREEN, YELLOW, BOLD, PLAIN
if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb': if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
# ...use ANSI formatting codes. # ...use ANSI formatting codes.
RED = "\x1b[31m" RED = "\x1b[31m"
@ -15,6 +11,14 @@ if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
YELLOW = "\x1b[33m" YELLOW = "\x1b[33m"
BOLD = "\x1b[1m" BOLD = "\x1b[1m"
PLAIN = "\x1b[m" PLAIN = "\x1b[m"
else:
RED = ""
GREEN = ""
YELLOW = ""
BOLD = ""
PLAIN = ""
check_tty()
def log_(s): def log_(s):

View file

@ -14,11 +14,13 @@ def should_build(t):
raise builder.ImmediateReturn(32) raise builder.ImmediateReturn(32)
dirty = deps.isdirty(f, depth = '', max_changed = vars.RUNID, dirty = deps.isdirty(f, depth = '', max_changed = vars.RUNID,
already_checked=[]) already_checked=[])
return dirty==[f] and deps.DIRTY or dirty return f.is_generated, dirty==[f] and deps.DIRTY or dirty
rv = 202 rv = 202
try: try:
if vars_init.is_toplevel:
builder.start_stdin_log_reader(status=True, details=True)
if vars.TARGET and not vars.UNLOCKED: if vars.TARGET and not vars.UNLOCKED:
me = os.path.join(vars.STARTDIR, me = os.path.join(vars.STARTDIR,
os.path.join(vars.PWD, vars.TARGET)) os.path.join(vars.PWD, vars.TARGET))
@ -41,6 +43,10 @@ try:
finally: finally:
jwack.force_return_tokens() jwack.force_return_tokens()
except KeyboardInterrupt: except KeyboardInterrupt:
if vars_init.is_toplevel:
builder.await_log_reader()
sys.exit(200) sys.exit(200)
state.commit() state.commit()
if vars_init.is_toplevel:
builder.await_log_reader()
sys.exit(rv) sys.exit(rv)

1
redo-log Symbolic link
View file

@ -0,0 +1 @@
redo-log.py

175
redo-log.py Executable file
View file

@ -0,0 +1,175 @@
#!/usr/bin/env python
import errno, os, re, sys, time
import options
optspec = """
redo-log [options...] [targets...]
--
r,recursive show build logs for dependencies too
u,unchanged show lines for dependencies not needing to be rebuilt
f,follow keep watching for more lines to be appended (like tail -f)
no-details only show 'redo' recursion trace, not build output
no-colorize don't colorize 'redo' log messages
no-status don't display build summary line in --follow
ack-fd= (internal use only) print REDO-OK to this fd upon starting
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])
targets = extra
import vars_init
vars_init.init(list(targets))
import vars, state
already = set()
queue = []
depth = []
total_lines = 0
status = None
# regexp for matching "redo" lines in the log, which we use for recursion.
# format:
# redo path/to/target which might have spaces
# redo [unchanged] path/to/target which might have spaces
# redo path/to/target which might have spaces (comment)
# FIXME: use a more structured format when writing the logs.
# That will prevent false positives and negatives. Then transform the
# structured format into a user-friendly format when printing in redo-log.
REDO_LINE_RE = re.compile(r'^redo\s+(\[\w+\] )?([^(:]+)( \([^)]+\))?\n$')
def is_locked(fid):
return (fid is not None) and not state.Lock(fid=fid).trylock()
def catlog(t):
global total_lines, status
if t in already:
return
depth.append(t)
already.add(t)
if t == '-':
f = sys.stdin
fid = None
logname = None
else:
try:
sf = state.File(name=t, allow_add=False)
except KeyError:
sys.stderr.write('redo-log: %r: not known to redo.\n' % (t,))
sys.exit(24)
fid = sf.id
del sf
state.rollback()
logname = state.logname(fid)
f = None
delay = 0.01
was_locked = is_locked(fid)
line_head = ''
while 1:
if not f:
try:
f = open(logname)
except IOError, e:
if e.errno == errno.ENOENT:
# ignore files without logs
pass
else:
raise
if f:
# Note: normally includes trailing \n.
# In 'follow' mode, might get a line with no trailing \n
# (eg. when ./configure is halfway through a test), which we
# deal with below.
line = f.readline()
else:
line = None
if not line and (not opt.follow or not was_locked):
# file not locked, and no new lines: done
break
if not line:
was_locked = is_locked(fid)
if opt.follow:
if opt.status:
# FIXME use actual terminal width here
head = '[redo] %s ' % ('{:,}'.format(total_lines))
tail = ''
for i in reversed(depth):
n = os.path.basename(i)
if 65 - len(head) - len(tail) < len(n) + 1:
tail = '... ' + tail
break
else:
tail = n + ' ' + tail
status = head + tail
sys.stdout.flush()
sys.stderr.write('\r%-70.70s\r' % status)
time.sleep(min(delay, 1.0))
delay += 0.01
continue
total_lines += 1
delay = 0.01
if not line.endswith('\n'):
line_head += line
continue
if line_head:
line = line_head + line
line_head = ''
if status:
sys.stdout.flush()
sys.stderr.write('\r%-70.70s\r' % '')
status = None
g = re.match(REDO_LINE_RE, line)
if g:
attr, name, comment = g.groups()
if attr == '[unchanged] ':
if opt.unchanged:
if name not in already:
sys.stdout.write(line)
if opt.recursive:
catlog(name)
else:
sys.stdout.write(line)
if opt.recursive and (not comment or comment == ' (WAITING)'):
assert name
catlog(name)
else:
if opt.details:
sys.stdout.write(line)
if status:
sys.stdout.flush()
sys.stderr.write('\r%-70.70s\r' % '')
status = None
if line_head:
# partial line never got terminated
print line_head
assert(depth[-1] == t)
depth.pop(-1)
try:
if not targets:
sys.stderr.write('redo-log: give at least one target; maybe "all"?\n')
sys.exit(1)
if opt.status < 2 and not os.isatty(2):
opt.status = False
if opt.ack_fd:
ack_fd = int(opt.ack_fd)
assert(ack_fd > 2)
if os.write(ack_fd, 'REDO-OK\n') != 8:
raise Exception('write to ack_fd returned wrong length')
os.close(ack_fd)
queue += targets
while queue:
t = queue.pop(0)
if t != '-':
print 'redo %s' % t
catlog(t)
except KeyboardInterrupt:
sys.exit(200)
except IOError, e:
if e.errno == errno.EPIPE:
pass
else:
raise

16
redo.py
View file

@ -12,6 +12,9 @@ v,verbose print commands as they are read from .do files (variables intact)
x,xtrace print commands as they are executed (variables expanded) x,xtrace print commands as they are executed (variables expanded)
k,keep-going keep going as long as possible even if some targets fail k,keep-going keep going as long as possible even if some targets fail
shuffle randomize the build order to find dependency bugs shuffle randomize the build order to find dependency bugs
no-details only show 'redo' recursion trace (to see more later, use redo-log)
no-status don't display build summary line at the bottom of the screen
raw-logs don't use redo-log, just send all output straight to stderr
debug-locks print messages about file locking (useful for debugging) debug-locks print messages about file locking (useful for debugging)
debug-pids print process ids as part of log messages (useful for debugging) debug-pids print process ids as part of log messages (useful for debugging)
version print the current version and exit version print the current version and exit
@ -35,7 +38,10 @@ if opt.keep_going:
os.environ['REDO_KEEP_GOING'] = '1' os.environ['REDO_KEEP_GOING'] = '1'
if opt.shuffle: if opt.shuffle:
os.environ['REDO_SHUFFLE'] = '1' os.environ['REDO_SHUFFLE'] = '1'
if opt.debug_locks: if opt.raw_logs:
os.environ['REDO_RAW_LOGS'] = '1'
if opt.debug_locks or not os.environ.get('REDO_RAW_LOGS'):
# FIXME: force-enabled for redo-log, for now
os.environ['REDO_DEBUG_LOCKS'] = '1' os.environ['REDO_DEBUG_LOCKS'] = '1'
if opt.debug_pids: if opt.debug_pids:
os.environ['REDO_DEBUG_PIDS'] = '1' os.environ['REDO_DEBUG_PIDS'] = '1'
@ -47,6 +53,8 @@ import vars, state, builder, jwack
from log import warn, err from log import warn, err
try: try:
if vars_init.is_toplevel:
builder.start_stdin_log_reader(status=opt.status, details=opt.details)
for t in targets: for t in targets:
if os.path.exists(t): if os.path.exists(t):
f = state.File(name=t) f = state.File(name=t)
@ -61,13 +69,17 @@ try:
jwack.setup(j) jwack.setup(j)
try: try:
assert(state.is_flushed()) assert(state.is_flushed())
retcode = builder.main(targets, lambda t: True) retcode = builder.main(targets, lambda t: (True, True))
assert(state.is_flushed()) assert(state.is_flushed())
finally: finally:
try: try:
state.rollback() state.rollback()
finally: finally:
jwack.force_return_tokens() jwack.force_return_tokens()
if vars_init.is_toplevel:
builder.await_log_reader()
sys.exit(retcode) sys.exit(retcode)
except KeyboardInterrupt: except KeyboardInterrupt:
if vars_init.is_toplevel:
builder.await_log_reader()
sys.exit(200) sys.exit(200)

View file

@ -174,7 +174,7 @@ class File(object):
# use this mostly to avoid accidentally assigning to typos # use this mostly to avoid accidentally assigning to typos
__slots__ = ['id'] + _file_cols[1:] __slots__ = ['id'] + _file_cols[1:]
def _init_from_idname(self, id, name): def _init_from_idname(self, id, name, allow_add):
q = ('select %s from Files ' % join(', ', _file_cols)) q = ('select %s from Files ' % join(', ', _file_cols))
if id != None: if id != None:
q += 'where rowid=?' q += 'where rowid=?'
@ -189,7 +189,9 @@ class File(object):
row = d.execute(q, l).fetchone() row = d.execute(q, l).fetchone()
if not row: if not row:
if not name: if not name:
raise Exception('No file with id=%r name=%r' % (id, name)) raise KeyError('No file with id=%r name=%r' % (id, name))
elif not allow_add:
raise KeyError('No file with name=%r' % (name,))
try: try:
_write('insert into Files (name) values (?)', [name]) _write('insert into Files (name) values (?)', [name])
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
@ -207,17 +209,17 @@ class File(object):
if self.name == ALWAYS and self.changed_runid < vars.RUNID: if self.name == ALWAYS and self.changed_runid < vars.RUNID:
self.changed_runid = vars.RUNID self.changed_runid = vars.RUNID
def __init__(self, id=None, name=None, cols=None): def __init__(self, id=None, name=None, cols=None, allow_add=True):
if cols: if cols:
return self._init_from_cols(cols) return self._init_from_cols(cols)
else: else:
return self._init_from_idname(id, name) return self._init_from_idname(id, name, allow_add=allow_add)
def __repr__(self): def __repr__(self):
return "File(%r)" % (self.nicename(),) return "File(%r)" % (self.nicename(),)
def refresh(self): def refresh(self):
self._init_from_idname(self.id, None) self._init_from_idname(self.id, None, allow_add=False)
def save(self): def save(self):
cols = join(', ', ['%s=?'%i for i in _file_cols[2:]]) cols = join(', ', ['%s=?'%i for i in _file_cols[2:]])
@ -324,6 +326,11 @@ def files():
yield File(cols=cols) yield File(cols=cols)
def logname(fid):
"""Given the id of a File, return the filename of its build log."""
return os.path.join(vars.BASE, '.redo', 'log.%d' % fid)
# FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python # FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python
# doesn't do the lockdata structure in a portable way, so we have to use # doesn't do the lockdata structure in a portable way, so we have to use
# fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's # fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's
@ -365,6 +372,7 @@ class Lock:
raise raise
else: else:
self.owned = True self.owned = True
return self.owned
def waitlock(self): def waitlock(self):
self.check() self.check()

View file

@ -16,6 +16,7 @@ DEBUG_PIDS = os.environ.get('REDO_DEBUG_PIDS', '') and 1 or 0
VERBOSE = os.environ.get('REDO_VERBOSE', '') and 1 or 0 VERBOSE = os.environ.get('REDO_VERBOSE', '') and 1 or 0
XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0 XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0
KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0 KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0
RAW_LOGS = os.environ.get('REDO_RAW_LOGS', '') and 1 or 0
SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0 SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0
STARTDIR = os.environ.get('REDO_STARTDIR', '') STARTDIR = os.environ.get('REDO_STARTDIR', '')
RUNID = atoi(os.environ.get('REDO_RUNID')) or None RUNID = atoi(os.environ.get('REDO_RUNID')) or None

View file

@ -1,9 +1,13 @@
import sys, os import sys, os
is_toplevel = False
def init_no_state(): def init_no_state():
if not os.environ.get('REDO'): if not os.environ.get('REDO'):
os.environ['REDO'] = 'NOT_DEFINED' os.environ['REDO'] = 'NOT_DEFINED'
is_toplevel = True
if not os.environ.get('REDO_BASE'): if not os.environ.get('REDO_BASE'):
os.environ['REDO_BASE'] = 'NOT_DEFINED' os.environ['REDO_BASE'] = 'NOT_DEFINED'
@ -11,6 +15,8 @@ def init_no_state():
def init(targets): def init(targets):
if not os.environ.get('REDO'): if not os.environ.get('REDO'):
# toplevel call to redo # toplevel call to redo
global is_toplevel
is_toplevel = True
if len(targets) == 0: if len(targets) == 0:
targets.append('all') targets.append('all')
exenames = [os.path.abspath(sys.argv[0]), exenames = [os.path.abspath(sys.argv[0]),